]> git.proxmox.com Git - libgit2.git/blame - src/pack.c
pack: iterate objects in offset order
[libgit2.git] / src / pack.c
CommitLineData
7d0cdf82 1/*
5e0de328 2 * Copyright (C) 2009-2012 the libgit2 contributors
7d0cdf82 3 *
bb742ede
VM
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
7d0cdf82
CMN
6 */
7
a15c550d 8#include "common.h"
7d0cdf82
CMN
9#include "odb.h"
10#include "pack.h"
11#include "delta-apply.h"
a070f152 12#include "sha1_lookup.h"
a15c550d
VM
13#include "mwindow.h"
14#include "fileops.h"
7d0cdf82
CMN
15
16#include "git2/oid.h"
0c3bae62 17#include <zlib.h>
7d0cdf82 18
a070f152 19static int packfile_open(struct git_pack_file *p);
e1de726c 20static git_off_t nth_packed_object_offset(const struct git_pack_file *p, uint32_t n);
a070f152
CMN
21int packfile_unpack_compressed(
22 git_rawobj *obj,
23 struct git_pack_file *p,
24 git_mwindow **w_curs,
e1de726c 25 git_off_t *curpos,
a070f152
CMN
26 size_t size,
27 git_otype type);
28
29/* Can find the offset of an object given
30 * a prefix of an identifier.
904b67e6 31 * Throws GIT_EAMBIGUOUSOIDPREFIX if short oid
a070f152
CMN
32 * is ambiguous within the pack.
33 * This method assumes that len is between
34 * GIT_OID_MINPREFIXLEN and GIT_OID_HEXSZ.
35 */
36static int pack_entry_find_offset(
e1de726c 37 git_off_t *offset_out,
a070f152
CMN
38 git_oid *found_oid,
39 struct git_pack_file *p,
40 const git_oid *short_oid,
b8457baa 41 size_t len);
a070f152 42
e1de726c
RB
43static int packfile_error(const char *message)
44{
45 giterr_set(GITERR_ODB, "Invalid pack file - %s", message);
46 return -1;
47}
48
a070f152
CMN
49/***********************************************************
50 *
51 * PACK INDEX METHODS
52 *
53 ***********************************************************/
54
55static void pack_index_free(struct git_pack_file *p)
56{
60ecdf59
DMB
57 if (p->oids) {
58 git__free(p->oids);
59 p->oids = NULL;
60 }
a070f152
CMN
61 if (p->index_map.data) {
62 git_futils_mmap_free(&p->index_map);
63 p->index_map.data = NULL;
64 }
65}
66
87d9869f 67static int pack_index_check(const char *path, struct git_pack_file *p)
a070f152
CMN
68{
69 struct git_pack_idx_header *hdr;
70 uint32_t version, nr, i, *index;
a070f152
CMN
71 void *idx_map;
72 size_t idx_size;
a070f152 73 struct stat st;
a070f152 74 int error;
e1de726c
RB
75 /* TODO: properly open the file without access time using O_NOATIME */
76 git_file fd = git_futils_open_ro(path);
a070f152 77 if (fd < 0)
e1de726c 78 return fd;
a070f152 79
e1de726c
RB
80 if (p_fstat(fd, &st) < 0 ||
81 !S_ISREG(st.st_mode) ||
82 !git__is_sizet(st.st_size) ||
83 (idx_size = (size_t)st.st_size) < 4 * 256 + 20 + 20)
84 {
a070f152 85 p_close(fd);
e1de726c
RB
86 giterr_set(GITERR_OS, "Failed to check pack index.");
87 return -1;
a070f152
CMN
88 }
89
90 error = git_futils_mmap_ro(&p->index_map, fd, 0, idx_size);
e1de726c 91
a070f152
CMN
92 p_close(fd);
93
e1de726c
RB
94 if (error < 0)
95 return error;
a070f152
CMN
96
97 hdr = idx_map = p->index_map.data;
98
99 if (hdr->idx_signature == htonl(PACK_IDX_SIGNATURE)) {
100 version = ntohl(hdr->idx_version);
101
102 if (version < 2 || version > 2) {
103 git_futils_mmap_free(&p->index_map);
e1de726c 104 return packfile_error("unsupported index version");
a070f152
CMN
105 }
106
107 } else
108 version = 1;
109
110 nr = 0;
111 index = idx_map;
112
113 if (version > 1)
87d9869f 114 index += 2; /* skip index header */
a070f152
CMN
115
116 for (i = 0; i < 256; i++) {
117 uint32_t n = ntohl(index[i]);
118 if (n < nr) {
119 git_futils_mmap_free(&p->index_map);
e1de726c 120 return packfile_error("index is non-monotonic");
a070f152
CMN
121 }
122 nr = n;
123 }
124
125 if (version == 1) {
126 /*
127 * Total size:
87d9869f
VM
128 * - 256 index entries 4 bytes each
129 * - 24-byte entries * nr (20-byte sha1 + 4-byte offset)
130 * - 20-byte SHA1 of the packfile
131 * - 20-byte SHA1 file checksum
a070f152
CMN
132 */
133 if (idx_size != 4*256 + nr * 24 + 20 + 20) {
134 git_futils_mmap_free(&p->index_map);
e1de726c 135 return packfile_error("index is corrupted");
a070f152
CMN
136 }
137 } else if (version == 2) {
138 /*
139 * Minimum size:
87d9869f
VM
140 * - 8 bytes of header
141 * - 256 index entries 4 bytes each
142 * - 20-byte sha1 entry * nr
143 * - 4-byte crc entry * nr
144 * - 4-byte offset entry * nr
145 * - 20-byte SHA1 of the packfile
146 * - 20-byte SHA1 file checksum
a070f152
CMN
147 * And after the 4-byte offset table might be a
148 * variable sized table containing 8-byte entries
149 * for offsets larger than 2^31.
150 */
151 unsigned long min_size = 8 + 4*256 + nr*(20 + 4 + 4) + 20 + 20;
152 unsigned long max_size = min_size;
153
154 if (nr)
155 max_size += (nr - 1)*8;
156
157 if (idx_size < min_size || idx_size > max_size) {
158 git_futils_mmap_free(&p->index_map);
e1de726c 159 return packfile_error("wrong index size");
a070f152 160 }
a070f152
CMN
161 }
162
163 p->index_version = version;
164 p->num_objects = nr;
e1de726c 165 return 0;
a070f152
CMN
166}
167
168static int pack_index_open(struct git_pack_file *p)
169{
170 char *idx_name;
171 int error;
44ef8b1b 172 size_t name_len, offset;
a070f152
CMN
173
174 if (p->index_map.data)
e1de726c 175 return 0;
a070f152
CMN
176
177 idx_name = git__strdup(p->pack_name);
e1de726c
RB
178 GITERR_CHECK_ALLOC(idx_name);
179
44ef8b1b
RB
180 name_len = strlen(idx_name);
181 offset = name_len - strlen(".pack");
182 assert(offset < name_len); /* make sure no underflow */
183
184 strncpy(idx_name + offset, ".idx", name_len - offset);
a070f152
CMN
185
186 error = pack_index_check(idx_name, p);
3286c408 187 git__free(idx_name);
a070f152 188
e1de726c 189 return error;
a070f152
CMN
190}
191
192static unsigned char *pack_window_open(
193 struct git_pack_file *p,
7d0cdf82 194 git_mwindow **w_cursor,
e1de726c 195 git_off_t offset,
7d0cdf82
CMN
196 unsigned int *left)
197{
e1de726c 198 if (p->mwf.fd == -1 && packfile_open(p) < 0)
7d0cdf82
CMN
199 return NULL;
200
201 /* Since packfiles end in a hash of their content and it's
202 * pointless to ask for an offset into the middle of that
203 * hash, and the pack_window_contains function above wouldn't match
204 * don't allow an offset too close to the end of the file.
205 */
206 if (offset > (p->mwf.size - 20))
207 return NULL;
208
209 return git_mwindow_open(&p->mwf, w_cursor, offset, 20, left);
210 }
211
45d773ef
CMN
212static int packfile_unpack_header1(
213 unsigned long *usedp,
7d0cdf82
CMN
214 size_t *sizep,
215 git_otype *type,
216 const unsigned char *buf,
217 unsigned long len)
218{
219 unsigned shift;
220 unsigned long size, c;
221 unsigned long used = 0;
2aeadb9c 222
7d0cdf82
CMN
223 c = buf[used++];
224 *type = (c >> 4) & 7;
225 size = c & 15;
226 shift = 4;
227 while (c & 0x80) {
45d773ef 228 if (len <= used)
904b67e6 229 return GIT_EBUFS;
45d773ef
CMN
230
231 if (bitsizeof(long) <= shift) {
232 *usedp = 0;
233 return -1;
234 }
7d0cdf82
CMN
235
236 c = buf[used++];
237 size += (c & 0x7f) << shift;
238 shift += 7;
239 }
240
241 *sizep = (size_t)size;
45d773ef
CMN
242 *usedp = used;
243 return 0;
7d0cdf82
CMN
244}
245
246int git_packfile_unpack_header(
247 size_t *size_p,
248 git_otype *type_p,
249 git_mwindow_file *mwf,
250 git_mwindow **w_curs,
e1de726c 251 git_off_t *curpos)
7d0cdf82
CMN
252{
253 unsigned char *base;
254 unsigned int left;
255 unsigned long used;
45d773ef 256 int ret;
7d0cdf82
CMN
257
258 /* pack_window_open() assures us we have [base, base + 20) available
87d9869f
VM
259 * as a range that we can look at at. (Its actually the hash
260 * size that is assured.) With our object header encoding
7d0cdf82
CMN
261 * the maximum deflated object size is 2^137, which is just
262 * insane, so we know won't exceed what we have been given.
263 */
264// base = pack_window_open(p, w_curs, *curpos, &left);
265 base = git_mwindow_open(mwf, w_curs, *curpos, 20, &left);
266 if (base == NULL)
904b67e6 267 return GIT_EBUFS;
2aeadb9c
CY
268
269 ret = packfile_unpack_header1(&used, size_p, type_p, base, left);
45d773ef 270 git_mwindow_close(w_curs);
904b67e6 271 if (ret == GIT_EBUFS)
45d773ef
CMN
272 return ret;
273 else if (ret < 0)
e1de726c 274 return packfile_error("header length is zero");
7d0cdf82
CMN
275
276 *curpos += used;
e1de726c 277 return 0;
7d0cdf82
CMN
278}
279
a070f152 280static int packfile_unpack_delta(
7d0cdf82 281 git_rawobj *obj,
a070f152 282 struct git_pack_file *p,
7d0cdf82 283 git_mwindow **w_curs,
e1de726c 284 git_off_t *curpos,
7d0cdf82
CMN
285 size_t delta_size,
286 git_otype delta_type,
e1de726c 287 git_off_t obj_offset)
7d0cdf82 288{
e1de726c 289 git_off_t base_offset;
7d0cdf82
CMN
290 git_rawobj base, delta;
291 int error;
292
b5b474dd 293 base_offset = get_delta_base(p, w_curs, curpos, delta_type, obj_offset);
45d773ef 294 git_mwindow_close(w_curs);
7d0cdf82 295 if (base_offset == 0)
e1de726c
RB
296 return packfile_error("delta offset is zero");
297 if (base_offset < 0) /* must actually be an error code */
298 return (int)base_offset;
7d0cdf82 299
b5b474dd 300 error = git_packfile_unpack(&base, p, &base_offset);
7d0cdf82
CMN
301
302 /*
303 * TODO: git.git tries to load the base from other packfiles
304 * or loose objects.
305 *
306 * We'll need to do this in order to support thin packs.
307 */
e1de726c
RB
308 if (error < 0)
309 return error;
7d0cdf82
CMN
310
311 error = packfile_unpack_compressed(&delta, p, w_curs, curpos, delta_size, delta_type);
45d773ef 312 git_mwindow_close(w_curs);
e1de726c 313 if (error < 0) {
3286c408 314 git__free(base.data);
e1de726c 315 return error;
7d0cdf82
CMN
316 }
317
318 obj->type = base.type;
e1de726c 319 error = git__delta_apply(obj, base.data, base.len, delta.data, delta.len);
7d0cdf82 320
3286c408
VM
321 git__free(base.data);
322 git__free(delta.data);
7d0cdf82
CMN
323
324 /* TODO: we might want to cache this shit. eventually */
325 //add_delta_base_cache(p, base_offset, base, base_size, *type);
e1de726c 326
7d0cdf82
CMN
327 return error; /* error set by git__delta_apply */
328}
329
a070f152 330int git_packfile_unpack(
e1de726c
RB
331 git_rawobj *obj,
332 struct git_pack_file *p,
333 git_off_t *obj_offset)
7d0cdf82
CMN
334{
335 git_mwindow *w_curs = NULL;
e1de726c 336 git_off_t curpos = *obj_offset;
7d0cdf82
CMN
337 int error;
338
339 size_t size = 0;
340 git_otype type;
341
342 /*
343 * TODO: optionally check the CRC on the packfile
344 */
345
346 obj->data = NULL;
347 obj->len = 0;
348 obj->type = GIT_OBJ_BAD;
349
350 error = git_packfile_unpack_header(&size, &type, &p->mwf, &w_curs, &curpos);
45d773ef
CMN
351 git_mwindow_close(&w_curs);
352
e1de726c
RB
353 if (error < 0)
354 return error;
7d0cdf82
CMN
355
356 switch (type) {
357 case GIT_OBJ_OFS_DELTA:
358 case GIT_OBJ_REF_DELTA:
359 error = packfile_unpack_delta(
b5b474dd
CMN
360 obj, p, &w_curs, &curpos,
361 size, type, *obj_offset);
7d0cdf82
CMN
362 break;
363
364 case GIT_OBJ_COMMIT:
365 case GIT_OBJ_TREE:
366 case GIT_OBJ_BLOB:
367 case GIT_OBJ_TAG:
368 error = packfile_unpack_compressed(
b5b474dd 369 obj, p, &w_curs, &curpos,
7d0cdf82
CMN
370 size, type);
371 break;
372
373 default:
e1de726c 374 error = packfile_error("invalid packfile type in header");;
7d0cdf82
CMN
375 break;
376 }
377
b5b474dd 378 *obj_offset = curpos;
e1de726c 379 return error;
7d0cdf82
CMN
380}
381
282283ac
RB
/*
 * Allocation hook installed as `zalloc` on the zlib stream: forwards
 * to git__calloc(). The opaque pointer is not used.
 */
static void *use_git_alloc(void *opaq, unsigned int count, unsigned int size)
{
	void *block;

	GIT_UNUSED(opaq);
	block = git__calloc(count, size);
	return block;
}
387
/*
 * Release hook installed as `zfree` on the zlib stream: forwards to
 * git__free(). The opaque pointer is not used.
 */
static void use_git_free(void *opaq, void *ptr)
{
	GIT_UNUSED(opaq);

	git__free(ptr);
}
393
7d0cdf82 394int packfile_unpack_compressed(
e1de726c
RB
395 git_rawobj *obj,
396 struct git_pack_file *p,
397 git_mwindow **w_curs,
398 git_off_t *curpos,
399 size_t size,
400 git_otype type)
7d0cdf82
CMN
401{
402 int st;
403 z_stream stream;
404 unsigned char *buffer, *in;
405
e1de726c
RB
406 buffer = git__calloc(1, size + 1);
407 GITERR_CHECK_ALLOC(buffer);
7d0cdf82
CMN
408
409 memset(&stream, 0, sizeof(stream));
410 stream.next_out = buffer;
1c3fac4d 411 stream.avail_out = (uInt)size + 1;
282283ac
RB
412 stream.zalloc = use_git_alloc;
413 stream.zfree = use_git_free;
7d0cdf82
CMN
414
415 st = inflateInit(&stream);
416 if (st != Z_OK) {
3286c408 417 git__free(buffer);
e1de726c 418 giterr_set(GITERR_ZLIB, "Failed to inflate packfile");
45d773ef 419
e1de726c 420 return -1;
7d0cdf82
CMN
421 }
422
423 do {
b5b474dd 424 in = pack_window_open(p, w_curs, *curpos, &stream.avail_in);
7d0cdf82
CMN
425 stream.next_in = in;
426 st = inflate(&stream, Z_FINISH);
45d773ef 427 git_mwindow_close(w_curs);
7d0cdf82
CMN
428
429 if (!stream.avail_out)
430 break; /* the payload is larger than it should be */
431
45d773ef
CMN
432 if (st == Z_BUF_ERROR && in == NULL) {
433 inflateEnd(&stream);
434 git__free(buffer);
904b67e6 435 return GIT_EBUFS;
45d773ef
CMN
436 }
437
b5b474dd 438 *curpos += stream.next_in - in;
7d0cdf82
CMN
439 } while (st == Z_OK || st == Z_BUF_ERROR);
440
441 inflateEnd(&stream);
442
443 if ((st != Z_STREAM_END) || stream.total_out != size) {
3286c408 444 git__free(buffer);
e1de726c
RB
445 giterr_set(GITERR_ZLIB, "Failed to inflate packfile");
446 return -1;
7d0cdf82
CMN
447 }
448
449 obj->type = type;
450 obj->len = size;
451 obj->data = buffer;
e1de726c 452 return 0;
7d0cdf82
CMN
453}
454
b5b474dd
CMN
455/*
456 * curpos is where the data starts, delta_obj_offset is the where the
457 * header starts
458 */
e1de726c
RB
459git_off_t get_delta_base(
460 struct git_pack_file *p,
461 git_mwindow **w_curs,
462 git_off_t *curpos,
463 git_otype type,
464 git_off_t delta_obj_offset)
7d0cdf82 465{
45d773ef
CMN
466 unsigned int left = 0;
467 unsigned char *base_info;
e1de726c 468 git_off_t base_offset;
7d0cdf82
CMN
469 git_oid unused;
470
45d773ef
CMN
471 base_info = pack_window_open(p, w_curs, *curpos, &left);
472 /* Assumption: the only reason this would fail is because the file is too small */
473 if (base_info == NULL)
904b67e6 474 return GIT_EBUFS;
7d0cdf82
CMN
475 /* pack_window_open() assured us we have [base_info, base_info + 20)
476 * as a range that we can look at without walking off the
87d9869f
VM
477 * end of the mapped window. Its actually the hash size
478 * that is assured. An OFS_DELTA longer than the hash size
7d0cdf82
CMN
479 * is stupid, as then a REF_DELTA would be smaller to store.
480 */
481 if (type == GIT_OBJ_OFS_DELTA) {
482 unsigned used = 0;
483 unsigned char c = base_info[used++];
484 base_offset = c & 127;
485 while (c & 128) {
45d773ef 486 if (left <= used)
904b67e6 487 return GIT_EBUFS;
7d0cdf82
CMN
488 base_offset += 1;
489 if (!base_offset || MSB(base_offset, 7))
87d9869f 490 return 0; /* overflow */
7d0cdf82
CMN
491 c = base_info[used++];
492 base_offset = (base_offset << 7) + (c & 127);
493 }
494 base_offset = delta_obj_offset - base_offset;
495 if (base_offset <= 0 || base_offset >= delta_obj_offset)
87d9869f 496 return 0; /* out of bound */
7d0cdf82
CMN
497 *curpos += used;
498 } else if (type == GIT_OBJ_REF_DELTA) {
c1af5a39
CMN
499 /* If we have the cooperative cache, search in it first */
500 if (p->has_cache) {
501 int pos;
502 struct git_pack_entry key;
503
504 git_oid_fromraw(&key.sha1, base_info);
505 pos = git_vector_bsearch(&p->cache, &key);
506 if (pos >= 0) {
507 *curpos += 20;
508 return ((struct git_pack_entry *)git_vector_get(&p->cache, pos))->offset;
509 }
510 }
7d0cdf82 511 /* The base entry _must_ be in the same pack */
e1de726c
RB
512 if (pack_entry_find_offset(&base_offset, &unused, p, (git_oid *)base_info, GIT_OID_HEXSZ) < 0)
513 return packfile_error("base entry delta is not in the same pack");
7d0cdf82
CMN
514 *curpos += 20;
515 } else
516 return 0;
517
518 return base_offset;
519}
a070f152
CMN
520
521/***********************************************************
522 *
523 * PACKFILE METHODS
524 *
525 ***********************************************************/
526
44ef8b1b 527static struct git_pack_file *packfile_alloc(size_t extra)
a070f152 528{
e1de726c
RB
529 struct git_pack_file *p = git__calloc(1, sizeof(*p) + extra);
530 if (p != NULL)
531 p->mwf.fd = -1;
a070f152
CMN
532 return p;
533}
534
535
536void packfile_free(struct git_pack_file *p)
537{
538 assert(p);
539
540 /* clear_delta_base_cache(); */
541 git_mwindow_free_all(&p->mwf);
1d8943c6 542 git_mwindow_file_deregister(&p->mwf);
a070f152
CMN
543
544 if (p->mwf.fd != -1)
545 p_close(p->mwf.fd);
546
547 pack_index_free(p);
548
3286c408
VM
549 git__free(p->bad_object_sha1);
550 git__free(p);
a070f152
CMN
551}
552
553static int packfile_open(struct git_pack_file *p)
554{
555 struct stat st;
556 struct git_pack_header hdr;
557 git_oid sha1;
558 unsigned char *idx_sha1;
559
e1de726c
RB
560 assert(p->index_map.data);
561
0d0fa7c3 562 if (!p->index_map.data && pack_index_open(p) < 0)
282283ac 563 return git_odb__error_notfound("failed to open packfile", NULL);
a070f152
CMN
564
565 /* TODO: open with noatime */
e1de726c
RB
566 p->mwf.fd = git_futils_open_ro(p->pack_name);
567 if (p->mwf.fd < 0)
568 return p->mwf.fd;
a070f152 569
e1de726c
RB
570 if (p_fstat(p->mwf.fd, &st) < 0 ||
571 git_mwindow_file_register(&p->mwf) < 0)
572 goto cleanup;
a070f152
CMN
573
574 /* If we created the struct before we had the pack we lack size. */
575 if (!p->mwf.size) {
576 if (!S_ISREG(st.st_mode))
577 goto cleanup;
e1de726c 578 p->mwf.size = (git_off_t)st.st_size;
a070f152
CMN
579 } else if (p->mwf.size != st.st_size)
580 goto cleanup;
581
582#if 0
583 /* We leave these file descriptors open with sliding mmap;
584 * there is no point keeping them open across exec(), though.
585 */
586 fd_flag = fcntl(p->mwf.fd, F_GETFD, 0);
587 if (fd_flag < 0)
e1de726c 588 goto cleanup;
a070f152
CMN
589
590 fd_flag |= FD_CLOEXEC;
591 if (fcntl(p->pack_fd, F_SETFD, fd_flag) == -1)
e1de726c 592 goto cleanup;
a070f152
CMN
593#endif
594
595 /* Verify we recognize this pack file format. */
e1de726c
RB
596 if (p_read(p->mwf.fd, &hdr, sizeof(hdr)) < 0 ||
597 hdr.hdr_signature != htonl(PACK_SIGNATURE) ||
598 !pack_version_ok(hdr.hdr_version))
a070f152
CMN
599 goto cleanup;
600
601 /* Verify the pack matches its index. */
e1de726c
RB
602 if (p->num_objects != ntohl(hdr.hdr_entries) ||
603 p_lseek(p->mwf.fd, p->mwf.size - GIT_OID_RAWSZ, SEEK_SET) == -1 ||
604 p_read(p->mwf.fd, sha1.id, GIT_OID_RAWSZ) < 0)
a070f152
CMN
605 goto cleanup;
606
607 idx_sha1 = ((unsigned char *)p->index_map.data) + p->index_map.len - 40;
608
e1de726c
RB
609 if (git_oid_cmp(&sha1, (git_oid *)idx_sha1) == 0)
610 return 0;
a070f152
CMN
611
612cleanup:
e1de726c 613 giterr_set(GITERR_OS, "Invalid packfile '%s'", p->pack_name);
a070f152
CMN
614 p_close(p->mwf.fd);
615 p->mwf.fd = -1;
e1de726c 616 return -1;
a070f152
CMN
617}
618
619int git_packfile_check(struct git_pack_file **pack_out, const char *path)
620{
621 struct stat st;
622 struct git_pack_file *p;
623 size_t path_len;
624
625 *pack_out = NULL;
626 path_len = strlen(path);
627 p = packfile_alloc(path_len + 2);
e1de726c 628 GITERR_CHECK_ALLOC(p);
a070f152
CMN
629
630 /*
631 * Make sure a corresponding .pack file exists and that
632 * the index looks sane.
633 */
932669b8 634 path_len -= strlen(".idx");
a070f152 635 if (path_len < 1) {
3286c408 636 git__free(p);
282283ac 637 return git_odb__error_notfound("invalid packfile path", NULL);
a070f152
CMN
638 }
639
640 memcpy(p->pack_name, path, path_len);
641
642 strcpy(p->pack_name + path_len, ".keep");
1a481123 643 if (git_path_exists(p->pack_name) == true)
a070f152
CMN
644 p->pack_keep = 1;
645
646 strcpy(p->pack_name + path_len, ".pack");
e1de726c 647 if (p_stat(p->pack_name, &st) < 0 || !S_ISREG(st.st_mode)) {
3286c408 648 git__free(p);
282283ac 649 return git_odb__error_notfound("packfile not found", NULL);
a070f152
CMN
650 }
651
652 /* ok, it looks sane as far as we can check without
653 * actually mapping the pack file.
654 */
1af56d7d 655 p->mwf.size = st.st_size;
a070f152
CMN
656 p->pack_local = 1;
657 p->mtime = (git_time_t)st.st_mtime;
658
659 /* see if we can parse the sha1 oid in the packfile name */
660 if (path_len < 40 ||
e1de726c 661 git_oid_fromstr(&p->sha1, path + path_len - GIT_OID_HEXSZ) < 0)
a070f152
CMN
662 memset(&p->sha1, 0x0, GIT_OID_RAWSZ);
663
664 *pack_out = p;
e1de726c
RB
665
666 return 0;
a070f152
CMN
667}
668
669/***********************************************************
670 *
671 * PACKFILE ENTRY SEARCH INTERNALS
672 *
673 ***********************************************************/
674
e1de726c 675static git_off_t nth_packed_object_offset(const struct git_pack_file *p, uint32_t n)
a070f152
CMN
676{
677 const unsigned char *index = p->index_map.data;
678 index += 4 * 256;
679 if (p->index_version == 1) {
680 return ntohl(*((uint32_t *)(index + 24 * n)));
681 } else {
682 uint32_t off;
683 index += 8 + p->num_objects * (20 + 4);
684 off = ntohl(*((uint32_t *)(index + 4 * n)));
685 if (!(off & 0x80000000))
686 return off;
687 index += p->num_objects * 4 + (off & 0x7fffffff) * 8;
688 return (((uint64_t)ntohl(*((uint32_t *)(index + 0)))) << 32) |
87d9869f 689 ntohl(*((uint32_t *)(index + 4)));
a070f152
CMN
690 }
691}
692
60ecdf59
DMB
/*
 * Comparison callback that orders two buffers by their first four
 * bytes only, using memcmp().
 */
static int git__memcmp4(const void *a, const void *b)
{
	const size_t width = 4;

	return memcmp(a, b, width);
}
696
521aedad 697int git_pack_foreach_entry(
5dca2010
RB
698 struct git_pack_file *p,
699 int (*cb)(git_oid *oid, void *data),
700 void *data)
521aedad
CMN
701{
702 const unsigned char *index = p->index_map.data, *current;
521aedad
CMN
703 uint32_t i;
704
705 if (index == NULL) {
706 int error;
707
708 if ((error = pack_index_open(p)) < 0)
709 return error;
710
711 assert(p->index_map.data);
712
713 index = p->index_map.data;
714 }
715
716 if (p->index_version > 1) {
717 index += 8;
718 }
719
720 index += 4 * 256;
721
60ecdf59
DMB
722 if (p->oids == NULL) {
723 git_vector offsets, oids;
724 int error;
521aedad 725
60ecdf59
DMB
726 if ((error = git_vector_init(&oids, p->num_objects, NULL)))
727 return error;
728
729 if ((error = git_vector_init(&offsets, p->num_objects, git__memcmp4)))
730 return error;
5dca2010 731
60ecdf59
DMB
732 if (p->index_version > 1) {
733 const unsigned char *off = index + 24 * p->num_objects;
734 for (i = 0; i < p->num_objects; i++)
735 git_vector_insert(&offsets, (void*)&off[4 * i]);
736 git_vector_sort(&offsets);
737 git_vector_foreach(&offsets, i, current)
738 git_vector_insert(&oids, (void*)&index[5 * (current - off)]);
739 } else {
740 for (i = 0; i < p->num_objects; i++)
741 git_vector_insert(&offsets, (void*)&index[24 * i]);
742 git_vector_sort(&offsets);
743 git_vector_foreach(&offsets, i, current)
744 git_vector_insert(&oids, (void*)&current[4]);
745 }
746 git_vector_free(&offsets);
747 p->oids = (git_oid **)oids.contents;
521aedad
CMN
748 }
749
60ecdf59
DMB
750 for (i = 0; i < p->num_objects; i++)
751 if (cb(p->oids[i], data))
752 return GIT_EUSER;
753
521aedad
CMN
754 return 0;
755}
756
a070f152 757static int pack_entry_find_offset(
e1de726c
RB
758 git_off_t *offset_out,
759 git_oid *found_oid,
760 struct git_pack_file *p,
761 const git_oid *short_oid,
b8457baa 762 size_t len)
a070f152
CMN
763{
764 const uint32_t *level1_ofs = p->index_map.data;
765 const unsigned char *index = p->index_map.data;
766 unsigned hi, lo, stride;
767 int pos, found = 0;
768 const unsigned char *current = 0;
769
770 *offset_out = 0;
771
772 if (index == NULL) {
773 int error;
774
e1de726c
RB
775 if ((error = pack_index_open(p)) < 0)
776 return error;
a070f152
CMN
777
778 assert(p->index_map.data);
779
780 index = p->index_map.data;
781 level1_ofs = p->index_map.data;
782 }
783
784 if (p->index_version > 1) {
785 level1_ofs += 2;
786 index += 8;
787 }
788
789 index += 4 * 256;
790 hi = ntohl(level1_ofs[(int)short_oid->id[0]]);
791 lo = ((short_oid->id[0] == 0x0) ? 0 : ntohl(level1_ofs[(int)short_oid->id[0] - 1]));
792
793 if (p->index_version > 1) {
794 stride = 20;
795 } else {
796 stride = 24;
797 index += 4;
798 }
799
800#ifdef INDEX_DEBUG_LOOKUP
801 printf("%02x%02x%02x... lo %u hi %u nr %d\n",
802 short_oid->id[0], short_oid->id[1], short_oid->id[2], lo, hi, p->num_objects);
803#endif
804
805 /* Use git.git lookup code */
87d9869f 806 pos = sha1_entry_pos(index, stride, 0, lo, hi, p->num_objects, short_oid->id);
a070f152
CMN
807
808 if (pos >= 0) {
809 /* An object matching exactly the oid was found */
810 found = 1;
811 current = index + pos * stride;
812 } else {
813 /* No object was found */
814 /* pos refers to the object with the "closest" oid to short_oid */
815 pos = - 1 - pos;
816 if (pos < (int)p->num_objects) {
817 current = index + pos * stride;
818
282283ac 819 if (!git_oid_ncmp(short_oid, (const git_oid *)current, len))
a070f152 820 found = 1;
a070f152
CMN
821 }
822 }
823
b2a2702d 824 if (found && len != GIT_OID_HEXSZ && pos + 1 < (int)p->num_objects) {
a070f152
CMN
825 /* Check for ambiguousity */
826 const unsigned char *next = current + stride;
827
828 if (!git_oid_ncmp(short_oid, (const git_oid *)next, len)) {
829 found = 2;
830 }
831 }
832
e1de726c 833 if (!found)
282283ac 834 return git_odb__error_notfound("failed to find offset for pack entry", short_oid);
e1de726c
RB
835 if (found > 1)
836 return git_odb__error_ambiguous("found multiple offsets for pack entry");
837 *offset_out = nth_packed_object_offset(p, pos);
838 git_oid_fromraw(found_oid, current);
a070f152
CMN
839
840#ifdef INDEX_DEBUG_LOOKUP
e1de726c 841 {
a070f152
CMN
842 unsigned char hex_sha1[GIT_OID_HEXSZ + 1];
843 git_oid_fmt(hex_sha1, found_oid);
844 hex_sha1[GIT_OID_HEXSZ] = '\0';
845 printf("found lo=%d %s\n", lo, hex_sha1);
a070f152 846 }
e1de726c
RB
847#endif
848 return 0;
a070f152
CMN
849}
850
851int git_pack_entry_find(
852 struct git_pack_entry *e,
853 struct git_pack_file *p,
854 const git_oid *short_oid,
b8457baa 855 size_t len)
a070f152 856{
e1de726c 857 git_off_t offset;
a070f152
CMN
858 git_oid found_oid;
859 int error;
860
861 assert(p);
862
863 if (len == GIT_OID_HEXSZ && p->num_bad_objects) {
864 unsigned i;
865 for (i = 0; i < p->num_bad_objects; i++)
866 if (git_oid_cmp(short_oid, &p->bad_object_sha1[i]) == 0)
e1de726c 867 return packfile_error("bad object found in packfile");
a070f152
CMN
868 }
869
870 error = pack_entry_find_offset(&offset, &found_oid, p, short_oid, len);
e1de726c
RB
871 if (error < 0)
872 return error;
a070f152
CMN
873
874 /* we found a unique entry in the index;
875 * make sure the packfile backing the index
876 * still exists on disk */
e1de726c
RB
877 if (p->mwf.fd == -1 && (error = packfile_open(p)) < 0)
878 return error;
a070f152
CMN
879
880 e->offset = offset;
881 e->p = p;
882
883 git_oid_cpy(&e->sha1, &found_oid);
e1de726c 884 return 0;
a070f152 885}