]> git.proxmox.com Git - libgit2.git/blame - src/pack.c
Merge pull request #1208 from ethomson/ppc_sha1_asm_deadness
[libgit2.git] / src / pack.c
CommitLineData
7d0cdf82 1/*
5e0de328 2 * Copyright (C) 2009-2012 the libgit2 contributors
7d0cdf82 3 *
bb742ede
VM
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
7d0cdf82
CMN
6 */
7
a15c550d 8#include "common.h"
7d0cdf82
CMN
9#include "odb.h"
10#include "pack.h"
11#include "delta-apply.h"
a070f152 12#include "sha1_lookup.h"
a15c550d
VM
13#include "mwindow.h"
14#include "fileops.h"
7d0cdf82
CMN
15
16#include "git2/oid.h"
0c3bae62 17#include <zlib.h>
7d0cdf82 18
a070f152 19static int packfile_open(struct git_pack_file *p);
e1de726c 20static git_off_t nth_packed_object_offset(const struct git_pack_file *p, uint32_t n);
a070f152
CMN
21int packfile_unpack_compressed(
22 git_rawobj *obj,
23 struct git_pack_file *p,
24 git_mwindow **w_curs,
e1de726c 25 git_off_t *curpos,
a070f152
CMN
26 size_t size,
27 git_otype type);
28
29/* Can find the offset of an object given
30 * a prefix of an identifier.
904b67e6 31 * Throws GIT_EAMBIGUOUSOIDPREFIX if short oid
a070f152
CMN
32 * is ambiguous within the pack.
33 * This method assumes that len is between
34 * GIT_OID_MINPREFIXLEN and GIT_OID_HEXSZ.
35 */
36static int pack_entry_find_offset(
e1de726c 37 git_off_t *offset_out,
a070f152
CMN
38 git_oid *found_oid,
39 struct git_pack_file *p,
40 const git_oid *short_oid,
b8457baa 41 size_t len);
a070f152 42
e1de726c
RB
43static int packfile_error(const char *message)
44{
45 giterr_set(GITERR_ODB, "Invalid pack file - %s", message);
46 return -1;
47}
48
a070f152
CMN
49/***********************************************************
50 *
51 * PACK INDEX METHODS
52 *
53 ***********************************************************/
54
55static void pack_index_free(struct git_pack_file *p)
56{
60ecdf59
DMB
57 if (p->oids) {
58 git__free(p->oids);
59 p->oids = NULL;
60 }
a070f152
CMN
61 if (p->index_map.data) {
62 git_futils_mmap_free(&p->index_map);
63 p->index_map.data = NULL;
64 }
65}
66
87d9869f 67static int pack_index_check(const char *path, struct git_pack_file *p)
a070f152
CMN
68{
69 struct git_pack_idx_header *hdr;
70 uint32_t version, nr, i, *index;
a070f152
CMN
71 void *idx_map;
72 size_t idx_size;
a070f152 73 struct stat st;
a070f152 74 int error;
e1de726c
RB
75 /* TODO: properly open the file without access time using O_NOATIME */
76 git_file fd = git_futils_open_ro(path);
a070f152 77 if (fd < 0)
e1de726c 78 return fd;
a070f152 79
e1de726c
RB
80 if (p_fstat(fd, &st) < 0 ||
81 !S_ISREG(st.st_mode) ||
82 !git__is_sizet(st.st_size) ||
83 (idx_size = (size_t)st.st_size) < 4 * 256 + 20 + 20)
84 {
a070f152 85 p_close(fd);
e1de726c
RB
86 giterr_set(GITERR_OS, "Failed to check pack index.");
87 return -1;
a070f152
CMN
88 }
89
90 error = git_futils_mmap_ro(&p->index_map, fd, 0, idx_size);
e1de726c 91
a070f152
CMN
92 p_close(fd);
93
e1de726c
RB
94 if (error < 0)
95 return error;
a070f152
CMN
96
97 hdr = idx_map = p->index_map.data;
98
99 if (hdr->idx_signature == htonl(PACK_IDX_SIGNATURE)) {
100 version = ntohl(hdr->idx_version);
101
102 if (version < 2 || version > 2) {
103 git_futils_mmap_free(&p->index_map);
e1de726c 104 return packfile_error("unsupported index version");
a070f152
CMN
105 }
106
107 } else
108 version = 1;
109
110 nr = 0;
111 index = idx_map;
112
113 if (version > 1)
87d9869f 114 index += 2; /* skip index header */
a070f152
CMN
115
116 for (i = 0; i < 256; i++) {
117 uint32_t n = ntohl(index[i]);
118 if (n < nr) {
119 git_futils_mmap_free(&p->index_map);
e1de726c 120 return packfile_error("index is non-monotonic");
a070f152
CMN
121 }
122 nr = n;
123 }
124
125 if (version == 1) {
126 /*
127 * Total size:
87d9869f
VM
128 * - 256 index entries 4 bytes each
129 * - 24-byte entries * nr (20-byte sha1 + 4-byte offset)
130 * - 20-byte SHA1 of the packfile
131 * - 20-byte SHA1 file checksum
a070f152
CMN
132 */
133 if (idx_size != 4*256 + nr * 24 + 20 + 20) {
134 git_futils_mmap_free(&p->index_map);
e1de726c 135 return packfile_error("index is corrupted");
a070f152
CMN
136 }
137 } else if (version == 2) {
138 /*
139 * Minimum size:
87d9869f
VM
140 * - 8 bytes of header
141 * - 256 index entries 4 bytes each
142 * - 20-byte sha1 entry * nr
143 * - 4-byte crc entry * nr
144 * - 4-byte offset entry * nr
145 * - 20-byte SHA1 of the packfile
146 * - 20-byte SHA1 file checksum
a070f152
CMN
147 * And after the 4-byte offset table might be a
148 * variable sized table containing 8-byte entries
149 * for offsets larger than 2^31.
150 */
151 unsigned long min_size = 8 + 4*256 + nr*(20 + 4 + 4) + 20 + 20;
152 unsigned long max_size = min_size;
153
154 if (nr)
155 max_size += (nr - 1)*8;
156
157 if (idx_size < min_size || idx_size > max_size) {
158 git_futils_mmap_free(&p->index_map);
e1de726c 159 return packfile_error("wrong index size");
a070f152 160 }
a070f152
CMN
161 }
162
163 p->index_version = version;
164 p->num_objects = nr;
e1de726c 165 return 0;
a070f152
CMN
166}
167
168static int pack_index_open(struct git_pack_file *p)
169{
170 char *idx_name;
171 int error;
44ef8b1b 172 size_t name_len, offset;
a070f152
CMN
173
174 if (p->index_map.data)
e1de726c 175 return 0;
a070f152
CMN
176
177 idx_name = git__strdup(p->pack_name);
e1de726c
RB
178 GITERR_CHECK_ALLOC(idx_name);
179
44ef8b1b
RB
180 name_len = strlen(idx_name);
181 offset = name_len - strlen(".pack");
182 assert(offset < name_len); /* make sure no underflow */
183
184 strncpy(idx_name + offset, ".idx", name_len - offset);
a070f152
CMN
185
186 error = pack_index_check(idx_name, p);
3286c408 187 git__free(idx_name);
a070f152 188
e1de726c 189 return error;
a070f152
CMN
190}
191
192static unsigned char *pack_window_open(
193 struct git_pack_file *p,
7d0cdf82 194 git_mwindow **w_cursor,
e1de726c 195 git_off_t offset,
7d0cdf82
CMN
196 unsigned int *left)
197{
e1de726c 198 if (p->mwf.fd == -1 && packfile_open(p) < 0)
7d0cdf82
CMN
199 return NULL;
200
201 /* Since packfiles end in a hash of their content and it's
202 * pointless to ask for an offset into the middle of that
203 * hash, and the pack_window_contains function above wouldn't match
204 * don't allow an offset too close to the end of the file.
205 */
206 if (offset > (p->mwf.size - 20))
207 return NULL;
208
209 return git_mwindow_open(&p->mwf, w_cursor, offset, 20, left);
210 }
211
45d773ef
CMN
212static int packfile_unpack_header1(
213 unsigned long *usedp,
7d0cdf82
CMN
214 size_t *sizep,
215 git_otype *type,
216 const unsigned char *buf,
217 unsigned long len)
218{
219 unsigned shift;
220 unsigned long size, c;
221 unsigned long used = 0;
2aeadb9c 222
7d0cdf82
CMN
223 c = buf[used++];
224 *type = (c >> 4) & 7;
225 size = c & 15;
226 shift = 4;
227 while (c & 0x80) {
45d773ef 228 if (len <= used)
904b67e6 229 return GIT_EBUFS;
45d773ef
CMN
230
231 if (bitsizeof(long) <= shift) {
232 *usedp = 0;
233 return -1;
234 }
7d0cdf82
CMN
235
236 c = buf[used++];
237 size += (c & 0x7f) << shift;
238 shift += 7;
239 }
240
241 *sizep = (size_t)size;
45d773ef
CMN
242 *usedp = used;
243 return 0;
7d0cdf82
CMN
244}
245
246int git_packfile_unpack_header(
247 size_t *size_p,
248 git_otype *type_p,
249 git_mwindow_file *mwf,
250 git_mwindow **w_curs,
e1de726c 251 git_off_t *curpos)
7d0cdf82
CMN
252{
253 unsigned char *base;
254 unsigned int left;
255 unsigned long used;
45d773ef 256 int ret;
7d0cdf82
CMN
257
258 /* pack_window_open() assures us we have [base, base + 20) available
87d9869f
VM
259 * as a range that we can look at at. (Its actually the hash
260 * size that is assured.) With our object header encoding
7d0cdf82
CMN
261 * the maximum deflated object size is 2^137, which is just
262 * insane, so we know won't exceed what we have been given.
263 */
264// base = pack_window_open(p, w_curs, *curpos, &left);
265 base = git_mwindow_open(mwf, w_curs, *curpos, 20, &left);
266 if (base == NULL)
904b67e6 267 return GIT_EBUFS;
2aeadb9c
CY
268
269 ret = packfile_unpack_header1(&used, size_p, type_p, base, left);
45d773ef 270 git_mwindow_close(w_curs);
904b67e6 271 if (ret == GIT_EBUFS)
45d773ef
CMN
272 return ret;
273 else if (ret < 0)
e1de726c 274 return packfile_error("header length is zero");
7d0cdf82
CMN
275
276 *curpos += used;
e1de726c 277 return 0;
7d0cdf82
CMN
278}
279
44f9f547
DMB
280int git_packfile_resolve_header(
281 size_t *size_p,
282 git_otype *type_p,
283 struct git_pack_file *p,
284 git_off_t offset)
285{
286 git_mwindow *w_curs = NULL;
287 git_off_t curpos = offset;
288 size_t size;
289 git_otype type;
290 git_off_t base_offset;
291 int error;
292
293 error = git_packfile_unpack_header(&size, &type, &p->mwf, &w_curs, &curpos);
294 git_mwindow_close(&w_curs);
295 if (error < 0)
296 return error;
297
298 if (type == GIT_OBJ_OFS_DELTA || type == GIT_OBJ_REF_DELTA) {
299 size_t base_size;
300 git_rawobj delta;
301 base_offset = get_delta_base(p, &w_curs, &curpos, type, offset);
302 git_mwindow_close(&w_curs);
303 error = packfile_unpack_compressed(&delta, p, &w_curs, &curpos, size, type);
304 git_mwindow_close(&w_curs);
305 if (error < 0)
306 return error;
307 error = git__delta_read_header(delta.data, delta.len, &base_size, size_p);
308 git__free(delta.data);
309 if (error < 0)
310 return error;
311 } else
312 *size_p = size;
313
314 while (type == GIT_OBJ_OFS_DELTA || type == GIT_OBJ_REF_DELTA) {
315 curpos = base_offset;
316 error = git_packfile_unpack_header(&size, &type, &p->mwf, &w_curs, &curpos);
317 git_mwindow_close(&w_curs);
318 if (error < 0)
319 return error;
320 if (type != GIT_OBJ_OFS_DELTA && type != GIT_OBJ_REF_DELTA)
321 break;
322 base_offset = get_delta_base(p, &w_curs, &curpos, type, base_offset);
323 git_mwindow_close(&w_curs);
324 }
325 *type_p = type;
326
327 return error;
328}
329
a070f152 330static int packfile_unpack_delta(
7d0cdf82 331 git_rawobj *obj,
a070f152 332 struct git_pack_file *p,
7d0cdf82 333 git_mwindow **w_curs,
e1de726c 334 git_off_t *curpos,
7d0cdf82
CMN
335 size_t delta_size,
336 git_otype delta_type,
e1de726c 337 git_off_t obj_offset)
7d0cdf82 338{
e1de726c 339 git_off_t base_offset;
7d0cdf82
CMN
340 git_rawobj base, delta;
341 int error;
342
b5b474dd 343 base_offset = get_delta_base(p, w_curs, curpos, delta_type, obj_offset);
45d773ef 344 git_mwindow_close(w_curs);
7d0cdf82 345 if (base_offset == 0)
e1de726c
RB
346 return packfile_error("delta offset is zero");
347 if (base_offset < 0) /* must actually be an error code */
348 return (int)base_offset;
7d0cdf82 349
b5b474dd 350 error = git_packfile_unpack(&base, p, &base_offset);
7d0cdf82
CMN
351
352 /*
353 * TODO: git.git tries to load the base from other packfiles
354 * or loose objects.
355 *
356 * We'll need to do this in order to support thin packs.
357 */
e1de726c
RB
358 if (error < 0)
359 return error;
7d0cdf82
CMN
360
361 error = packfile_unpack_compressed(&delta, p, w_curs, curpos, delta_size, delta_type);
45d773ef 362 git_mwindow_close(w_curs);
e1de726c 363 if (error < 0) {
3286c408 364 git__free(base.data);
e1de726c 365 return error;
7d0cdf82
CMN
366 }
367
368 obj->type = base.type;
e1de726c 369 error = git__delta_apply(obj, base.data, base.len, delta.data, delta.len);
7d0cdf82 370
3286c408
VM
371 git__free(base.data);
372 git__free(delta.data);
7d0cdf82 373
826bc4a8 374 /* TODO: we might want to cache this. eventually */
7d0cdf82 375 //add_delta_base_cache(p, base_offset, base, base_size, *type);
e1de726c 376
7d0cdf82
CMN
377 return error; /* error set by git__delta_apply */
378}
379
a070f152 380int git_packfile_unpack(
e1de726c
RB
381 git_rawobj *obj,
382 struct git_pack_file *p,
383 git_off_t *obj_offset)
7d0cdf82
CMN
384{
385 git_mwindow *w_curs = NULL;
e1de726c 386 git_off_t curpos = *obj_offset;
7d0cdf82
CMN
387 int error;
388
389 size_t size = 0;
390 git_otype type;
391
392 /*
393 * TODO: optionally check the CRC on the packfile
394 */
395
396 obj->data = NULL;
397 obj->len = 0;
398 obj->type = GIT_OBJ_BAD;
399
400 error = git_packfile_unpack_header(&size, &type, &p->mwf, &w_curs, &curpos);
45d773ef
CMN
401 git_mwindow_close(&w_curs);
402
e1de726c
RB
403 if (error < 0)
404 return error;
7d0cdf82
CMN
405
406 switch (type) {
407 case GIT_OBJ_OFS_DELTA:
408 case GIT_OBJ_REF_DELTA:
409 error = packfile_unpack_delta(
b5b474dd
CMN
410 obj, p, &w_curs, &curpos,
411 size, type, *obj_offset);
7d0cdf82
CMN
412 break;
413
414 case GIT_OBJ_COMMIT:
415 case GIT_OBJ_TREE:
416 case GIT_OBJ_BLOB:
417 case GIT_OBJ_TAG:
418 error = packfile_unpack_compressed(
b5b474dd 419 obj, p, &w_curs, &curpos,
7d0cdf82
CMN
420 size, type);
421 break;
422
423 default:
e1de726c 424 error = packfile_error("invalid packfile type in header");;
7d0cdf82
CMN
425 break;
426 }
427
b5b474dd 428 *obj_offset = curpos;
e1de726c 429 return error;
7d0cdf82
CMN
430}
431
282283ac
RB
/*
 * Allocation hook handed to zlib (see the zalloc assignments in this
 * file): forwards the request to git__calloc().  The opaque pointer is
 * unused.
 */
static void *use_git_alloc(void *opaq, unsigned int count, unsigned int size)
{
	void *block;

	GIT_UNUSED(opaq);

	block = git__calloc(count, size);
	return block;
}
437
/*
 * Deallocation hook handed to zlib (see the zfree assignments in this
 * file): releases memory through git__free().  The opaque pointer is
 * unused.
 */
static void use_git_free(void *opaq, void *ptr)
{
	GIT_UNUSED(opaq);

	git__free(ptr);
}
443
46635339
CMN
444int git_packfile_stream_open(git_packfile_stream *obj, struct git_pack_file *p, git_off_t curpos)
445{
446 int st;
447
448 memset(obj, 0, sizeof(git_packfile_stream));
449 obj->curpos = curpos;
450 obj->p = p;
451 obj->zstream.zalloc = use_git_alloc;
452 obj->zstream.zfree = use_git_free;
453 obj->zstream.next_in = Z_NULL;
454 obj->zstream.next_out = Z_NULL;
455 st = inflateInit(&obj->zstream);
456 if (st != Z_OK) {
457 git__free(obj);
458 giterr_set(GITERR_ZLIB, "Failed to inflate packfile");
459 return -1;
460 }
461
462 return 0;
463}
464
465ssize_t git_packfile_stream_read(git_packfile_stream *obj, void *buffer, size_t len)
466{
467 unsigned char *in;
468 size_t written;
469 int st;
470
471 if (obj->done)
472 return 0;
473
474 in = pack_window_open(obj->p, &obj->mw, obj->curpos, &obj->zstream.avail_in);
475 if (in == NULL)
476 return GIT_EBUFS;
477
478 obj->zstream.next_out = buffer;
479 obj->zstream.avail_out = len;
480 obj->zstream.next_in = in;
481
482 st = inflate(&obj->zstream, Z_SYNC_FLUSH);
483 git_mwindow_close(&obj->mw);
484
485 obj->curpos += obj->zstream.next_in - in;
486 written = len - obj->zstream.avail_out;
487
488 if (st != Z_OK && st != Z_STREAM_END) {
489 giterr_set(GITERR_ZLIB, "Failed to inflate packfile");
490 return -1;
491 }
492
493 if (st == Z_STREAM_END)
494 obj->done = 1;
495
496
497 /* If we didn't write anything out but we're not done, we need more data */
498 if (!written && st != Z_STREAM_END)
499 return GIT_EBUFS;
500
501 return written;
502
503}
504
505void git_packfile_stream_free(git_packfile_stream *obj)
506{
507 inflateEnd(&obj->zstream);
508}
509
7d0cdf82 510int packfile_unpack_compressed(
e1de726c
RB
511 git_rawobj *obj,
512 struct git_pack_file *p,
513 git_mwindow **w_curs,
514 git_off_t *curpos,
515 size_t size,
516 git_otype type)
7d0cdf82
CMN
517{
518 int st;
519 z_stream stream;
520 unsigned char *buffer, *in;
521
e1de726c
RB
522 buffer = git__calloc(1, size + 1);
523 GITERR_CHECK_ALLOC(buffer);
7d0cdf82
CMN
524
525 memset(&stream, 0, sizeof(stream));
526 stream.next_out = buffer;
1c3fac4d 527 stream.avail_out = (uInt)size + 1;
282283ac
RB
528 stream.zalloc = use_git_alloc;
529 stream.zfree = use_git_free;
7d0cdf82
CMN
530
531 st = inflateInit(&stream);
532 if (st != Z_OK) {
3286c408 533 git__free(buffer);
e1de726c 534 giterr_set(GITERR_ZLIB, "Failed to inflate packfile");
45d773ef 535
e1de726c 536 return -1;
7d0cdf82
CMN
537 }
538
539 do {
b5b474dd 540 in = pack_window_open(p, w_curs, *curpos, &stream.avail_in);
7d0cdf82
CMN
541 stream.next_in = in;
542 st = inflate(&stream, Z_FINISH);
45d773ef 543 git_mwindow_close(w_curs);
7d0cdf82
CMN
544
545 if (!stream.avail_out)
546 break; /* the payload is larger than it should be */
547
45d773ef
CMN
548 if (st == Z_BUF_ERROR && in == NULL) {
549 inflateEnd(&stream);
550 git__free(buffer);
904b67e6 551 return GIT_EBUFS;
45d773ef
CMN
552 }
553
b5b474dd 554 *curpos += stream.next_in - in;
7d0cdf82
CMN
555 } while (st == Z_OK || st == Z_BUF_ERROR);
556
557 inflateEnd(&stream);
558
559 if ((st != Z_STREAM_END) || stream.total_out != size) {
3286c408 560 git__free(buffer);
e1de726c
RB
561 giterr_set(GITERR_ZLIB, "Failed to inflate packfile");
562 return -1;
7d0cdf82
CMN
563 }
564
565 obj->type = type;
566 obj->len = size;
567 obj->data = buffer;
e1de726c 568 return 0;
7d0cdf82
CMN
569}
570
b5b474dd
CMN
571/*
572 * curpos is where the data starts, delta_obj_offset is the where the
573 * header starts
574 */
e1de726c
RB
575git_off_t get_delta_base(
576 struct git_pack_file *p,
577 git_mwindow **w_curs,
578 git_off_t *curpos,
579 git_otype type,
580 git_off_t delta_obj_offset)
7d0cdf82 581{
45d773ef
CMN
582 unsigned int left = 0;
583 unsigned char *base_info;
e1de726c 584 git_off_t base_offset;
7d0cdf82
CMN
585 git_oid unused;
586
45d773ef
CMN
587 base_info = pack_window_open(p, w_curs, *curpos, &left);
588 /* Assumption: the only reason this would fail is because the file is too small */
589 if (base_info == NULL)
904b67e6 590 return GIT_EBUFS;
7d0cdf82
CMN
591 /* pack_window_open() assured us we have [base_info, base_info + 20)
592 * as a range that we can look at without walking off the
87d9869f
VM
593 * end of the mapped window. Its actually the hash size
594 * that is assured. An OFS_DELTA longer than the hash size
7d0cdf82
CMN
595 * is stupid, as then a REF_DELTA would be smaller to store.
596 */
597 if (type == GIT_OBJ_OFS_DELTA) {
598 unsigned used = 0;
599 unsigned char c = base_info[used++];
600 base_offset = c & 127;
601 while (c & 128) {
45d773ef 602 if (left <= used)
904b67e6 603 return GIT_EBUFS;
7d0cdf82
CMN
604 base_offset += 1;
605 if (!base_offset || MSB(base_offset, 7))
87d9869f 606 return 0; /* overflow */
7d0cdf82
CMN
607 c = base_info[used++];
608 base_offset = (base_offset << 7) + (c & 127);
609 }
610 base_offset = delta_obj_offset - base_offset;
611 if (base_offset <= 0 || base_offset >= delta_obj_offset)
87d9869f 612 return 0; /* out of bound */
7d0cdf82
CMN
613 *curpos += used;
614 } else if (type == GIT_OBJ_REF_DELTA) {
c1af5a39
CMN
615 /* If we have the cooperative cache, search in it first */
616 if (p->has_cache) {
617 int pos;
618 struct git_pack_entry key;
619
620 git_oid_fromraw(&key.sha1, base_info);
621 pos = git_vector_bsearch(&p->cache, &key);
622 if (pos >= 0) {
623 *curpos += 20;
624 return ((struct git_pack_entry *)git_vector_get(&p->cache, pos))->offset;
625 }
626 }
7d0cdf82 627 /* The base entry _must_ be in the same pack */
e1de726c
RB
628 if (pack_entry_find_offset(&base_offset, &unused, p, (git_oid *)base_info, GIT_OID_HEXSZ) < 0)
629 return packfile_error("base entry delta is not in the same pack");
7d0cdf82
CMN
630 *curpos += 20;
631 } else
632 return 0;
633
634 return base_offset;
635}
a070f152
CMN
636
637/***********************************************************
638 *
639 * PACKFILE METHODS
640 *
641 ***********************************************************/
642
44ef8b1b 643static struct git_pack_file *packfile_alloc(size_t extra)
a070f152 644{
e1de726c
RB
645 struct git_pack_file *p = git__calloc(1, sizeof(*p) + extra);
646 if (p != NULL)
647 p->mwf.fd = -1;
a070f152
CMN
648 return p;
649}
650
651
652void packfile_free(struct git_pack_file *p)
653{
654 assert(p);
655
656 /* clear_delta_base_cache(); */
657 git_mwindow_free_all(&p->mwf);
1d8943c6 658 git_mwindow_file_deregister(&p->mwf);
a070f152
CMN
659
660 if (p->mwf.fd != -1)
661 p_close(p->mwf.fd);
662
663 pack_index_free(p);
664
3286c408
VM
665 git__free(p->bad_object_sha1);
666 git__free(p);
a070f152
CMN
667}
668
669static int packfile_open(struct git_pack_file *p)
670{
671 struct stat st;
672 struct git_pack_header hdr;
673 git_oid sha1;
674 unsigned char *idx_sha1;
675
e1de726c
RB
676 assert(p->index_map.data);
677
0d0fa7c3 678 if (!p->index_map.data && pack_index_open(p) < 0)
282283ac 679 return git_odb__error_notfound("failed to open packfile", NULL);
a070f152
CMN
680
681 /* TODO: open with noatime */
e1de726c 682 p->mwf.fd = git_futils_open_ro(p->pack_name);
fcb48e06
SS
683 if (p->mwf.fd < 0) {
684 p->mwf.fd = -1;
685 return -1;
686 }
a070f152 687
e1de726c
RB
688 if (p_fstat(p->mwf.fd, &st) < 0 ||
689 git_mwindow_file_register(&p->mwf) < 0)
690 goto cleanup;
a070f152
CMN
691
692 /* If we created the struct before we had the pack we lack size. */
693 if (!p->mwf.size) {
694 if (!S_ISREG(st.st_mode))
695 goto cleanup;
e1de726c 696 p->mwf.size = (git_off_t)st.st_size;
a070f152
CMN
697 } else if (p->mwf.size != st.st_size)
698 goto cleanup;
699
700#if 0
701 /* We leave these file descriptors open with sliding mmap;
702 * there is no point keeping them open across exec(), though.
703 */
704 fd_flag = fcntl(p->mwf.fd, F_GETFD, 0);
705 if (fd_flag < 0)
e1de726c 706 goto cleanup;
a070f152
CMN
707
708 fd_flag |= FD_CLOEXEC;
709 if (fcntl(p->pack_fd, F_SETFD, fd_flag) == -1)
e1de726c 710 goto cleanup;
a070f152
CMN
711#endif
712
713 /* Verify we recognize this pack file format. */
e1de726c
RB
714 if (p_read(p->mwf.fd, &hdr, sizeof(hdr)) < 0 ||
715 hdr.hdr_signature != htonl(PACK_SIGNATURE) ||
716 !pack_version_ok(hdr.hdr_version))
a070f152
CMN
717 goto cleanup;
718
719 /* Verify the pack matches its index. */
e1de726c
RB
720 if (p->num_objects != ntohl(hdr.hdr_entries) ||
721 p_lseek(p->mwf.fd, p->mwf.size - GIT_OID_RAWSZ, SEEK_SET) == -1 ||
722 p_read(p->mwf.fd, sha1.id, GIT_OID_RAWSZ) < 0)
a070f152
CMN
723 goto cleanup;
724
725 idx_sha1 = ((unsigned char *)p->index_map.data) + p->index_map.len - 40;
726
e1de726c
RB
727 if (git_oid_cmp(&sha1, (git_oid *)idx_sha1) == 0)
728 return 0;
a070f152
CMN
729
730cleanup:
e1de726c 731 giterr_set(GITERR_OS, "Invalid packfile '%s'", p->pack_name);
a070f152
CMN
732 p_close(p->mwf.fd);
733 p->mwf.fd = -1;
e1de726c 734 return -1;
a070f152
CMN
735}
736
737int git_packfile_check(struct git_pack_file **pack_out, const char *path)
738{
739 struct stat st;
740 struct git_pack_file *p;
741 size_t path_len;
742
743 *pack_out = NULL;
744 path_len = strlen(path);
745 p = packfile_alloc(path_len + 2);
e1de726c 746 GITERR_CHECK_ALLOC(p);
a070f152
CMN
747
748 /*
749 * Make sure a corresponding .pack file exists and that
750 * the index looks sane.
751 */
932669b8 752 path_len -= strlen(".idx");
a070f152 753 if (path_len < 1) {
3286c408 754 git__free(p);
282283ac 755 return git_odb__error_notfound("invalid packfile path", NULL);
a070f152
CMN
756 }
757
758 memcpy(p->pack_name, path, path_len);
759
760 strcpy(p->pack_name + path_len, ".keep");
1a481123 761 if (git_path_exists(p->pack_name) == true)
a070f152
CMN
762 p->pack_keep = 1;
763
764 strcpy(p->pack_name + path_len, ".pack");
e1de726c 765 if (p_stat(p->pack_name, &st) < 0 || !S_ISREG(st.st_mode)) {
3286c408 766 git__free(p);
282283ac 767 return git_odb__error_notfound("packfile not found", NULL);
a070f152
CMN
768 }
769
770 /* ok, it looks sane as far as we can check without
771 * actually mapping the pack file.
772 */
1af56d7d 773 p->mwf.size = st.st_size;
a070f152
CMN
774 p->pack_local = 1;
775 p->mtime = (git_time_t)st.st_mtime;
776
777 /* see if we can parse the sha1 oid in the packfile name */
778 if (path_len < 40 ||
e1de726c 779 git_oid_fromstr(&p->sha1, path + path_len - GIT_OID_HEXSZ) < 0)
a070f152
CMN
780 memset(&p->sha1, 0x0, GIT_OID_RAWSZ);
781
782 *pack_out = p;
e1de726c
RB
783
784 return 0;
a070f152
CMN
785}
786
787/***********************************************************
788 *
789 * PACKFILE ENTRY SEARCH INTERNALS
790 *
791 ***********************************************************/
792
e1de726c 793static git_off_t nth_packed_object_offset(const struct git_pack_file *p, uint32_t n)
a070f152
CMN
794{
795 const unsigned char *index = p->index_map.data;
796 index += 4 * 256;
797 if (p->index_version == 1) {
798 return ntohl(*((uint32_t *)(index + 24 * n)));
799 } else {
800 uint32_t off;
801 index += 8 + p->num_objects * (20 + 4);
802 off = ntohl(*((uint32_t *)(index + 4 * n)));
803 if (!(off & 0x80000000))
804 return off;
805 index += p->num_objects * 4 + (off & 0x7fffffff) * 8;
806 return (((uint64_t)ntohl(*((uint32_t *)(index + 0)))) << 32) |
87d9869f 807 ntohl(*((uint32_t *)(index + 4)));
a070f152
CMN
808 }
809}
810
60ecdf59
DMB
/*
 * Comparison callback (passed to git_vector_init in this file): orders
 * two entries by their first four bytes, compared bytewise.
 */
static int git__memcmp4(const void *a, const void *b)
{
	enum { KEY_BYTES = 4 };

	return memcmp(a, b, KEY_BYTES);
}
814
521aedad 815int git_pack_foreach_entry(
5dca2010 816 struct git_pack_file *p,
c3fb7d04 817 git_odb_foreach_cb cb,
5dca2010 818 void *data)
521aedad
CMN
819{
820 const unsigned char *index = p->index_map.data, *current;
521aedad
CMN
821 uint32_t i;
822
823 if (index == NULL) {
824 int error;
825
826 if ((error = pack_index_open(p)) < 0)
827 return error;
828
829 assert(p->index_map.data);
830
831 index = p->index_map.data;
832 }
833
834 if (p->index_version > 1) {
835 index += 8;
836 }
837
838 index += 4 * 256;
839
60ecdf59
DMB
840 if (p->oids == NULL) {
841 git_vector offsets, oids;
842 int error;
521aedad 843
60ecdf59
DMB
844 if ((error = git_vector_init(&oids, p->num_objects, NULL)))
845 return error;
846
847 if ((error = git_vector_init(&offsets, p->num_objects, git__memcmp4)))
848 return error;
5dca2010 849
60ecdf59
DMB
850 if (p->index_version > 1) {
851 const unsigned char *off = index + 24 * p->num_objects;
852 for (i = 0; i < p->num_objects; i++)
853 git_vector_insert(&offsets, (void*)&off[4 * i]);
854 git_vector_sort(&offsets);
855 git_vector_foreach(&offsets, i, current)
856 git_vector_insert(&oids, (void*)&index[5 * (current - off)]);
857 } else {
858 for (i = 0; i < p->num_objects; i++)
859 git_vector_insert(&offsets, (void*)&index[24 * i]);
860 git_vector_sort(&offsets);
861 git_vector_foreach(&offsets, i, current)
862 git_vector_insert(&oids, (void*)&current[4]);
863 }
864 git_vector_free(&offsets);
865 p->oids = (git_oid **)oids.contents;
521aedad
CMN
866 }
867
60ecdf59
DMB
868 for (i = 0; i < p->num_objects; i++)
869 if (cb(p->oids[i], data))
870 return GIT_EUSER;
871
521aedad
CMN
872 return 0;
873}
874
a070f152 875static int pack_entry_find_offset(
e1de726c
RB
876 git_off_t *offset_out,
877 git_oid *found_oid,
878 struct git_pack_file *p,
879 const git_oid *short_oid,
b8457baa 880 size_t len)
a070f152
CMN
881{
882 const uint32_t *level1_ofs = p->index_map.data;
883 const unsigned char *index = p->index_map.data;
884 unsigned hi, lo, stride;
885 int pos, found = 0;
886 const unsigned char *current = 0;
887
888 *offset_out = 0;
889
890 if (index == NULL) {
891 int error;
892
e1de726c
RB
893 if ((error = pack_index_open(p)) < 0)
894 return error;
a070f152
CMN
895
896 assert(p->index_map.data);
897
898 index = p->index_map.data;
899 level1_ofs = p->index_map.data;
900 }
901
902 if (p->index_version > 1) {
903 level1_ofs += 2;
904 index += 8;
905 }
906
907 index += 4 * 256;
908 hi = ntohl(level1_ofs[(int)short_oid->id[0]]);
909 lo = ((short_oid->id[0] == 0x0) ? 0 : ntohl(level1_ofs[(int)short_oid->id[0] - 1]));
910
911 if (p->index_version > 1) {
912 stride = 20;
913 } else {
914 stride = 24;
915 index += 4;
916 }
917
918#ifdef INDEX_DEBUG_LOOKUP
919 printf("%02x%02x%02x... lo %u hi %u nr %d\n",
920 short_oid->id[0], short_oid->id[1], short_oid->id[2], lo, hi, p->num_objects);
921#endif
922
923 /* Use git.git lookup code */
87d9869f 924 pos = sha1_entry_pos(index, stride, 0, lo, hi, p->num_objects, short_oid->id);
a070f152
CMN
925
926 if (pos >= 0) {
927 /* An object matching exactly the oid was found */
928 found = 1;
929 current = index + pos * stride;
930 } else {
931 /* No object was found */
932 /* pos refers to the object with the "closest" oid to short_oid */
933 pos = - 1 - pos;
934 if (pos < (int)p->num_objects) {
935 current = index + pos * stride;
936
282283ac 937 if (!git_oid_ncmp(short_oid, (const git_oid *)current, len))
a070f152 938 found = 1;
a070f152
CMN
939 }
940 }
941
b2a2702d 942 if (found && len != GIT_OID_HEXSZ && pos + 1 < (int)p->num_objects) {
a070f152
CMN
943 /* Check for ambiguousity */
944 const unsigned char *next = current + stride;
945
946 if (!git_oid_ncmp(short_oid, (const git_oid *)next, len)) {
947 found = 2;
948 }
949 }
950
e1de726c 951 if (!found)
282283ac 952 return git_odb__error_notfound("failed to find offset for pack entry", short_oid);
e1de726c
RB
953 if (found > 1)
954 return git_odb__error_ambiguous("found multiple offsets for pack entry");
955 *offset_out = nth_packed_object_offset(p, pos);
956 git_oid_fromraw(found_oid, current);
a070f152
CMN
957
958#ifdef INDEX_DEBUG_LOOKUP
e1de726c 959 {
a070f152
CMN
960 unsigned char hex_sha1[GIT_OID_HEXSZ + 1];
961 git_oid_fmt(hex_sha1, found_oid);
962 hex_sha1[GIT_OID_HEXSZ] = '\0';
963 printf("found lo=%d %s\n", lo, hex_sha1);
a070f152 964 }
e1de726c
RB
965#endif
966 return 0;
a070f152
CMN
967}
968
969int git_pack_entry_find(
970 struct git_pack_entry *e,
971 struct git_pack_file *p,
972 const git_oid *short_oid,
b8457baa 973 size_t len)
a070f152 974{
e1de726c 975 git_off_t offset;
a070f152
CMN
976 git_oid found_oid;
977 int error;
978
979 assert(p);
980
981 if (len == GIT_OID_HEXSZ && p->num_bad_objects) {
982 unsigned i;
983 for (i = 0; i < p->num_bad_objects; i++)
984 if (git_oid_cmp(short_oid, &p->bad_object_sha1[i]) == 0)
e1de726c 985 return packfile_error("bad object found in packfile");
a070f152
CMN
986 }
987
988 error = pack_entry_find_offset(&offset, &found_oid, p, short_oid, len);
e1de726c
RB
989 if (error < 0)
990 return error;
a070f152
CMN
991
992 /* we found a unique entry in the index;
993 * make sure the packfile backing the index
994 * still exists on disk */
e1de726c
RB
995 if (p->mwf.fd == -1 && (error = packfile_open(p)) < 0)
996 return error;
a070f152
CMN
997
998 e->offset = offset;
999 e->p = p;
1000
1001 git_oid_cpy(&e->sha1, &found_oid);
e1de726c 1002 return 0;
a070f152 1003}