]>
Commit | Line | Data |
---|---|---|
7d0cdf82 | 1 | /* |
bb742ede | 2 | * Copyright (C) 2009-2011 the libgit2 contributors |
7d0cdf82 | 3 | * |
bb742ede VM |
4 | * This file is part of libgit2, distributed under the GNU GPL v2 with |
5 | * a Linking Exception. For full terms see the included COPYING file. | |
7d0cdf82 CMN |
6 | */ |
7 | ||
8 | #include "mwindow.h" | |
9 | #include "odb.h" | |
10 | #include "pack.h" | |
11 | #include "delta-apply.h" | |
a070f152 | 12 | #include "sha1_lookup.h" |
7d0cdf82 CMN |
13 | |
14 | #include "git2/oid.h" | |
15 | #include "git2/zlib.h" | |
16 | ||
a070f152 CMN |
17 | static int packfile_open(struct git_pack_file *p); |
18 | static off_t nth_packed_object_offset(const struct git_pack_file *p, uint32_t n); | |
19 | int packfile_unpack_compressed( | |
20 | git_rawobj *obj, | |
21 | struct git_pack_file *p, | |
22 | git_mwindow **w_curs, | |
b5b474dd | 23 | off_t *curpos, |
a070f152 CMN |
24 | size_t size, |
25 | git_otype type); | |
26 | ||
27 | /* Can find the offset of an object given | |
28 | * a prefix of an identifier. | |
29 | * Throws GIT_EAMBIGUOUSOIDPREFIX if short oid | |
30 | * is ambiguous within the pack. | |
31 | * This method assumes that len is between | |
32 | * GIT_OID_MINPREFIXLEN and GIT_OID_HEXSZ. | |
33 | */ | |
34 | static int pack_entry_find_offset( | |
35 | off_t *offset_out, | |
36 | git_oid *found_oid, | |
37 | struct git_pack_file *p, | |
38 | const git_oid *short_oid, | |
39 | unsigned int len); | |
40 | ||
41 | /*********************************************************** | |
42 | * | |
43 | * PACK INDEX METHODS | |
44 | * | |
45 | ***********************************************************/ | |
46 | ||
47 | static void pack_index_free(struct git_pack_file *p) | |
48 | { | |
49 | if (p->index_map.data) { | |
50 | git_futils_mmap_free(&p->index_map); | |
51 | p->index_map.data = NULL; | |
52 | } | |
53 | } | |
54 | ||
87d9869f | 55 | static int pack_index_check(const char *path, struct git_pack_file *p) |
a070f152 CMN |
56 | { |
57 | struct git_pack_idx_header *hdr; | |
58 | uint32_t version, nr, i, *index; | |
59 | ||
60 | void *idx_map; | |
61 | size_t idx_size; | |
62 | ||
63 | struct stat st; | |
64 | ||
65 | /* TODO: properly open the file without access time */ | |
66 | git_file fd = p_open(path, O_RDONLY /*| O_NOATIME */); | |
67 | ||
68 | int error; | |
69 | ||
70 | if (fd < 0) | |
71 | return git__throw(GIT_EOSERR, "Failed to check index. File missing or corrupted"); | |
72 | ||
73 | if (p_fstat(fd, &st) < GIT_SUCCESS) { | |
74 | p_close(fd); | |
75 | return git__throw(GIT_EOSERR, "Failed to check index. File appears to be corrupted"); | |
76 | } | |
77 | ||
78 | if (!git__is_sizet(st.st_size)) | |
79 | return GIT_ENOMEM; | |
80 | ||
81 | idx_size = (size_t)st.st_size; | |
82 | ||
83 | if (idx_size < 4 * 256 + 20 + 20) { | |
84 | p_close(fd); | |
85 | return git__throw(GIT_EOBJCORRUPTED, "Failed to check index. Object is corrupted"); | |
86 | } | |
87 | ||
88 | error = git_futils_mmap_ro(&p->index_map, fd, 0, idx_size); | |
89 | p_close(fd); | |
90 | ||
91 | if (error < GIT_SUCCESS) | |
92 | return git__rethrow(error, "Failed to check index"); | |
93 | ||
94 | hdr = idx_map = p->index_map.data; | |
95 | ||
96 | if (hdr->idx_signature == htonl(PACK_IDX_SIGNATURE)) { | |
97 | version = ntohl(hdr->idx_version); | |
98 | ||
99 | if (version < 2 || version > 2) { | |
100 | git_futils_mmap_free(&p->index_map); | |
101 | return git__throw(GIT_EOBJCORRUPTED, "Failed to check index. Unsupported index version"); | |
102 | } | |
103 | ||
104 | } else | |
105 | version = 1; | |
106 | ||
107 | nr = 0; | |
108 | index = idx_map; | |
109 | ||
110 | if (version > 1) | |
87d9869f | 111 | index += 2; /* skip index header */ |
a070f152 CMN |
112 | |
113 | for (i = 0; i < 256; i++) { | |
114 | uint32_t n = ntohl(index[i]); | |
115 | if (n < nr) { | |
116 | git_futils_mmap_free(&p->index_map); | |
117 | return git__throw(GIT_EOBJCORRUPTED, "Failed to check index. Index is non-monotonic"); | |
118 | } | |
119 | nr = n; | |
120 | } | |
121 | ||
122 | if (version == 1) { | |
123 | /* | |
124 | * Total size: | |
87d9869f VM |
125 | * - 256 index entries 4 bytes each |
126 | * - 24-byte entries * nr (20-byte sha1 + 4-byte offset) | |
127 | * - 20-byte SHA1 of the packfile | |
128 | * - 20-byte SHA1 file checksum | |
a070f152 CMN |
129 | */ |
130 | if (idx_size != 4*256 + nr * 24 + 20 + 20) { | |
131 | git_futils_mmap_free(&p->index_map); | |
132 | return git__throw(GIT_EOBJCORRUPTED, "Failed to check index. Object is corrupted"); | |
133 | } | |
134 | } else if (version == 2) { | |
135 | /* | |
136 | * Minimum size: | |
87d9869f VM |
137 | * - 8 bytes of header |
138 | * - 256 index entries 4 bytes each | |
139 | * - 20-byte sha1 entry * nr | |
140 | * - 4-byte crc entry * nr | |
141 | * - 4-byte offset entry * nr | |
142 | * - 20-byte SHA1 of the packfile | |
143 | * - 20-byte SHA1 file checksum | |
a070f152 CMN |
144 | * And after the 4-byte offset table might be a |
145 | * variable sized table containing 8-byte entries | |
146 | * for offsets larger than 2^31. | |
147 | */ | |
148 | unsigned long min_size = 8 + 4*256 + nr*(20 + 4 + 4) + 20 + 20; | |
149 | unsigned long max_size = min_size; | |
150 | ||
151 | if (nr) | |
152 | max_size += (nr - 1)*8; | |
153 | ||
154 | if (idx_size < min_size || idx_size > max_size) { | |
155 | git_futils_mmap_free(&p->index_map); | |
156 | return git__throw(GIT_EOBJCORRUPTED, "Failed to check index. Wrong index size"); | |
157 | } | |
158 | ||
159 | /* Make sure that off_t is big enough to access the whole pack... | |
160 | * Is this an issue in libgit2? It shouldn't. */ | |
161 | if (idx_size != min_size && (sizeof(off_t) <= 4)) { | |
162 | git_futils_mmap_free(&p->index_map); | |
163 | return git__throw(GIT_EOSERR, "Failed to check index. off_t not big enough to access the whole pack"); | |
164 | } | |
165 | } | |
166 | ||
167 | p->index_version = version; | |
168 | p->num_objects = nr; | |
169 | return GIT_SUCCESS; | |
170 | } | |
171 | ||
172 | static int pack_index_open(struct git_pack_file *p) | |
173 | { | |
174 | char *idx_name; | |
175 | int error; | |
176 | ||
177 | if (p->index_map.data) | |
178 | return GIT_SUCCESS; | |
179 | ||
180 | idx_name = git__strdup(p->pack_name); | |
932669b8 | 181 | strcpy(idx_name + strlen(idx_name) - strlen(".pack"), ".idx"); |
a070f152 CMN |
182 | |
183 | error = pack_index_check(idx_name, p); | |
184 | free(idx_name); | |
185 | ||
186 | return error == GIT_SUCCESS ? GIT_SUCCESS : git__rethrow(error, "Failed to open index"); | |
187 | } | |
188 | ||
189 | static unsigned char *pack_window_open( | |
190 | struct git_pack_file *p, | |
7d0cdf82 CMN |
191 | git_mwindow **w_cursor, |
192 | off_t offset, | |
193 | unsigned int *left) | |
194 | { | |
195 | if (p->mwf.fd == -1 && packfile_open(p) < GIT_SUCCESS) | |
196 | return NULL; | |
197 | ||
198 | /* Since packfiles end in a hash of their content and it's | |
199 | * pointless to ask for an offset into the middle of that | |
200 | * hash, and the pack_window_contains function above wouldn't match | |
201 | * don't allow an offset too close to the end of the file. | |
202 | */ | |
203 | if (offset > (p->mwf.size - 20)) | |
204 | return NULL; | |
205 | ||
206 | return git_mwindow_open(&p->mwf, w_cursor, offset, 20, left); | |
207 | } | |
208 | ||
209 | static unsigned long packfile_unpack_header1( | |
210 | size_t *sizep, | |
211 | git_otype *type, | |
212 | const unsigned char *buf, | |
213 | unsigned long len) | |
214 | { | |
215 | unsigned shift; | |
216 | unsigned long size, c; | |
217 | unsigned long used = 0; | |
218 | ||
219 | c = buf[used++]; | |
220 | *type = (c >> 4) & 7; | |
221 | size = c & 15; | |
222 | shift = 4; | |
223 | while (c & 0x80) { | |
224 | if (len <= used || bitsizeof(long) <= shift) | |
225 | return 0; | |
226 | ||
227 | c = buf[used++]; | |
228 | size += (c & 0x7f) << shift; | |
229 | shift += 7; | |
230 | } | |
231 | ||
232 | *sizep = (size_t)size; | |
233 | return used; | |
234 | } | |
235 | ||
236 | int git_packfile_unpack_header( | |
237 | size_t *size_p, | |
238 | git_otype *type_p, | |
239 | git_mwindow_file *mwf, | |
240 | git_mwindow **w_curs, | |
241 | off_t *curpos) | |
242 | { | |
243 | unsigned char *base; | |
244 | unsigned int left; | |
245 | unsigned long used; | |
246 | ||
247 | /* pack_window_open() assures us we have [base, base + 20) available | |
87d9869f VM |
248 | * as a range that we can look at at. (Its actually the hash |
249 | * size that is assured.) With our object header encoding | |
7d0cdf82 CMN |
250 | * the maximum deflated object size is 2^137, which is just |
251 | * insane, so we know won't exceed what we have been given. | |
252 | */ | |
253 | // base = pack_window_open(p, w_curs, *curpos, &left); | |
254 | base = git_mwindow_open(mwf, w_curs, *curpos, 20, &left); | |
255 | if (base == NULL) | |
256 | return GIT_ENOMEM; | |
257 | ||
258 | used = packfile_unpack_header1(size_p, type_p, base, left); | |
259 | ||
260 | if (used == 0) | |
261 | return git__throw(GIT_EOBJCORRUPTED, "Header length is zero"); | |
262 | ||
263 | *curpos += used; | |
264 | return GIT_SUCCESS; | |
265 | } | |
266 | ||
a070f152 | 267 | static int packfile_unpack_delta( |
7d0cdf82 | 268 | git_rawobj *obj, |
a070f152 | 269 | struct git_pack_file *p, |
7d0cdf82 | 270 | git_mwindow **w_curs, |
b5b474dd | 271 | off_t *curpos, |
7d0cdf82 CMN |
272 | size_t delta_size, |
273 | git_otype delta_type, | |
274 | off_t obj_offset) | |
275 | { | |
276 | off_t base_offset; | |
277 | git_rawobj base, delta; | |
278 | int error; | |
279 | ||
b5b474dd | 280 | base_offset = get_delta_base(p, w_curs, curpos, delta_type, obj_offset); |
7d0cdf82 CMN |
281 | if (base_offset == 0) |
282 | return git__throw(GIT_EOBJCORRUPTED, "Delta offset is zero"); | |
97f40a0d CMN |
283 | if (base_offset < 0) |
284 | return git__rethrow(base_offset, "Failed to get delta base"); | |
7d0cdf82 CMN |
285 | |
286 | git_mwindow_close(w_curs); | |
b5b474dd | 287 | error = git_packfile_unpack(&base, p, &base_offset); |
7d0cdf82 CMN |
288 | |
289 | /* | |
290 | * TODO: git.git tries to load the base from other packfiles | |
291 | * or loose objects. | |
292 | * | |
293 | * We'll need to do this in order to support thin packs. | |
294 | */ | |
295 | if (error < GIT_SUCCESS) | |
296 | return git__rethrow(error, "Corrupted delta"); | |
297 | ||
298 | error = packfile_unpack_compressed(&delta, p, w_curs, curpos, delta_size, delta_type); | |
299 | if (error < GIT_SUCCESS) { | |
300 | free(base.data); | |
301 | return git__rethrow(error, "Corrupted delta"); | |
302 | } | |
303 | ||
304 | obj->type = base.type; | |
305 | error = git__delta_apply(obj, | |
306 | base.data, base.len, | |
307 | delta.data, delta.len); | |
308 | ||
309 | free(base.data); | |
310 | free(delta.data); | |
311 | ||
312 | /* TODO: we might want to cache this shit. eventually */ | |
313 | //add_delta_base_cache(p, base_offset, base, base_size, *type); | |
314 | return error; /* error set by git__delta_apply */ | |
315 | } | |
316 | ||
a070f152 | 317 | int git_packfile_unpack( |
7d0cdf82 | 318 | git_rawobj *obj, |
a070f152 | 319 | struct git_pack_file *p, |
b5b474dd | 320 | off_t *obj_offset) |
7d0cdf82 CMN |
321 | { |
322 | git_mwindow *w_curs = NULL; | |
b5b474dd | 323 | off_t curpos = *obj_offset; |
7d0cdf82 CMN |
324 | int error; |
325 | ||
326 | size_t size = 0; | |
327 | git_otype type; | |
328 | ||
329 | /* | |
330 | * TODO: optionally check the CRC on the packfile | |
331 | */ | |
332 | ||
333 | obj->data = NULL; | |
334 | obj->len = 0; | |
335 | obj->type = GIT_OBJ_BAD; | |
336 | ||
337 | error = git_packfile_unpack_header(&size, &type, &p->mwf, &w_curs, &curpos); | |
338 | if (error < GIT_SUCCESS) | |
339 | return git__rethrow(error, "Failed to unpack packfile"); | |
340 | ||
341 | switch (type) { | |
342 | case GIT_OBJ_OFS_DELTA: | |
343 | case GIT_OBJ_REF_DELTA: | |
344 | error = packfile_unpack_delta( | |
b5b474dd CMN |
345 | obj, p, &w_curs, &curpos, |
346 | size, type, *obj_offset); | |
7d0cdf82 CMN |
347 | break; |
348 | ||
349 | case GIT_OBJ_COMMIT: | |
350 | case GIT_OBJ_TREE: | |
351 | case GIT_OBJ_BLOB: | |
352 | case GIT_OBJ_TAG: | |
353 | error = packfile_unpack_compressed( | |
b5b474dd | 354 | obj, p, &w_curs, &curpos, |
7d0cdf82 CMN |
355 | size, type); |
356 | break; | |
357 | ||
358 | default: | |
359 | error = GIT_EOBJCORRUPTED; | |
360 | break; | |
361 | } | |
362 | ||
363 | git_mwindow_close(&w_curs); | |
b5b474dd CMN |
364 | |
365 | if (error < GIT_SUCCESS) | |
366 | return git__rethrow(error, "Failed to unpack object"); | |
367 | ||
368 | *obj_offset = curpos; | |
369 | return GIT_SUCCESS; | |
7d0cdf82 CMN |
370 | } |
371 | ||
372 | int packfile_unpack_compressed( | |
373 | git_rawobj *obj, | |
a070f152 | 374 | struct git_pack_file *p, |
7d0cdf82 | 375 | git_mwindow **w_curs, |
b5b474dd | 376 | off_t *curpos, |
7d0cdf82 CMN |
377 | size_t size, |
378 | git_otype type) | |
379 | { | |
380 | int st; | |
381 | z_stream stream; | |
382 | unsigned char *buffer, *in; | |
383 | ||
384 | buffer = git__malloc(size + 1); | |
385 | memset(buffer, 0x0, size + 1); | |
386 | ||
387 | memset(&stream, 0, sizeof(stream)); | |
388 | stream.next_out = buffer; | |
1c3fac4d | 389 | stream.avail_out = (uInt)size + 1; |
7d0cdf82 CMN |
390 | |
391 | st = inflateInit(&stream); | |
392 | if (st != Z_OK) { | |
393 | free(buffer); | |
394 | return git__throw(GIT_EZLIB, "Error in zlib"); | |
395 | } | |
396 | ||
397 | do { | |
b5b474dd | 398 | in = pack_window_open(p, w_curs, *curpos, &stream.avail_in); |
7d0cdf82 CMN |
399 | stream.next_in = in; |
400 | st = inflate(&stream, Z_FINISH); | |
401 | ||
402 | if (!stream.avail_out) | |
403 | break; /* the payload is larger than it should be */ | |
404 | ||
b5b474dd | 405 | *curpos += stream.next_in - in; |
7d0cdf82 CMN |
406 | } while (st == Z_OK || st == Z_BUF_ERROR); |
407 | ||
408 | inflateEnd(&stream); | |
409 | ||
410 | if ((st != Z_STREAM_END) || stream.total_out != size) { | |
411 | free(buffer); | |
412 | return git__throw(GIT_EZLIB, "Error in zlib"); | |
413 | } | |
414 | ||
415 | obj->type = type; | |
416 | obj->len = size; | |
417 | obj->data = buffer; | |
418 | return GIT_SUCCESS; | |
419 | } | |
420 | ||
b5b474dd CMN |
421 | /* |
422 | * curpos is where the data starts, delta_obj_offset is the where the | |
423 | * header starts | |
424 | */ | |
7d0cdf82 | 425 | off_t get_delta_base( |
a070f152 | 426 | struct git_pack_file *p, |
7d0cdf82 CMN |
427 | git_mwindow **w_curs, |
428 | off_t *curpos, | |
429 | git_otype type, | |
430 | off_t delta_obj_offset) | |
431 | { | |
432 | unsigned char *base_info = pack_window_open(p, w_curs, *curpos, NULL); | |
433 | off_t base_offset; | |
434 | git_oid unused; | |
435 | ||
436 | /* pack_window_open() assured us we have [base_info, base_info + 20) | |
437 | * as a range that we can look at without walking off the | |
87d9869f VM |
438 | * end of the mapped window. Its actually the hash size |
439 | * that is assured. An OFS_DELTA longer than the hash size | |
7d0cdf82 CMN |
440 | * is stupid, as then a REF_DELTA would be smaller to store. |
441 | */ | |
442 | if (type == GIT_OBJ_OFS_DELTA) { | |
443 | unsigned used = 0; | |
444 | unsigned char c = base_info[used++]; | |
445 | base_offset = c & 127; | |
446 | while (c & 128) { | |
447 | base_offset += 1; | |
448 | if (!base_offset || MSB(base_offset, 7)) | |
87d9869f | 449 | return 0; /* overflow */ |
7d0cdf82 CMN |
450 | c = base_info[used++]; |
451 | base_offset = (base_offset << 7) + (c & 127); | |
452 | } | |
453 | base_offset = delta_obj_offset - base_offset; | |
454 | if (base_offset <= 0 || base_offset >= delta_obj_offset) | |
87d9869f | 455 | return 0; /* out of bound */ |
7d0cdf82 CMN |
456 | *curpos += used; |
457 | } else if (type == GIT_OBJ_REF_DELTA) { | |
c1af5a39 CMN |
458 | /* If we have the cooperative cache, search in it first */ |
459 | if (p->has_cache) { | |
460 | int pos; | |
461 | struct git_pack_entry key; | |
462 | ||
463 | git_oid_fromraw(&key.sha1, base_info); | |
464 | pos = git_vector_bsearch(&p->cache, &key); | |
465 | if (pos >= 0) { | |
466 | *curpos += 20; | |
467 | return ((struct git_pack_entry *)git_vector_get(&p->cache, pos))->offset; | |
468 | } | |
469 | } | |
7d0cdf82 CMN |
470 | /* The base entry _must_ be in the same pack */ |
471 | if (pack_entry_find_offset(&base_offset, &unused, p, (git_oid *)base_info, GIT_OID_HEXSZ) < GIT_SUCCESS) | |
061047cc | 472 | return git__rethrow(GIT_EPACKCORRUPTED, "Base entry delta is not in the same pack"); |
7d0cdf82 CMN |
473 | *curpos += 20; |
474 | } else | |
475 | return 0; | |
476 | ||
477 | return base_offset; | |
478 | } | |
a070f152 CMN |
479 | |
480 | /*********************************************************** | |
481 | * | |
482 | * PACKFILE METHODS | |
483 | * | |
484 | ***********************************************************/ | |
485 | ||
486 | static struct git_pack_file *packfile_alloc(int extra) | |
487 | { | |
488 | struct git_pack_file *p = git__malloc(sizeof(*p) + extra); | |
489 | memset(p, 0, sizeof(*p)); | |
490 | p->mwf.fd = -1; | |
491 | return p; | |
492 | } | |
493 | ||
494 | ||
495 | void packfile_free(struct git_pack_file *p) | |
496 | { | |
497 | assert(p); | |
498 | ||
499 | /* clear_delta_base_cache(); */ | |
500 | git_mwindow_free_all(&p->mwf); | |
501 | ||
502 | if (p->mwf.fd != -1) | |
503 | p_close(p->mwf.fd); | |
504 | ||
505 | pack_index_free(p); | |
506 | ||
507 | free(p->bad_object_sha1); | |
508 | free(p); | |
509 | } | |
510 | ||
/*
 * Open the packfile described by `p` and verify that it matches its
 * index.
 *
 * The index is opened first if it is not mapped yet.  The pack is then
 * opened, registered with the mwindow machinery and checked:
 *  - it is a regular file whose size agrees with any size recorded
 *    earlier in p->mwf.size;
 *  - its header carries the pack signature and a supported version;
 *  - its object count equals the one recorded from the index;
 *  - the hash stored in its last GIT_OID_RAWSZ bytes equals the pack
 *    hash stored 40 bytes before the end of the index.
 *
 * Returns GIT_SUCCESS with p->mwf.fd open.  On every failure path any
 * descriptor opened here is closed and p->mwf.fd is reset to -1, so the
 * "fd == -1 means not open" convention used by callers keeps holding.
 */
static int packfile_open(struct git_pack_file *p)
{
	struct stat st;
	struct git_pack_header hdr;
	git_oid sha1;
	unsigned char *idx_sha1;

	if (!p->index_map.data && pack_index_open(p) < GIT_SUCCESS)
		return git__throw(GIT_ENOTFOUND, "Failed to open packfile. File not found");

	/* TODO: open with noatime */
	p->mwf.fd = p_open(p->pack_name, O_RDONLY);
	if (p->mwf.fd < 0) {
		p->mwf.fd = -1;
		return git__throw(GIT_EOSERR, "Failed to open packfile. File appears to be corrupted");
	}

	if (p_fstat(p->mwf.fd, &st) < GIT_SUCCESS) {
		p_close(p->mwf.fd);
		p->mwf.fd = -1;
		return git__throw(GIT_EOSERR, "Failed to open packfile. File appears to be corrupted");
	}

	if (git_mwindow_file_register(&p->mwf) < GIT_SUCCESS) {
		p_close(p->mwf.fd);
		p->mwf.fd = -1;
		return git__throw(GIT_ERROR, "Failed to register packfile windows");
	}

	/* If we created the struct before we had the pack we lack size. */
	if (!p->mwf.size) {
		if (!S_ISREG(st.st_mode))
			goto cleanup;
		p->mwf.size = (off_t)st.st_size;
	} else if (p->mwf.size != st.st_size)
		goto cleanup;

	/*
	 * TODO: the descriptor stays open for the sliding mmap; it is not
	 * marked close-on-exec here.
	 */

	/* Verify we recognize this pack file format. */
	if (p_read(p->mwf.fd, &hdr, sizeof(hdr)) < GIT_SUCCESS)
		goto cleanup;

	if (hdr.hdr_signature != htonl(PACK_SIGNATURE))
		goto cleanup;

	if (!pack_version_ok(hdr.hdr_version))
		goto cleanup;

	/* Verify the pack matches its index. */
	if (p->num_objects != ntohl(hdr.hdr_entries))
		goto cleanup;

	if (p_lseek(p->mwf.fd, p->mwf.size - GIT_OID_RAWSZ, SEEK_SET) == -1)
		goto cleanup;

	if (p_read(p->mwf.fd, sha1.id, GIT_OID_RAWSZ) < GIT_SUCCESS)
		goto cleanup;

	/* the index ends with the pack hash followed by its own checksum */
	idx_sha1 = ((unsigned char *)p->index_map.data) + p->index_map.len - 40;

	if (git_oid_cmp(&sha1, (git_oid *)idx_sha1) != 0)
		goto cleanup;

	return GIT_SUCCESS;

cleanup:
	p_close(p->mwf.fd);
	p->mwf.fd = -1;
	return git__throw(GIT_EPACKCORRUPTED, "Failed to open packfile. Pack is corrupted");
}
584 | ||
/*
 * Create a packfile descriptor for the pack index at `path`.
 *
 * `path` is expected to name an index file; its last four characters
 * (the ".idx" suffix) are replaced to derive the ".keep" and ".pack"
 * siblings.  The ".pack" file must exist and be a regular file.  The
 * pack itself is not opened or mapped here.
 *
 * On success `*pack_out` receives a newly allocated descriptor carrying
 * the pack name, size, mtime, keep flag and, when the file name ends in
 * a parseable 40-character hex id, that id (zeroed otherwise).
 *
 * Returns GIT_SUCCESS; GIT_ENOTFOUND if `path` is too short to hold a
 * name plus suffix or the pack file is missing; GIT_ENOMEM if the
 * descriptor cannot be allocated.  `*pack_out` is NULL on failure.
 */
int git_packfile_check(struct git_pack_file **pack_out, const char *path)
{
	struct stat st;
	struct git_pack_file *p;
	size_t path_len;

	*pack_out = NULL;
	path_len = strlen(path);

	/*
	 * At least one character must precede the suffix.  This has to be
	 * checked before subtracting: path_len is unsigned, so subtracting
	 * from a shorter path would wrap around to a huge length.
	 */
	if (path_len <= strlen(".idx"))
		return git__throw(GIT_ENOTFOUND, "Failed to check packfile. Wrong path name");

	/* room for the name with ".pack"/".keep" (one longer than ".idx") + NUL */
	p = packfile_alloc(path_len + 2);
	if (p == NULL)
		return GIT_ENOMEM;

	/*
	 * Make sure a corresponding .pack file exists and that
	 * the index looks sane.
	 */
	path_len -= strlen(".idx");

	memcpy(p->pack_name, path, path_len);

	strcpy(p->pack_name + path_len, ".keep");
	if (git_futils_exists(p->pack_name) == GIT_SUCCESS)
		p->pack_keep = 1;

	strcpy(p->pack_name + path_len, ".pack");
	if (p_stat(p->pack_name, &st) < GIT_SUCCESS || !S_ISREG(st.st_mode)) {
		free(p);
		return git__throw(GIT_ENOTFOUND, "Failed to check packfile. File not found");
	}

	/* ok, it looks sane as far as we can check without
	 * actually mapping the pack file.
	 */
	p->mwf.size = (off_t)st.st_size;
	p->pack_local = 1;
	p->mtime = (git_time_t)st.st_mtime;

	/* see if we can parse the sha1 oid in the packfile name */
	if (path_len < 40 ||
		git_oid_fromstr(&p->sha1, path + path_len - GIT_OID_HEXSZ) < GIT_SUCCESS)
		memset(&p->sha1, 0x0, GIT_OID_RAWSZ);

	*pack_out = p;
	return GIT_SUCCESS;
}
632 | ||
633 | /*********************************************************** | |
634 | * | |
635 | * PACKFILE ENTRY SEARCH INTERNALS | |
636 | * | |
637 | ***********************************************************/ | |
638 | ||
639 | static off_t nth_packed_object_offset(const struct git_pack_file *p, uint32_t n) | |
640 | { | |
641 | const unsigned char *index = p->index_map.data; | |
642 | index += 4 * 256; | |
643 | if (p->index_version == 1) { | |
644 | return ntohl(*((uint32_t *)(index + 24 * n))); | |
645 | } else { | |
646 | uint32_t off; | |
647 | index += 8 + p->num_objects * (20 + 4); | |
648 | off = ntohl(*((uint32_t *)(index + 4 * n))); | |
649 | if (!(off & 0x80000000)) | |
650 | return off; | |
651 | index += p->num_objects * 4 + (off & 0x7fffffff) * 8; | |
652 | return (((uint64_t)ntohl(*((uint32_t *)(index + 0)))) << 32) | | |
87d9869f | 653 | ntohl(*((uint32_t *)(index + 4))); |
a070f152 CMN |
654 | } |
655 | } | |
656 | ||
657 | static int pack_entry_find_offset( | |
658 | off_t *offset_out, | |
659 | git_oid *found_oid, | |
660 | struct git_pack_file *p, | |
661 | const git_oid *short_oid, | |
662 | unsigned int len) | |
663 | { | |
664 | const uint32_t *level1_ofs = p->index_map.data; | |
665 | const unsigned char *index = p->index_map.data; | |
666 | unsigned hi, lo, stride; | |
667 | int pos, found = 0; | |
668 | const unsigned char *current = 0; | |
669 | ||
670 | *offset_out = 0; | |
671 | ||
672 | if (index == NULL) { | |
673 | int error; | |
674 | ||
675 | if ((error = pack_index_open(p)) < GIT_SUCCESS) | |
676 | return git__rethrow(error, "Failed to find offset for pack entry"); | |
677 | ||
678 | assert(p->index_map.data); | |
679 | ||
680 | index = p->index_map.data; | |
681 | level1_ofs = p->index_map.data; | |
682 | } | |
683 | ||
684 | if (p->index_version > 1) { | |
685 | level1_ofs += 2; | |
686 | index += 8; | |
687 | } | |
688 | ||
689 | index += 4 * 256; | |
690 | hi = ntohl(level1_ofs[(int)short_oid->id[0]]); | |
691 | lo = ((short_oid->id[0] == 0x0) ? 0 : ntohl(level1_ofs[(int)short_oid->id[0] - 1])); | |
692 | ||
693 | if (p->index_version > 1) { | |
694 | stride = 20; | |
695 | } else { | |
696 | stride = 24; | |
697 | index += 4; | |
698 | } | |
699 | ||
700 | #ifdef INDEX_DEBUG_LOOKUP | |
701 | printf("%02x%02x%02x... lo %u hi %u nr %d\n", | |
702 | short_oid->id[0], short_oid->id[1], short_oid->id[2], lo, hi, p->num_objects); | |
703 | #endif | |
704 | ||
705 | /* Use git.git lookup code */ | |
87d9869f | 706 | pos = sha1_entry_pos(index, stride, 0, lo, hi, p->num_objects, short_oid->id); |
a070f152 CMN |
707 | |
708 | if (pos >= 0) { | |
709 | /* An object matching exactly the oid was found */ | |
710 | found = 1; | |
711 | current = index + pos * stride; | |
712 | } else { | |
713 | /* No object was found */ | |
714 | /* pos refers to the object with the "closest" oid to short_oid */ | |
715 | pos = - 1 - pos; | |
716 | if (pos < (int)p->num_objects) { | |
717 | current = index + pos * stride; | |
718 | ||
719 | if (!git_oid_ncmp(short_oid, (const git_oid *)current, len)) { | |
720 | found = 1; | |
721 | } | |
722 | } | |
723 | } | |
724 | ||
725 | if (found && pos + 1 < (int)p->num_objects) { | |
726 | /* Check for ambiguousity */ | |
727 | const unsigned char *next = current + stride; | |
728 | ||
729 | if (!git_oid_ncmp(short_oid, (const git_oid *)next, len)) { | |
730 | found = 2; | |
731 | } | |
732 | } | |
733 | ||
734 | if (!found) { | |
735 | return git__throw(GIT_ENOTFOUND, "Failed to find offset for pack entry. Entry not found"); | |
736 | } else if (found > 1) { | |
737 | return git__throw(GIT_EAMBIGUOUSOIDPREFIX, "Failed to find offset for pack entry. Ambiguous sha1 prefix within pack"); | |
738 | } else { | |
739 | *offset_out = nth_packed_object_offset(p, pos); | |
740 | git_oid_fromraw(found_oid, current); | |
741 | ||
742 | #ifdef INDEX_DEBUG_LOOKUP | |
743 | unsigned char hex_sha1[GIT_OID_HEXSZ + 1]; | |
744 | git_oid_fmt(hex_sha1, found_oid); | |
745 | hex_sha1[GIT_OID_HEXSZ] = '\0'; | |
746 | printf("found lo=%d %s\n", lo, hex_sha1); | |
747 | #endif | |
748 | return GIT_SUCCESS; | |
749 | } | |
750 | } | |
751 | ||
752 | int git_pack_entry_find( | |
753 | struct git_pack_entry *e, | |
754 | struct git_pack_file *p, | |
755 | const git_oid *short_oid, | |
756 | unsigned int len) | |
757 | { | |
758 | off_t offset; | |
759 | git_oid found_oid; | |
760 | int error; | |
761 | ||
762 | assert(p); | |
763 | ||
764 | if (len == GIT_OID_HEXSZ && p->num_bad_objects) { | |
765 | unsigned i; | |
766 | for (i = 0; i < p->num_bad_objects; i++) | |
767 | if (git_oid_cmp(short_oid, &p->bad_object_sha1[i]) == 0) | |
768 | return git__throw(GIT_ERROR, "Failed to find pack entry. Bad object found"); | |
769 | } | |
770 | ||
771 | error = pack_entry_find_offset(&offset, &found_oid, p, short_oid, len); | |
772 | if (error < GIT_SUCCESS) | |
773 | return git__rethrow(error, "Failed to find pack entry. Couldn't find offset"); | |
774 | ||
775 | /* we found a unique entry in the index; | |
776 | * make sure the packfile backing the index | |
777 | * still exists on disk */ | |
778 | if (p->mwf.fd == -1 && packfile_open(p) < GIT_SUCCESS) | |
779 | return git__throw(GIT_EOSERR, "Failed to find pack entry. Packfile doesn't exist on disk"); | |
780 | ||
781 | e->offset = offset; | |
782 | e->p = p; | |
783 | ||
784 | git_oid_cpy(&e->sha1, &found_oid); | |
785 | return GIT_SUCCESS; | |
786 | } |