2 * Copyright (C) the libgit2 contributors. All rights reserved.
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
/*
 * NOTE(review): 0444 looks like read-only permission bits for
 * multi-pack-index files; no use of this macro is visible nearby - confirm.
 */
16 #define GIT_MIDX_FILE_MODE 0444
/*
 * Expected header values. The parser compares the header signature field
 * against htonl(MIDX_SIGNATURE) and requires the version and
 * object_id_version fields to equal the two constants below; any mismatch
 * is reported as an unsupported multi-pack index version.
 */
18 #define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */
19 #define MIDX_VERSION 1
20 #define MIDX_OBJECT_ID_VERSION 1
/*
 * Header located at the very start of the multi-pack-index data: the parser
 * casts the beginning of the buffer to this struct.
 * Code in this file also reads the signature, version, chunks and packfiles
 * members through this type; signature and packfiles are converted with
 * htonl()/ntohl(), i.e. they are kept in network byte order.
 */
21 struct git_midx_header
{
/* Must equal MIDX_OBJECT_ID_VERSION for the file to be accepted. */
24 uint8_t object_id_version
;
/* NOTE(review): no reader of this field is visible in this file - confirm. */
26 uint8_t base_midx_files
;
/*
 * Chunk identifiers. While walking the chunk table the parser reads the
 * first four bytes of each 12-byte chunk header with ntohl() and switches
 * on these values to decide which chunk descriptor receives the offset.
 * An unrecognized chunk ID error is reported for other values.
 */
30 #define MIDX_PACKFILE_NAMES_ID 0x504e414d /* "PNAM" */
31 #define MIDX_OID_FANOUT_ID 0x4f494446 /* "OIDF" */
32 #define MIDX_OID_LOOKUP_ID 0x4f49444c /* "OIDL" */
33 #define MIDX_OBJECT_OFFSETS_ID 0x4f4f4646 /* "OOFF" */
34 #define MIDX_OBJECT_LARGE_OFFSETS_ID 0x4c4f4646 /* "LOFF" */
/*
 * Describes where one chunk lives inside the multi-pack-index buffer.
 * Code in this file assigns and reads an offset member (added to the start
 * of the buffer to reach the chunk) and a length member (the chunk size in
 * bytes, derived from the distance to the next chunk or to the trailer).
 * A zero offset is treated by the chunk parsers as chunk not present.
 */
36 struct git_midx_chunk
{
/*
 * Records a GIT_ERROR_ODB error whose text is a fixed invalid
 * multi-pack-index prefix followed by the given message.
 *
 * message: short description substituted for the %s placeholder.
 *
 * Callers in this file return the result of this function directly.
 * NOTE(review): confirm the returned value is negative; the caller of the
 * parser tests for error < 0.
 */
41 static int midx_error(const char *message
)
43 git_error_set(GIT_ERROR_ODB
, "invalid multi-pack-index file - %s", message
);
/*
 * Validates the Packfile Names chunk and records every name in
 * idx->packfile_names.
 *
 * data:  start of the multi-pack-index buffer; the chunk is read at
 *        data + chunk->offset.
 * chunk: offset and length of the Packfile Names chunk.
 *
 * The chunk is treated as a run of consecutive NUL-terminated strings, one
 * per expected packfile. A name is rejected when it is empty, when its
 * terminator would fall outside the chunk, when it does not compare
 * strictly greater (strcmp) than the previous name, when it does not end in
 * .idx with at least one character before that suffix, or when it contains
 * a slash or a backslash.
 *
 * The vector stores pointers into data, not copies, so the buffer has to
 * stay valid for as long as idx->packfile_names is in use.
 *
 * Every violated rule yields the result of midx_error().
 */
47 static int midx_parse_packfile_names(
49 const unsigned char *data
,
51 struct git_midx_chunk
*chunk
)
55 char *packfile_name
= (char *)(data
+ chunk
->offset
);
/* Bytes of the chunk that have not been consumed yet. */
56 size_t chunk_size
= chunk
->length
, len
;
/* The chunk descriptors start zeroed, so offset 0 means never seen. */
57 if (chunk
->offset
== 0)
58 return midx_error("missing Packfile Names chunk");
59 if (chunk
->length
== 0)
60 return midx_error("empty Packfile Names chunk");
/* The vector is sized for the expected number of packfiles up front. */
61 if ((error
= git_vector_init(&idx
->packfile_names
, packfiles
, git__strcmp_cb
)) < 0)
63 for (i
= 0; i
< packfiles
; ++i
) {
/* Bounded scan: never looks past the remaining bytes of the chunk. */
64 len
= p_strnlen(packfile_name
, chunk_size
);
66 return midx_error("empty packfile name");
/*
 * Presumably p_strnlen returns chunk_size when no NUL was found, in which
 * case there is no room left for the terminator.
 */
67 if (len
+ 1 > chunk_size
)
68 return midx_error("unterminated packfile name");
/* NOTE(review): the result of git_vector_insert is not checked here. */
69 git_vector_insert(&idx
->packfile_names
, packfile_name
);
/*
 * Entry i - 1 is the name stored by the previous iteration (assuming insert
 * appends); equal names are rejected as well because of the >= comparison.
 */
70 if (i
&& strcmp(git_vector_get(&idx
->packfile_names
, i
- 1), packfile_name
) >= 0)
71 return midx_error("packfile names are not sorted");
/* Require the .idx suffix plus at least one character in front of it. */
72 if (strlen(packfile_name
) <= strlen(".idx") || git__suffixcmp(packfile_name
, ".idx") != 0)
73 return midx_error("non-.idx packfile name");
/* Names carrying a directory separator are refused. */
74 if (strchr(packfile_name
, '/') != NULL
|| strchr(packfile_name
, '\\') != NULL
)
75 return midx_error("non-local packfile");
/* Step past this name and its NUL terminator. */
76 packfile_name
+= len
+ 1;
77 chunk_size
-= len
+ 1;
/*
 * Validates the OID Fanout chunk and points idx->oid_fanout at it.
 *
 * data:             start of the multi-pack-index buffer.
 * chunk_oid_fanout: offset and length of the OID Fanout chunk.
 *
 * The chunk must be present, non-empty and exactly 256 * 4 bytes long,
 * i.e. 256 entries of 32 bits. The entries are left in place in network
 * byte order; readers convert each one with ntohl().
 *
 * idx->num_objects is set from nr once all entries were visited.
 * NOTE(review): nr presumably tracks the most recent fanout value so that
 * num_objects ends up as the last (largest) entry - confirm.
 *
 * Every violated rule yields the result of midx_error().
 */
82 static int midx_parse_oid_fanout(
84 const unsigned char *data
,
85 struct git_midx_chunk
*chunk_oid_fanout
)
/* The chunk descriptors start zeroed, so offset 0 means never seen. */
88 if (chunk_oid_fanout
->offset
== 0)
89 return midx_error("missing OID Fanout chunk");
90 if (chunk_oid_fanout
->length
== 0)
91 return midx_error("empty OID Fanout chunk");
92 if (chunk_oid_fanout
->length
!= 256 * 4)
93 return midx_error("OID Fanout chunk has wrong length");
/* No copy is made: the table is used directly from the input buffer. */
95 idx
->oid_fanout
= (const uint32_t *)(data
+ chunk_oid_fanout
->offset
);
97 for (i
= 0; i
< 256; ++i
) {
/* Convert the stored big-endian count to host order before comparing. */
98 uint32_t n
= ntohl(idx
->oid_fanout
[i
]);
100 return midx_error("index is non-monotonic");
103 idx
->num_objects
= nr
;
/*
 * Validates the OID Lookup chunk and points idx->oid_lookup at it.
 *
 * data:             start of the multi-pack-index buffer.
 * chunk_oid_lookup: offset and length of the OID Lookup chunk.
 *
 * The chunk must be present, non-empty and exactly idx->num_objects * 20
 * bytes long, so idx->num_objects has to be set (by the fanout parser)
 * before this function runs. The table is used in place, without copying.
 *
 * Each entry must compare strictly greater (git_oid_cmp) than the one
 * checked before it. The comparison starts from an all-zero OID, so an
 * all-zero first entry is rejected too.
 * NOTE(review): prev_oid is presumably advanced to oid on every iteration -
 * confirm.
 *
 * Every violated rule yields the result of midx_error().
 */
107 static int midx_parse_oid_lookup(
109 const unsigned char *data
,
110 struct git_midx_chunk
*chunk_oid_lookup
)
113 git_oid
*oid
, *prev_oid
, zero_oid
= {{0}};
/* The chunk descriptors start zeroed, so offset 0 means never seen. */
115 if (chunk_oid_lookup
->offset
== 0)
116 return midx_error("missing OID Lookup chunk");
117 if (chunk_oid_lookup
->length
== 0)
118 return midx_error("empty OID Lookup chunk");
/* 20 bytes per object id. */
119 if (chunk_oid_lookup
->length
!= idx
->num_objects
* 20)
120 return midx_error("OID Lookup chunk has wrong length");
122 idx
->oid_lookup
= oid
= (git_oid
*)(data
+ chunk_oid_lookup
->offset
);
/* Seed the ordering check with the smallest possible id. */
123 prev_oid
= &zero_oid
;
124 for (i
= 0; i
< idx
->num_objects
; ++i
, ++oid
) {
125 if (git_oid_cmp(prev_oid
, oid
) >= 0)
126 return midx_error("OID Lookup index is non-monotonic");
/*
 * Validates the Object Offsets chunk and points idx->object_offsets at it.
 *
 * data:                 start of the multi-pack-index buffer.
 * chunk_object_offsets: offset and length of the Object Offsets chunk.
 *
 * The chunk must be present, non-empty and exactly idx->num_objects * 8
 * bytes long (one 8-byte record per object), so idx->num_objects has to be
 * set before this function runs. The lookup code in this file reads the
 * first four bytes of a record as the pack index and the last four bytes
 * as the offset word, both via ntohl().
 *
 * The table is used in place, without copying.
 * Every violated rule yields the result of midx_error().
 */
133 static int midx_parse_object_offsets(
135 const unsigned char *data
,
136 struct git_midx_chunk
*chunk_object_offsets
)
/* The chunk descriptors start zeroed, so offset 0 means never seen. */
138 if (chunk_object_offsets
->offset
== 0)
139 return midx_error("missing Object Offsets chunk");
140 if (chunk_object_offsets
->length
== 0)
141 return midx_error("empty Object Offsets chunk");
142 if (chunk_object_offsets
->length
!= idx
->num_objects
* 8)
143 return midx_error("Object Offsets chunk has wrong length");
145 idx
->object_offsets
= data
+ chunk_object_offsets
->offset
;
/*
 * Validates the Object Large Offsets chunk and records its location and
 * entry count in idx.
 *
 * data:                       start of the multi-pack-index buffer.
 * chunk_object_large_offsets: offset and length of the chunk.
 *
 * The length must be a multiple of 8; each 8-byte entry is later read by
 * the lookup code as one 64-bit offset. idx->num_object_large_offsets is
 * the number of such entries and serves as the bound for indexes into the
 * table.
 *
 * NOTE(review): unlike the other chunk parsers there is no missing-chunk
 * error here, so a zero length is presumably accepted as chunk absent -
 * confirm.
 */
150 static int midx_parse_object_large_offsets(
152 const unsigned char *data
,
153 struct git_midx_chunk
*chunk_object_large_offsets
)
155 if (chunk_object_large_offsets
->length
== 0)
157 if (chunk_object_large_offsets
->length
% 8 != 0)
158 return midx_error("malformed Object Large Offsets chunk");
/* The table is used in place, without copying. */
160 idx
->object_large_offsets
= data
+ chunk_object_large_offsets
->offset
;
161 idx
->num_object_large_offsets
= chunk_object_large_offsets
->length
/ 8;
/*
 * Parses a complete multi-pack-index image held in memory.
 *
 * data: start of the image; size: its length in bytes; idx: receives the
 * checksum and the pointers and counts produced by the chunk parsers.
 *
 * Steps performed by the code below:
 *  1. size must cover the header plus a 20-byte trailer.
 *  2. The header signature, version and object id version must match the
 *     MIDX_* constants, and the chunk count must be non-zero.
 *  3. The last 20 bytes are copied into idx->checksum and compared with
 *     the hash of everything before them (git_hash_buf).
 *  4. The chunk table that follows the header is walked in 12-byte steps:
 *     four bytes of chunk id, then a 64-bit big-endian offset. Offsets
 *     must not decrease and must lie before the trailer. The length of a
 *     chunk is the distance to the next chunk offset, or to the trailer
 *     for the final chunk.
 *  5. The individual chunk parsers run in a fixed order: packfile names,
 *     OID fanout, OID lookup, object offsets, object large offsets. The
 *     fanout parser must precede the next two because it sets
 *     idx->num_objects.
 *
 * All pointers stored in idx refer into data, so data must outlive idx.
 */
168 const unsigned char *data
,
171 struct git_midx_header
*hdr
;
/* Cursor over the 12-byte chunk table entries. */
172 const unsigned char *chunk_hdr
;
/* Descriptor of the chunk seen in the previous table entry, if any. */
173 struct git_midx_chunk
*last_chunk
;
175 off64_t last_chunk_offset
, chunk_offset
, trailer_offset
;
176 git_oid idx_checksum
= {{0}};
/* Zeroed descriptors: an offset of 0 later means the chunk was not found. */
178 struct git_midx_chunk chunk_packfile_names
= {0},
179 chunk_oid_fanout
= {0},
180 chunk_oid_lookup
= {0},
181 chunk_object_offsets
= {0},
182 chunk_object_large_offsets
= {0};
/* Room for the header and the 20-byte trailing checksum is mandatory. */
186 if (size
< sizeof(struct git_midx_header
) + 20)
187 return midx_error("multi-pack index is too short");
189 hdr
= ((struct git_midx_header
*)data
);
/* The signature is stored big-endian, hence the htonl on the constant. */
191 if (hdr
->signature
!= htonl(MIDX_SIGNATURE
) ||
192 hdr
->version
!= MIDX_VERSION
||
193 hdr
->object_id_version
!= MIDX_OBJECT_ID_VERSION
) {
194 return midx_error("unsupported multi-pack index version");
196 if (hdr
->chunks
== 0)
197 return midx_error("no chunks in multi-pack index");
200 * The very first chunk's offset should be after the header, all the chunk
201 * headers, and a special zero chunk.
/*
 * NOTE(review): the expression below is presumably the initial value of
 * last_chunk_offset: header size plus one 12-byte entry per chunk plus one
 * extra terminating entry - confirm.
 */
204 sizeof(struct git_midx_header
) +
205 (1 + hdr
->chunks
) * 12;
/* The checksum occupies the final 20 bytes of the image. */
206 trailer_offset
= size
- 20;
207 if (trailer_offset
< last_chunk_offset
)
208 return midx_error("wrong index size");
209 git_oid_cpy(&idx
->checksum
, (git_oid
*)(data
+ trailer_offset
));
/* Hash everything in front of the trailer and compare with the trailer. */
211 if (git_hash_buf(&idx_checksum
, data
, (size_t)trailer_offset
) < 0)
212 return midx_error("could not calculate signature");
213 if (!git_oid_equal(&idx_checksum
, &idx
->checksum
))
214 return midx_error("index signature mismatch");
/* The chunk table starts immediately after the header. */
216 chunk_hdr
= data
+ sizeof(struct git_midx_header
);
218 for (i
= 0; i
< hdr
->chunks
; ++i
, chunk_hdr
+= 12) {
/* Bytes 4..11 of the entry: 64-bit offset built from two 32-bit words. */
219 chunk_offset
= ((off64_t
)ntohl(*((uint32_t *)(chunk_hdr
+ 4)))) << 32 |
220 ((off64_t
)ntohl(*((uint32_t *)(chunk_hdr
+ 8))));
/* Equal offsets pass this check; only a decrease is an error. */
221 if (chunk_offset
< last_chunk_offset
)
222 return midx_error("chunks are non-monotonic");
223 if (chunk_offset
>= trailer_offset
)
224 return midx_error("chunks extend beyond the trailer");
/* The previous chunk ends where this one begins. */
225 if (last_chunk
!= NULL
)
226 last_chunk
->length
= (size_t)(chunk_offset
- last_chunk_offset
);
227 last_chunk_offset
= chunk_offset
;
/* Bytes 0..3 of the entry: chunk id selecting the descriptor to fill. */
229 switch (ntohl(*((uint32_t *)(chunk_hdr
+ 0)))) {
230 case MIDX_PACKFILE_NAMES_ID
:
231 chunk_packfile_names
.offset
= last_chunk_offset
;
232 last_chunk
= &chunk_packfile_names
;
235 case MIDX_OID_FANOUT_ID
:
236 chunk_oid_fanout
.offset
= last_chunk_offset
;
237 last_chunk
= &chunk_oid_fanout
;
240 case MIDX_OID_LOOKUP_ID
:
241 chunk_oid_lookup
.offset
= last_chunk_offset
;
242 last_chunk
= &chunk_oid_lookup
;
245 case MIDX_OBJECT_OFFSETS_ID
:
246 chunk_object_offsets
.offset
= last_chunk_offset
;
247 last_chunk
= &chunk_object_offsets
;
250 case MIDX_OBJECT_LARGE_OFFSETS_ID
:
251 chunk_object_large_offsets
.offset
= last_chunk_offset
;
252 last_chunk
= &chunk_object_large_offsets
;
256 return midx_error("unrecognized chunk ID");
/* The final chunk runs up to the trailer. */
259 last_chunk
->length
= (size_t)(trailer_offset
- last_chunk_offset
);
/* The packfile count is stored big-endian in the header. */
261 error
= midx_parse_packfile_names(
262 idx
, data
, ntohl(hdr
->packfiles
), &chunk_packfile_names
);
/* Sets idx->num_objects, which the next two parsers depend on. */
265 error
= midx_parse_oid_fanout(idx
, data
, &chunk_oid_fanout
);
268 error
= midx_parse_oid_lookup(idx
, data
, &chunk_oid_lookup
);
271 error
= midx_parse_object_offsets(idx
, data
, &chunk_object_offsets
);
274 error
= midx_parse_object_large_offsets(idx
, data
, &chunk_object_large_offsets
);
/*
 * Loads a multi-pack-index from the file at path: the file is opened
 * read-only, checked to be a regular file whose size fits in size_t,
 * mapped read-only in full, and handed to git_midx_parse().
 *
 * NOTE(review): idx_out presumably receives the newly allocated
 * git_midx_file on success - confirm.
 *
 * Errors set by this code use GIT_ERROR_ODB: one when fstat fails and one
 * when the path is not a regular file or its size does not fit in size_t.
 */
282 git_midx_file
**idx_out
,
291 /* TODO: properly open the file without access time using O_NOATIME */
292 fd
= git_futils_open_ro(path
);
296 if (p_fstat(fd
, &st
) < 0) {
298 git_error_set(GIT_ERROR_ODB
, "multi-pack-index file not found - '%s'", path
);
/* Only regular files whose size is representable as size_t are accepted. */
302 if (!S_ISREG(st
.st_mode
) || !git__is_sizet(st
.st_size
)) {
304 git_error_set(GIT_ERROR_ODB
, "invalid pack index '%s'", path
);
307 idx_size
= (size_t)st
.st_size
;
/* Zero-initialized so that unset pointers and counts read as 0. */
309 idx
= git__calloc(1, sizeof(git_midx_file
));
/*
 * NOTE(review): GIT_ERROR_CHECK_ALLOC presumably returns early when the
 * allocation failed; make sure fd is closed on that path - confirm.
 */
310 GIT_ERROR_CHECK_ALLOC(idx
);
/* Map the whole file; the parsed index keeps pointers into this mapping. */
312 error
= git_futils_mmap_ro(&idx
->index_map
, fd
, 0, idx_size
);
319 if ((error
= git_midx_parse(idx
, idx
->index_map
.data
, idx_size
)) < 0) {
/*
 * Looks up an object id, or a prefix of one, in the multi-pack index.
 *
 * short_oid: id to search for; only the first len units take part in the
 *            prefix comparisons done with git_oid_ncmp.
 *            NOTE(review): len is compared against GIT_OID_HEXSZ, so it is
 *            presumably a count of hex digits - confirm.
 *
 * On a match the code below stores the pack index and the matched id into
 * the entry e.
 *
 * Error results visible below: git_odb__error_notfound when nothing
 * matches or when a large-offset index is out of range,
 * git_odb__error_ambiguous when a prefix matches more than one id, and
 * midx_error when the stored pack index is outside the packfile names
 * table.
 */
328 int git_midx_entry_find(
331 const git_oid
*short_oid
,
337 const git_oid
*current
= NULL
;
338 const unsigned char *object_offset
;
/*
 * The fanout entry for the first id byte bounds the search from above and
 * the preceding entry (or 0 for byte 0x00) bounds it from below.
 */
343 hi
= ntohl(idx
->oid_fanout
[(int)short_oid
->id
[0]]);
344 lo
= ((short_oid
->id
[0] == 0x0) ? 0 : ntohl(idx
->oid_fanout
[(int)short_oid
->id
[0] - 1]));
/* Search the 20-byte-stride id table between lo and hi. */
346 pos
= git_pack__lookup_sha1(idx
->oid_lookup
, 20, lo
, hi
, short_oid
->id
);
349 /* An object matching exactly the oid was found */
351 current
= idx
->oid_lookup
+ pos
;
353 /* No object was found */
354 /* pos refers to the object with the "closest" oid to short_oid */
/*
 * NOTE(review): num_objects is cast to int for this comparison; confirm it
 * can never exceed INT_MAX.
 */
356 if (pos
< (int)idx
->num_objects
) {
357 current
= idx
->oid_lookup
+ pos
;
/* A zero result means the first len units agree, i.e. a prefix match. */
359 if (!git_oid_ncmp(short_oid
, current
, len
))
/* Only a partial id can be ambiguous, and only if a next entry exists. */
364 if (found
&& len
!= GIT_OID_HEXSZ
&& pos
+ 1 < (int)idx
->num_objects
) {
365 /* Check for ambiguity */
366 const git_oid
*next
= current
+ 1;
/* The table is sorted, so a second match must be the very next entry. */
368 if (!git_oid_ncmp(short_oid
, next
, len
)) {
374 return git_odb__error_notfound("failed to find offset for multi-pack index entry", short_oid
, len
);
376 return git_odb__error_ambiguous("found multiple offsets for multi-pack index entry");
/* Each object owns an 8-byte record: pack index word, then offset word. */
378 object_offset
= idx
->object_offsets
+ pos
* 8;
379 offset
= ntohl(*((uint32_t *)(object_offset
+ 4)));
/*
 * A set top bit marks the remaining 31 bits as an index into the large
 * offsets table instead of a direct offset.
 */
380 if (offset
& 0x80000000) {
381 uint32_t object_large_offsets_pos
= offset
& 0x7fffffff;
382 const unsigned char *object_large_offsets_index
= idx
->object_large_offsets
;
384 /* Make sure we're not being sent out of bounds */
385 if (object_large_offsets_pos
>= idx
->num_object_large_offsets
)
386 return git_odb__error_notfound("invalid index into the object large offsets table", short_oid
, len
);
388 object_large_offsets_index
+= 8 * object_large_offsets_pos
;
/* Assemble the 64-bit offset from two big-endian 32-bit words. */
390 offset
= (((uint64_t)ntohl(*((uint32_t *)(object_large_offsets_index
+ 0)))) << 32) |
391 ntohl(*((uint32_t *)(object_large_offsets_index
+ 4)));
/* The pack index comes from the file, so it is range checked before use. */
393 pack_index
= ntohl(*((uint32_t *)(object_offset
+ 0)));
394 if (pack_index
>= git_vector_length(&idx
->packfile_names
))
395 return midx_error("invalid index into the packfile names table");
396 e
->pack_index
= pack_index
;
398 git_oid_cpy(&e
->sha1
, current
);
/*
 * Releases the resources referenced by idx that are visible here: the file
 * mapping, when one is present, and the packfile_names vector.
 *
 * The vector holds pointers into the index data rather than copies (see
 * the packfile names parser), so only the vector itself is freed.
 */
402 void git_midx_close(git_midx_file
*idx
)
/* A NULL data pointer means nothing was mapped. */
406 if (idx
->index_map
.data
)
407 git_futils_mmap_free(&idx
->index_map
);
408 git_vector_free(&idx
->packfile_names
);
411 void git_midx_free(git_midx_file
*idx
)