]> git.proxmox.com Git - libgit2.git/blob - src/midx.c
21cfff497cfab7e670f48d85ab5dc638a9503278
[libgit2.git] / src / midx.c
1 /*
2 * Copyright (C) the libgit2 contributors. All rights reserved.
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8 #include "midx.h"
9
10 #include "buffer.h"
11 #include "futils.h"
12 #include "hash.h"
13 #include "odb.h"
14 #include "pack.h"
15
/* Permission bits for multi-pack-index files: 0444, i.e. read-only. */
#define GIT_MIDX_FILE_MODE 0444

/* Magic number, format version and object-id version accepted by the parser. */
#define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */
#define MIDX_VERSION 1
#define MIDX_OBJECT_ID_VERSION 1

/*
 * Fixed-size header located at the very beginning of a multi-pack-index
 * file. The parser overlays this struct directly on the raw file bytes, so
 * the field order and widths must not change. The 32-bit fields hold values
 * in network byte order (they are compared against htonl() / decoded with
 * ntohl() by the parser).
 */
struct git_midx_header {
	/* Must equal MIDX_SIGNATURE (network byte order). */
	uint32_t signature;
	/* Must equal MIDX_VERSION. */
	uint8_t version;
	/* Must equal MIDX_OBJECT_ID_VERSION. */
	uint8_t object_id_version;
	/* Number of entries in the chunk table that follows the header. */
	uint8_t chunks;
	/* Not inspected by the parser in this file. */
	uint8_t base_midx_files;
	/* Number of packfile names expected (network byte order). */
	uint32_t packfiles;
};
29
30 #define MIDX_PACKFILE_NAMES_ID 0x504e414d /* "PNAM" */
31 #define MIDX_OID_FANOUT_ID 0x4f494446 /* "OIDF" */
32 #define MIDX_OID_LOOKUP_ID 0x4f49444c /* "OIDL" */
33 #define MIDX_OBJECT_OFFSETS_ID 0x4f4f4646 /* "OOFF" */
34 #define MIDX_OBJECT_LARGE_OFFSETS_ID 0x4c4f4646 /* "LOFF" */
35
36 struct git_midx_chunk {
37 off64_t offset;
38 size_t length;
39 };
40
41 static int midx_error(const char *message)
42 {
43 git_error_set(GIT_ERROR_ODB, "invalid multi-pack-index file - %s", message);
44 return -1;
45 }
46
47 static int midx_parse_packfile_names(
48 git_midx_file *idx,
49 const unsigned char *data,
50 uint32_t packfiles,
51 struct git_midx_chunk *chunk)
52 {
53 int error;
54 uint32_t i;
55 char *packfile_name = (char *)(data + chunk->offset);
56 size_t chunk_size = chunk->length, len;
57 if (chunk->offset == 0)
58 return midx_error("missing Packfile Names chunk");
59 if (chunk->length == 0)
60 return midx_error("empty Packfile Names chunk");
61 if ((error = git_vector_init(&idx->packfile_names, packfiles, git__strcmp_cb)) < 0)
62 return error;
63 for (i = 0; i < packfiles; ++i) {
64 len = p_strnlen(packfile_name, chunk_size);
65 if (len == 0)
66 return midx_error("empty packfile name");
67 if (len + 1 > chunk_size)
68 return midx_error("unterminated packfile name");
69 git_vector_insert(&idx->packfile_names, packfile_name);
70 if (i && strcmp(git_vector_get(&idx->packfile_names, i - 1), packfile_name) >= 0)
71 return midx_error("packfile names are not sorted");
72 if (strlen(packfile_name) <= strlen(".idx") || git__suffixcmp(packfile_name, ".idx") != 0)
73 return midx_error("non-.idx packfile name");
74 if (strchr(packfile_name, '/') != NULL || strchr(packfile_name, '\\') != NULL)
75 return midx_error("non-local packfile");
76 packfile_name += len + 1;
77 chunk_size -= len + 1;
78 }
79 return 0;
80 }
81
82 static int midx_parse_oid_fanout(
83 git_midx_file *idx,
84 const unsigned char *data,
85 struct git_midx_chunk *chunk_oid_fanout)
86 {
87 uint32_t i, nr;
88 if (chunk_oid_fanout->offset == 0)
89 return midx_error("missing OID Fanout chunk");
90 if (chunk_oid_fanout->length == 0)
91 return midx_error("empty OID Fanout chunk");
92 if (chunk_oid_fanout->length != 256 * 4)
93 return midx_error("OID Fanout chunk has wrong length");
94
95 idx->oid_fanout = (const uint32_t *)(data + chunk_oid_fanout->offset);
96 nr = 0;
97 for (i = 0; i < 256; ++i) {
98 uint32_t n = ntohl(idx->oid_fanout[i]);
99 if (n < nr)
100 return midx_error("index is non-monotonic");
101 nr = n;
102 }
103 idx->num_objects = nr;
104 return 0;
105 }
106
107 static int midx_parse_oid_lookup(
108 git_midx_file *idx,
109 const unsigned char *data,
110 struct git_midx_chunk *chunk_oid_lookup)
111 {
112 uint32_t i;
113 git_oid *oid, *prev_oid, zero_oid = {{0}};
114
115 if (chunk_oid_lookup->offset == 0)
116 return midx_error("missing OID Lookup chunk");
117 if (chunk_oid_lookup->length == 0)
118 return midx_error("empty OID Lookup chunk");
119 if (chunk_oid_lookup->length != idx->num_objects * 20)
120 return midx_error("OID Lookup chunk has wrong length");
121
122 idx->oid_lookup = oid = (git_oid *)(data + chunk_oid_lookup->offset);
123 prev_oid = &zero_oid;
124 for (i = 0; i < idx->num_objects; ++i, ++oid) {
125 if (git_oid_cmp(prev_oid, oid) >= 0)
126 return midx_error("OID Lookup index is non-monotonic");
127 prev_oid = oid;
128 }
129
130 return 0;
131 }
132
133 static int midx_parse_object_offsets(
134 git_midx_file *idx,
135 const unsigned char *data,
136 struct git_midx_chunk *chunk_object_offsets)
137 {
138 if (chunk_object_offsets->offset == 0)
139 return midx_error("missing Object Offsets chunk");
140 if (chunk_object_offsets->length == 0)
141 return midx_error("empty Object Offsets chunk");
142 if (chunk_object_offsets->length != idx->num_objects * 8)
143 return midx_error("Object Offsets chunk has wrong length");
144
145 idx->object_offsets = data + chunk_object_offsets->offset;
146
147 return 0;
148 }
149
150 static int midx_parse_object_large_offsets(
151 git_midx_file *idx,
152 const unsigned char *data,
153 struct git_midx_chunk *chunk_object_large_offsets)
154 {
155 if (chunk_object_large_offsets->length == 0)
156 return 0;
157 if (chunk_object_large_offsets->length % 8 != 0)
158 return midx_error("malformed Object Large Offsets chunk");
159
160 idx->object_large_offsets = data + chunk_object_large_offsets->offset;
161 idx->num_object_large_offsets = chunk_object_large_offsets->length / 8;
162
163 return 0;
164 }
165
166 int git_midx_parse(
167 git_midx_file *idx,
168 const unsigned char *data,
169 size_t size)
170 {
171 struct git_midx_header *hdr;
172 const unsigned char *chunk_hdr;
173 struct git_midx_chunk *last_chunk;
174 uint32_t i;
175 off64_t last_chunk_offset, chunk_offset, trailer_offset;
176 git_oid idx_checksum = {{0}};
177 int error;
178 struct git_midx_chunk chunk_packfile_names = {0},
179 chunk_oid_fanout = {0},
180 chunk_oid_lookup = {0},
181 chunk_object_offsets = {0},
182 chunk_object_large_offsets = {0};
183
184 assert(idx);
185
186 if (size < sizeof(struct git_midx_header) + 20)
187 return midx_error("multi-pack index is too short");
188
189 hdr = ((struct git_midx_header *)data);
190
191 if (hdr->signature != htonl(MIDX_SIGNATURE) ||
192 hdr->version != MIDX_VERSION ||
193 hdr->object_id_version != MIDX_OBJECT_ID_VERSION) {
194 return midx_error("unsupported multi-pack index version");
195 }
196 if (hdr->chunks == 0)
197 return midx_error("no chunks in multi-pack index");
198
199 /*
200 * The very first chunk's offset should be after the header, all the chunk
201 * headers, and a special zero chunk.
202 */
203 last_chunk_offset =
204 sizeof(struct git_midx_header) +
205 (1 + hdr->chunks) * 12;
206 trailer_offset = size - 20;
207 if (trailer_offset < last_chunk_offset)
208 return midx_error("wrong index size");
209 git_oid_cpy(&idx->checksum, (git_oid *)(data + trailer_offset));
210
211 if (git_hash_buf(&idx_checksum, data, (size_t)trailer_offset) < 0)
212 return midx_error("could not calculate signature");
213 if (!git_oid_equal(&idx_checksum, &idx->checksum))
214 return midx_error("index signature mismatch");
215
216 chunk_hdr = data + sizeof(struct git_midx_header);
217 last_chunk = NULL;
218 for (i = 0; i < hdr->chunks; ++i, chunk_hdr += 12) {
219 chunk_offset = ((off64_t)ntohl(*((uint32_t *)(chunk_hdr + 4)))) << 32 |
220 ((off64_t)ntohl(*((uint32_t *)(chunk_hdr + 8))));
221 if (chunk_offset < last_chunk_offset)
222 return midx_error("chunks are non-monotonic");
223 if (chunk_offset >= trailer_offset)
224 return midx_error("chunks extend beyond the trailer");
225 if (last_chunk != NULL)
226 last_chunk->length = (size_t)(chunk_offset - last_chunk_offset);
227 last_chunk_offset = chunk_offset;
228
229 switch (ntohl(*((uint32_t *)(chunk_hdr + 0)))) {
230 case MIDX_PACKFILE_NAMES_ID:
231 chunk_packfile_names.offset = last_chunk_offset;
232 last_chunk = &chunk_packfile_names;
233 break;
234
235 case MIDX_OID_FANOUT_ID:
236 chunk_oid_fanout.offset = last_chunk_offset;
237 last_chunk = &chunk_oid_fanout;
238 break;
239
240 case MIDX_OID_LOOKUP_ID:
241 chunk_oid_lookup.offset = last_chunk_offset;
242 last_chunk = &chunk_oid_lookup;
243 break;
244
245 case MIDX_OBJECT_OFFSETS_ID:
246 chunk_object_offsets.offset = last_chunk_offset;
247 last_chunk = &chunk_object_offsets;
248 break;
249
250 case MIDX_OBJECT_LARGE_OFFSETS_ID:
251 chunk_object_large_offsets.offset = last_chunk_offset;
252 last_chunk = &chunk_object_large_offsets;
253 break;
254
255 default:
256 return midx_error("unrecognized chunk ID");
257 }
258 }
259 last_chunk->length = (size_t)(trailer_offset - last_chunk_offset);
260
261 error = midx_parse_packfile_names(
262 idx, data, ntohl(hdr->packfiles), &chunk_packfile_names);
263 if (error < 0)
264 return error;
265 error = midx_parse_oid_fanout(idx, data, &chunk_oid_fanout);
266 if (error < 0)
267 return error;
268 error = midx_parse_oid_lookup(idx, data, &chunk_oid_lookup);
269 if (error < 0)
270 return error;
271 error = midx_parse_object_offsets(idx, data, &chunk_object_offsets);
272 if (error < 0)
273 return error;
274 error = midx_parse_object_large_offsets(idx, data, &chunk_object_large_offsets);
275 if (error < 0)
276 return error;
277
278 return 0;
279 }
280
281 int git_midx_open(
282 git_midx_file **idx_out,
283 const char *path)
284 {
285 git_midx_file *idx;
286 git_file fd = -1;
287 size_t idx_size;
288 struct stat st;
289 int error;
290
291 /* TODO: properly open the file without access time using O_NOATIME */
292 fd = git_futils_open_ro(path);
293 if (fd < 0)
294 return fd;
295
296 if (p_fstat(fd, &st) < 0) {
297 p_close(fd);
298 git_error_set(GIT_ERROR_ODB, "multi-pack-index file not found - '%s'", path);
299 return -1;
300 }
301
302 if (!S_ISREG(st.st_mode) || !git__is_sizet(st.st_size)) {
303 p_close(fd);
304 git_error_set(GIT_ERROR_ODB, "invalid pack index '%s'", path);
305 return -1;
306 }
307 idx_size = (size_t)st.st_size;
308
309 idx = git__calloc(1, sizeof(git_midx_file));
310 GIT_ERROR_CHECK_ALLOC(idx);
311
312 error = git_futils_mmap_ro(&idx->index_map, fd, 0, idx_size);
313 p_close(fd);
314 if (error < 0) {
315 git_midx_free(idx);
316 return error;
317 }
318
319 if ((error = git_midx_parse(idx, idx->index_map.data, idx_size)) < 0) {
320 git_midx_free(idx);
321 return error;
322 }
323
324 *idx_out = idx;
325 return 0;
326 }
327
328 int git_midx_entry_find(
329 git_midx_entry *e,
330 git_midx_file *idx,
331 const git_oid *short_oid,
332 size_t len)
333 {
334 int pos, found = 0;
335 size_t pack_index;
336 uint32_t hi, lo;
337 const git_oid *current = NULL;
338 const unsigned char *object_offset;
339 off64_t offset;
340
341 assert(idx);
342
343 hi = ntohl(idx->oid_fanout[(int)short_oid->id[0]]);
344 lo = ((short_oid->id[0] == 0x0) ? 0 : ntohl(idx->oid_fanout[(int)short_oid->id[0] - 1]));
345
346 pos = git_pack__lookup_sha1(idx->oid_lookup, 20, lo, hi, short_oid->id);
347
348 if (pos >= 0) {
349 /* An object matching exactly the oid was found */
350 found = 1;
351 current = idx->oid_lookup + pos;
352 } else {
353 /* No object was found */
354 /* pos refers to the object with the "closest" oid to short_oid */
355 pos = -1 - pos;
356 if (pos < (int)idx->num_objects) {
357 current = idx->oid_lookup + pos;
358
359 if (!git_oid_ncmp(short_oid, current, len))
360 found = 1;
361 }
362 }
363
364 if (found && len != GIT_OID_HEXSZ && pos + 1 < (int)idx->num_objects) {
365 /* Check for ambiguousity */
366 const git_oid *next = current + 1;
367
368 if (!git_oid_ncmp(short_oid, next, len)) {
369 found = 2;
370 }
371 }
372
373 if (!found)
374 return git_odb__error_notfound("failed to find offset for multi-pack index entry", short_oid, len);
375 if (found > 1)
376 return git_odb__error_ambiguous("found multiple offsets for multi-pack index entry");
377
378 object_offset = idx->object_offsets + pos * 8;
379 offset = ntohl(*((uint32_t *)(object_offset + 4)));
380 if (offset & 0x80000000) {
381 uint32_t object_large_offsets_pos = offset & 0x7fffffff;
382 const unsigned char *object_large_offsets_index = idx->object_large_offsets;
383
384 /* Make sure we're not being sent out of bounds */
385 if (object_large_offsets_pos >= idx->num_object_large_offsets)
386 return git_odb__error_notfound("invalid index into the object large offsets table", short_oid, len);
387
388 object_large_offsets_index += 8 * object_large_offsets_pos;
389
390 offset = (((uint64_t)ntohl(*((uint32_t *)(object_large_offsets_index + 0)))) << 32) |
391 ntohl(*((uint32_t *)(object_large_offsets_index + 4)));
392 }
393 pack_index = ntohl(*((uint32_t *)(object_offset + 0)));
394 if (pack_index >= git_vector_length(&idx->packfile_names))
395 return midx_error("invalid index into the packfile names table");
396 e->pack_index = pack_index;
397 e->offset = offset;
398 git_oid_cpy(&e->sha1, current);
399 return 0;
400 }
401
402 void git_midx_close(git_midx_file *idx)
403 {
404 assert(idx);
405
406 if (idx->index_map.data)
407 git_futils_mmap_free(&idx->index_map);
408 git_vector_free(&idx->packfile_names);
409 }
410
411 void git_midx_free(git_midx_file *idx)
412 {
413 if (!idx)
414 return;
415
416 git_midx_close(idx);
417 git__free(idx);
418 }