]>
Commit | Line | Data |
---|---|---|
3412391d | 1 | /* |
5e0de328 | 2 | * Copyright (C) 2009-2012 the libgit2 contributors |
3412391d | 3 | * |
bb742ede VM |
4 | * This file is part of libgit2, distributed under the GNU GPL v2 with |
5 | * a Linking Exception. For full terms see the included COPYING file. | |
3412391d CMN |
6 | */ |
7 | ||
0c3bae62 VM |
8 | #include <zlib.h> |
9 | ||
f23c4a66 | 10 | #include "git2/indexer.h" |
7d0cdf82 | 11 | #include "git2/object.h" |
b7c44096 | 12 | #include "git2/oid.h" |
f23c4a66 | 13 | |
3412391d CMN |
14 | #include "common.h" |
15 | #include "pack.h" | |
f23c4a66 | 16 | #include "mwindow.h" |
3412391d | 17 | #include "posix.h" |
b7c44096 CMN |
18 | #include "pack.h" |
19 | #include "filebuf.h" | |
20 | #include "sha1.h" | |
21 | ||
22 | #define UINT31_MAX (0x7FFFFFFF) | |
3412391d | 23 | |
b5b474dd | 24 | struct entry { |
b7c44096 | 25 | git_oid oid; |
b5b474dd CMN |
26 | uint32_t crc; |
27 | uint32_t offset; | |
28 | uint64_t offset_long; | |
29 | }; | |
30 | ||
2d3e417e | 31 | struct git_indexer { |
a070f152 | 32 | struct git_pack_file *pack; |
b7c44096 CMN |
33 | size_t nr_objects; |
34 | git_vector objects; | |
35 | git_filebuf file; | |
36 | unsigned int fanout[256]; | |
37 | git_oid hash; | |
2d3e417e | 38 | }; |
f23c4a66 | 39 | |
3f93e16c CMN |
40 | struct git_indexer_stream { |
41 | unsigned int parsed_header :1, | |
42 | opened_pack; | |
43 | struct git_pack_file *pack; | |
44 | git_filebuf pack_file; | |
45 | git_filebuf index_file; | |
46 | git_off_t off; | |
47 | size_t nr_objects; | |
48 | git_vector objects; | |
49 | git_vector deltas; | |
50 | unsigned int fanout[256]; | |
51 | git_oid hash; | |
52 | }; | |
53 | ||
54 | struct delta_info { | |
453ab98d | 55 | git_off_t delta_off; |
3f93e16c CMN |
56 | }; |
57 | ||
b7c44096 CMN |
58 | const git_oid *git_indexer_hash(git_indexer *idx) |
59 | { | |
60 | return &idx->hash; | |
61 | } | |
62 | ||
1c9c081a CMN |
63 | const git_oid *git_indexer_stream_hash(git_indexer_stream *idx) |
64 | { | |
65 | return &idx->hash; | |
66 | } | |
67 | ||
3f93e16c CMN |
68 | static int open_pack(struct git_pack_file **out, const char *filename) |
69 | { | |
70 | size_t namelen; | |
71 | struct git_pack_file *pack; | |
72 | struct stat st; | |
73 | int fd; | |
74 | ||
75 | namelen = strlen(filename); | |
76 | pack = git__calloc(1, sizeof(struct git_pack_file) + namelen + 1); | |
77 | GITERR_CHECK_ALLOC(pack); | |
78 | ||
79 | memcpy(pack->pack_name, filename, namelen + 1); | |
80 | ||
81 | if (p_stat(filename, &st) < 0) { | |
82 | giterr_set(GITERR_OS, "Failed to stat packfile."); | |
83 | goto cleanup; | |
84 | } | |
85 | ||
86 | if ((fd = p_open(pack->pack_name, O_RDONLY)) < 0) { | |
87 | giterr_set(GITERR_OS, "Failed to open packfile."); | |
88 | goto cleanup; | |
89 | } | |
90 | ||
91 | pack->mwf.fd = fd; | |
92 | pack->mwf.size = (git_off_t)st.st_size; | |
93 | ||
94 | *out = pack; | |
95 | return 0; | |
96 | ||
97 | cleanup: | |
98 | git__free(pack); | |
99 | return -1; | |
100 | } | |
101 | ||
102 | static int parse_header(struct git_pack_header *hdr, struct git_pack_file *pack) | |
3412391d | 103 | { |
3412391d CMN |
104 | int error; |
105 | ||
106 | /* Verify we recognize this pack file format. */ | |
3f93e16c | 107 | if ((error = p_read(pack->mwf.fd, hdr, sizeof(*hdr))) < 0) { |
4aa7de15 RB |
108 | giterr_set(GITERR_OS, "Failed to read in pack header"); |
109 | return error; | |
110 | } | |
3412391d | 111 | |
3f93e16c | 112 | if (hdr->hdr_signature != ntohl(PACK_SIGNATURE)) { |
73d87a09 | 113 | giterr_set(GITERR_INDEXER, "Wrong pack signature"); |
4aa7de15 RB |
114 | return -1; |
115 | } | |
3412391d | 116 | |
3f93e16c | 117 | if (!pack_version_ok(hdr->hdr_version)) { |
73d87a09 | 118 | giterr_set(GITERR_INDEXER, "Wrong pack version"); |
4aa7de15 RB |
119 | return -1; |
120 | } | |
f23c4a66 | 121 | |
4aa7de15 | 122 | return 0; |
3412391d CMN |
123 | } |
124 | ||
c1af5a39 | 125 | static int objects_cmp(const void *a, const void *b) |
b7c44096 CMN |
126 | { |
127 | const struct entry *entrya = a; | |
128 | const struct entry *entryb = b; | |
129 | ||
130 | return git_oid_cmp(&entrya->oid, &entryb->oid); | |
131 | } | |
132 | ||
c1af5a39 CMN |
133 | static int cache_cmp(const void *a, const void *b) |
134 | { | |
135 | const struct git_pack_entry *ea = a; | |
136 | const struct git_pack_entry *eb = b; | |
137 | ||
138 | return git_oid_cmp(&ea->sha1, &eb->sha1); | |
139 | } | |
140 | ||
3f93e16c CMN |
141 | int git_indexer_stream_new(git_indexer_stream **out, const char *prefix) |
142 | { | |
143 | git_indexer_stream *idx; | |
144 | git_buf path = GIT_BUF_INIT; | |
37159957 | 145 | static const char suff[] = "/pack"; |
3f93e16c | 146 | int error; |
c1af5a39 | 147 | |
3f93e16c CMN |
148 | idx = git__calloc(1, sizeof(git_indexer_stream)); |
149 | GITERR_CHECK_ALLOC(idx); | |
150 | ||
151 | error = git_buf_joinpath(&path, prefix, suff); | |
152 | if (error < 0) | |
153 | goto cleanup; | |
154 | ||
155 | error = git_filebuf_open(&idx->pack_file, path.ptr, | |
156 | GIT_FILEBUF_TEMPORARY | GIT_FILEBUF_DO_NOT_BUFFER); | |
157 | git_buf_free(&path); | |
158 | if (error < 0) | |
159 | goto cleanup; | |
160 | ||
161 | *out = idx; | |
162 | return 0; | |
163 | ||
164 | cleanup: | |
165 | git_buf_free(&path); | |
166 | git_filebuf_cleanup(&idx->pack_file); | |
167 | git__free(idx); | |
168 | return -1; | |
169 | } | |
170 | ||
171 | /* Try to store the delta so we can try to resolve it later */ | |
172 | static int store_delta(git_indexer_stream *idx) | |
3412391d | 173 | { |
3f93e16c CMN |
174 | git_otype type; |
175 | git_mwindow *w = NULL; | |
176 | git_mwindow_file *mwf = &idx->pack->mwf; | |
177 | git_off_t entry_start = idx->off; | |
178 | struct delta_info *delta; | |
179 | size_t entry_size; | |
453ab98d | 180 | git_rawobj obj; |
3f93e16c | 181 | int error; |
3412391d | 182 | |
3f93e16c CMN |
183 | /* |
184 | * ref-delta objects can refer to object that we haven't | |
185 | * found yet, so give it another opportunity | |
186 | */ | |
187 | if (git_packfile_unpack_header(&entry_size, &type, mwf, &w, &idx->off) < 0) | |
188 | return -1; | |
ade3c9bb | 189 | |
3f93e16c CMN |
190 | git_mwindow_close(&w); |
191 | ||
192 | /* If it's not a delta, mark it as failure, we can't do anything with it */ | |
193 | if (type != GIT_OBJ_REF_DELTA && type != GIT_OBJ_OFS_DELTA) | |
4aa7de15 | 194 | return -1; |
3f93e16c | 195 | |
3f93e16c | 196 | if (type == GIT_OBJ_REF_DELTA) { |
3f93e16c CMN |
197 | idx->off += GIT_OID_RAWSZ; |
198 | } else { | |
453ab98d CMN |
199 | git_off_t base_off; |
200 | ||
201 | base_off = get_delta_base(idx->pack, &w, &idx->off, type, entry_start); | |
3f93e16c | 202 | git_mwindow_close(&w); |
453ab98d CMN |
203 | if (base_off < 0) |
204 | return (int)base_off; | |
4aa7de15 | 205 | } |
3412391d | 206 | |
453ab98d | 207 | error = packfile_unpack_compressed(&obj, idx->pack, &w, &idx->off, entry_size, type); |
904b67e6 | 208 | if (error == GIT_EBUFS) { |
3f93e16c | 209 | idx->off = entry_start; |
904b67e6 | 210 | return GIT_EBUFS; |
3f93e16c | 211 | } else if (error < 0){ |
453ab98d | 212 | return -1; |
3f93e16c CMN |
213 | } |
214 | ||
453ab98d CMN |
215 | delta = git__calloc(1, sizeof(struct delta_info)); |
216 | GITERR_CHECK_ALLOC(delta); | |
217 | delta->delta_off = entry_start; | |
218 | ||
219 | git__free(obj.data); | |
220 | ||
3f93e16c | 221 | if (git_vector_insert(&idx->deltas, delta) < 0) |
453ab98d CMN |
222 | return -1; |
223 | ||
224 | return 0; | |
225 | } | |
226 | ||
227 | static int hash_and_save(git_indexer_stream *idx, git_rawobj *obj, git_off_t entry_start) | |
228 | { | |
229 | int i; | |
230 | git_oid oid; | |
231 | void *packed; | |
232 | size_t entry_size; | |
233 | unsigned int left; | |
234 | struct entry *entry; | |
235 | git_mwindow *w = NULL; | |
236 | git_mwindow_file *mwf = &idx->pack->mwf; | |
237 | struct git_pack_entry *pentry; | |
238 | ||
239 | entry = git__calloc(1, sizeof(*entry)); | |
240 | GITERR_CHECK_ALLOC(entry); | |
241 | ||
242 | if (entry_start > UINT31_MAX) { | |
243 | entry->offset = UINT32_MAX; | |
244 | entry->offset_long = entry_start; | |
245 | } else { | |
246 | entry->offset = (uint32_t)entry_start; | |
247 | } | |
248 | ||
249 | /* FIXME: Parse the object instead of hashing it */ | |
250 | if (git_odb__hashobj(&oid, obj) < 0) { | |
73d87a09 | 251 | giterr_set(GITERR_INDEXER, "Failed to hash object"); |
453ab98d CMN |
252 | return -1; |
253 | } | |
254 | ||
255 | pentry = git__malloc(sizeof(struct git_pack_entry)); | |
256 | GITERR_CHECK_ALLOC(pentry); | |
257 | ||
258 | git_oid_cpy(&pentry->sha1, &oid); | |
259 | pentry->offset = entry_start; | |
260 | if (git_vector_insert(&idx->pack->cache, pentry) < 0) | |
3f93e16c CMN |
261 | goto on_error; |
262 | ||
453ab98d CMN |
263 | git_oid_cpy(&entry->oid, &oid); |
264 | entry->crc = crc32(0L, Z_NULL, 0); | |
265 | ||
266 | entry_size = (size_t)(idx->off - entry_start); | |
267 | packed = git_mwindow_open(mwf, &w, entry_start, entry_size, &left); | |
268 | if (packed == NULL) | |
269 | goto on_error; | |
270 | ||
271 | entry->crc = htonl(crc32(entry->crc, packed, (uInt)entry_size)); | |
272 | git_mwindow_close(&w); | |
273 | ||
274 | /* Add the object to the list */ | |
275 | if (git_vector_insert(&idx->objects, entry) < 0) | |
276 | goto on_error; | |
277 | ||
278 | for (i = oid.id[0]; i < 256; ++i) { | |
279 | idx->fanout[i]++; | |
280 | } | |
281 | ||
3f93e16c | 282 | return 0; |
3412391d | 283 | |
3f93e16c | 284 | on_error: |
453ab98d CMN |
285 | git__free(entry); |
286 | git__free(pentry); | |
287 | git__free(obj->data); | |
3f93e16c CMN |
288 | return -1; |
289 | } | |
3412391d | 290 | |
dee5515a | 291 | int git_indexer_stream_add(git_indexer_stream *idx, const void *data, size_t size, git_indexer_stats *stats) |
3f93e16c CMN |
292 | { |
293 | int error; | |
294 | struct git_pack_header hdr; | |
3f035860 | 295 | size_t processed; |
3f93e16c | 296 | git_mwindow_file *mwf = &idx->pack->mwf; |
3412391d | 297 | |
3f93e16c CMN |
298 | assert(idx && data && stats); |
299 | ||
3f035860 VM |
300 | processed = stats->processed; |
301 | ||
3f93e16c CMN |
302 | if (git_filebuf_write(&idx->pack_file, data, size) < 0) |
303 | return -1; | |
304 | ||
305 | /* Make sure we set the new size of the pack */ | |
306 | if (idx->opened_pack) { | |
307 | idx->pack->mwf.size += size; | |
308 | //printf("\nadding %zu for %zu\n", size, idx->pack->mwf.size); | |
309 | } else { | |
310 | if (open_pack(&idx->pack, idx->pack_file.path_lock) < 0) | |
311 | return -1; | |
312 | idx->opened_pack = 1; | |
313 | mwf = &idx->pack->mwf; | |
314 | if (git_mwindow_file_register(&idx->pack->mwf) < 0) | |
315 | return -1; | |
3f93e16c CMN |
316 | } |
317 | ||
318 | if (!idx->parsed_header) { | |
319 | if ((unsigned)idx->pack->mwf.size < sizeof(hdr)) | |
320 | return 0; | |
321 | ||
322 | if (parse_header(&hdr, idx->pack) < 0) | |
323 | return -1; | |
324 | ||
325 | idx->parsed_header = 1; | |
326 | idx->nr_objects = ntohl(hdr.hdr_entries); | |
327 | idx->off = sizeof(struct git_pack_header); | |
328 | ||
329 | /* for now, limit to 2^32 objects */ | |
330 | assert(idx->nr_objects == (size_t)((unsigned int)idx->nr_objects)); | |
331 | ||
332 | if (git_vector_init(&idx->pack->cache, (unsigned int)idx->nr_objects, cache_cmp) < 0) | |
333 | return -1; | |
334 | ||
335 | idx->pack->has_cache = 1; | |
336 | if (git_vector_init(&idx->objects, (unsigned int)idx->nr_objects, objects_cmp) < 0) | |
337 | return -1; | |
338 | ||
339 | if (git_vector_init(&idx->deltas, (unsigned int)(idx->nr_objects / 2), NULL) < 0) | |
340 | return -1; | |
341 | ||
342 | stats->total = (unsigned int)idx->nr_objects; | |
343 | stats->processed = 0; | |
344 | } | |
345 | ||
346 | /* Now that we have data in the pack, let's try to parse it */ | |
347 | ||
348 | /* As the file grows any windows we try to use will be out of date */ | |
349 | git_mwindow_free_all(mwf); | |
350 | while (processed < idx->nr_objects) { | |
351 | git_rawobj obj; | |
3f93e16c | 352 | git_off_t entry_start = idx->off; |
3412391d | 353 | |
3f93e16c CMN |
354 | if (idx->pack->mwf.size <= idx->off + 20) |
355 | return 0; | |
356 | ||
357 | error = git_packfile_unpack(&obj, idx->pack, &idx->off); | |
904b67e6 | 358 | if (error == GIT_EBUFS) { |
3f93e16c CMN |
359 | idx->off = entry_start; |
360 | return 0; | |
361 | } | |
362 | ||
363 | if (error < 0) { | |
364 | idx->off = entry_start; | |
365 | error = store_delta(idx); | |
2aeadb9c | 366 | |
904b67e6 | 367 | if (error == GIT_EBUFS) |
3f93e16c CMN |
368 | return 0; |
369 | if (error < 0) | |
370 | return error; | |
3f93e16c CMN |
371 | continue; |
372 | } | |
373 | ||
453ab98d | 374 | if (hash_and_save(idx, &obj, entry_start) < 0) |
3f93e16c | 375 | goto on_error; |
3f93e16c | 376 | |
453ab98d | 377 | git__free(obj.data); |
3f93e16c | 378 | |
821f6bc7 | 379 | stats->processed = (unsigned int)++processed; |
453ab98d | 380 | } |
3f93e16c | 381 | |
453ab98d | 382 | return 0; |
3f93e16c | 383 | |
453ab98d CMN |
384 | on_error: |
385 | git_mwindow_free_all(mwf); | |
386 | return -1; | |
387 | } | |
3f93e16c | 388 | |
453ab98d CMN |
389 | static int index_path_stream(git_buf *path, git_indexer_stream *idx, const char *suffix) |
390 | { | |
391 | const char prefix[] = "pack-"; | |
392 | size_t slash = (size_t)path->size; | |
3f93e16c | 393 | |
453ab98d CMN |
394 | /* search backwards for '/' */ |
395 | while (slash > 0 && path->ptr[slash - 1] != '/') | |
396 | slash--; | |
3f93e16c | 397 | |
453ab98d CMN |
398 | if (git_buf_grow(path, slash + 1 + strlen(prefix) + |
399 | GIT_OID_HEXSZ + strlen(suffix) + 1) < 0) | |
400 | return -1; | |
401 | ||
402 | git_buf_truncate(path, slash); | |
403 | git_buf_puts(path, prefix); | |
fa6420f7 | 404 | git_oid_fmt(path->ptr + git_buf_len(path), &idx->hash); |
453ab98d CMN |
405 | path->size += GIT_OID_HEXSZ; |
406 | git_buf_puts(path, suffix); | |
407 | ||
408 | return git_buf_oom(path) ? -1 : 0; | |
409 | } | |
410 | ||
411 | static int resolve_deltas(git_indexer_stream *idx, git_indexer_stats *stats) | |
412 | { | |
413 | unsigned int i; | |
414 | struct delta_info *delta; | |
415 | ||
416 | git_vector_foreach(&idx->deltas, i, delta) { | |
417 | git_rawobj obj; | |
418 | ||
419 | idx->off = delta->delta_off; | |
420 | if (git_packfile_unpack(&obj, idx->pack, &idx->off) < 0) | |
421 | return -1; | |
422 | ||
423 | if (hash_and_save(idx, &obj, delta->delta_off) < 0) | |
424 | return -1; | |
3f93e16c CMN |
425 | |
426 | git__free(obj.data); | |
453ab98d CMN |
427 | stats->processed++; |
428 | } | |
3f93e16c | 429 | |
453ab98d CMN |
430 | return 0; |
431 | } | |
432 | ||
433 | int git_indexer_stream_finalize(git_indexer_stream *idx, git_indexer_stats *stats) | |
434 | { | |
435 | git_mwindow *w = NULL; | |
436 | unsigned int i, long_offsets = 0, left; | |
437 | struct git_pack_idx_header hdr; | |
438 | git_buf filename = GIT_BUF_INIT; | |
439 | struct entry *entry; | |
440 | void *packfile_hash; | |
441 | git_oid file_hash; | |
442 | SHA_CTX ctx; | |
443 | ||
6a9d61ef CMN |
444 | /* Test for this before resolve_deltas(), as it plays with idx->off */ |
445 | if (idx->off < idx->pack->mwf.size - GIT_OID_RAWSZ) { | |
446 | giterr_set(GITERR_INDEXER, "Indexing error: junk at the end of the pack"); | |
447 | return -1; | |
448 | } | |
449 | ||
453ab98d CMN |
450 | if (idx->deltas.length > 0) |
451 | if (resolve_deltas(idx, stats) < 0) | |
452 | return -1; | |
453 | ||
6a9d61ef CMN |
454 | if (stats->processed != stats->total) { |
455 | giterr_set(GITERR_INDEXER, "Indexing error: early EOF"); | |
456 | return -1; | |
457 | } | |
458 | ||
453ab98d CMN |
459 | git_vector_sort(&idx->objects); |
460 | ||
461 | git_buf_sets(&filename, idx->pack->pack_name); | |
462 | git_buf_truncate(&filename, filename.size - strlen("pack")); | |
463 | git_buf_puts(&filename, "idx"); | |
464 | if (git_buf_oom(&filename)) | |
465 | return -1; | |
466 | ||
467 | if (git_filebuf_open(&idx->index_file, filename.ptr, GIT_FILEBUF_HASH_CONTENTS) < 0) | |
468 | goto on_error; | |
469 | ||
470 | /* Write out the header */ | |
471 | hdr.idx_signature = htonl(PACK_IDX_SIGNATURE); | |
472 | hdr.idx_version = htonl(2); | |
473 | git_filebuf_write(&idx->index_file, &hdr, sizeof(hdr)); | |
474 | ||
475 | /* Write out the fanout table */ | |
476 | for (i = 0; i < 256; ++i) { | |
477 | uint32_t n = htonl(idx->fanout[i]); | |
478 | git_filebuf_write(&idx->index_file, &n, sizeof(n)); | |
3412391d CMN |
479 | } |
480 | ||
453ab98d CMN |
481 | /* Write out the object names (SHA-1 hashes) */ |
482 | SHA1_Init(&ctx); | |
483 | git_vector_foreach(&idx->objects, i, entry) { | |
484 | git_filebuf_write(&idx->index_file, &entry->oid, sizeof(git_oid)); | |
485 | SHA1_Update(&ctx, &entry->oid, GIT_OID_RAWSZ); | |
486 | } | |
487 | SHA1_Final(idx->hash.id, &ctx); | |
488 | ||
489 | /* Write out the CRC32 values */ | |
490 | git_vector_foreach(&idx->objects, i, entry) { | |
491 | git_filebuf_write(&idx->index_file, &entry->crc, sizeof(uint32_t)); | |
492 | } | |
493 | ||
494 | /* Write out the offsets */ | |
495 | git_vector_foreach(&idx->objects, i, entry) { | |
496 | uint32_t n; | |
497 | ||
498 | if (entry->offset == UINT32_MAX) | |
499 | n = htonl(0x80000000 | long_offsets++); | |
500 | else | |
501 | n = htonl(entry->offset); | |
502 | ||
503 | git_filebuf_write(&idx->index_file, &n, sizeof(uint32_t)); | |
504 | } | |
505 | ||
506 | /* Write out the long offsets */ | |
507 | git_vector_foreach(&idx->objects, i, entry) { | |
508 | uint32_t split[2]; | |
509 | ||
510 | if (entry->offset != UINT32_MAX) | |
511 | continue; | |
512 | ||
513 | split[0] = htonl(entry->offset_long >> 32); | |
514 | split[1] = htonl(entry->offset_long & 0xffffffff); | |
515 | ||
516 | git_filebuf_write(&idx->index_file, &split, sizeof(uint32_t) * 2); | |
517 | } | |
518 | ||
519 | /* Write out the packfile trailer */ | |
520 | packfile_hash = git_mwindow_open(&idx->pack->mwf, &w, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ, &left); | |
521 | if (packfile_hash == NULL) { | |
522 | git_mwindow_close(&w); | |
523 | goto on_error; | |
524 | } | |
525 | ||
526 | memcpy(&file_hash, packfile_hash, GIT_OID_RAWSZ); | |
527 | git_mwindow_close(&w); | |
528 | ||
529 | git_filebuf_write(&idx->index_file, &file_hash, sizeof(git_oid)); | |
530 | ||
531 | /* Write out the packfile trailer to the idx file as well */ | |
532 | if (git_filebuf_hash(&file_hash, &idx->index_file) < 0) | |
533 | goto on_error; | |
534 | ||
535 | git_filebuf_write(&idx->index_file, &file_hash, sizeof(git_oid)); | |
536 | ||
537 | /* Figure out what the final name should be */ | |
538 | if (index_path_stream(&filename, idx, ".idx") < 0) | |
539 | goto on_error; | |
540 | ||
541 | /* Commit file */ | |
542 | if (git_filebuf_commit_at(&idx->index_file, filename.ptr, GIT_PACK_FILE_MODE) < 0) | |
543 | goto on_error; | |
544 | ||
545 | git_mwindow_free_all(&idx->pack->mwf); | |
a640d79e | 546 | p_close(idx->pack->mwf.fd); |
453ab98d CMN |
547 | |
548 | if (index_path_stream(&filename, idx, ".pack") < 0) | |
549 | goto on_error; | |
550 | /* And don't forget to rename the packfile to its new place. */ | |
551 | if (git_filebuf_commit_at(&idx->pack_file, filename.ptr, GIT_PACK_FILE_MODE) < 0) | |
552 | return -1; | |
553 | ||
554 | git_buf_free(&filename); | |
3f93e16c CMN |
555 | return 0; |
556 | ||
557 | on_error: | |
453ab98d | 558 | git_mwindow_free_all(&idx->pack->mwf); |
a640d79e | 559 | p_close(idx->pack->mwf.fd); |
453ab98d CMN |
560 | git_filebuf_cleanup(&idx->index_file); |
561 | git_buf_free(&filename); | |
3f93e16c CMN |
562 | return -1; |
563 | } | |
564 | ||
1c9c081a CMN |
565 | void git_indexer_stream_free(git_indexer_stream *idx) |
566 | { | |
567 | unsigned int i; | |
568 | struct entry *e; | |
569 | struct git_pack_entry *pe; | |
570 | struct delta_info *delta; | |
571 | ||
572 | if (idx == NULL) | |
573 | return; | |
574 | ||
1c9c081a CMN |
575 | git_vector_foreach(&idx->objects, i, e) |
576 | git__free(e); | |
577 | git_vector_free(&idx->objects); | |
578 | git_vector_foreach(&idx->pack->cache, i, pe) | |
579 | git__free(pe); | |
580 | git_vector_free(&idx->pack->cache); | |
581 | git_vector_foreach(&idx->deltas, i, delta) | |
582 | git__free(delta); | |
583 | git_vector_free(&idx->deltas); | |
584 | git__free(idx->pack); | |
585 | git__free(idx); | |
586 | } | |
587 | ||
3f93e16c CMN |
588 | int git_indexer_new(git_indexer **out, const char *packname) |
589 | { | |
590 | git_indexer *idx; | |
591 | struct git_pack_header hdr; | |
592 | int error; | |
593 | ||
594 | assert(out && packname); | |
595 | ||
596 | if (git_path_root(packname) < 0) { | |
73d87a09 | 597 | giterr_set(GITERR_INDEXER, "Path is not absolute"); |
3f93e16c | 598 | return -1; |
3412391d CMN |
599 | } |
600 | ||
3f93e16c CMN |
601 | idx = git__calloc(1, sizeof(git_indexer)); |
602 | GITERR_CHECK_ALLOC(idx); | |
3412391d | 603 | |
3f93e16c CMN |
604 | open_pack(&idx->pack, packname); |
605 | ||
606 | if ((error = parse_header(&hdr, idx->pack)) < 0) | |
3412391d | 607 | goto cleanup; |
3412391d | 608 | |
3f93e16c | 609 | idx->nr_objects = ntohl(hdr.hdr_entries); |
b7c44096 | 610 | |
deafee7b RB |
611 | /* for now, limit to 2^32 objects */ |
612 | assert(idx->nr_objects == (size_t)((unsigned int)idx->nr_objects)); | |
613 | ||
614 | error = git_vector_init(&idx->pack->cache, (unsigned int)idx->nr_objects, cache_cmp); | |
4aa7de15 | 615 | if (error < 0) |
c1af5a39 CMN |
616 | goto cleanup; |
617 | ||
618 | idx->pack->has_cache = 1; | |
deafee7b | 619 | error = git_vector_init(&idx->objects, (unsigned int)idx->nr_objects, objects_cmp); |
4aa7de15 | 620 | if (error < 0) |
b5b474dd | 621 | goto cleanup; |
b5b474dd | 622 | |
3412391d CMN |
623 | *out = idx; |
624 | ||
4aa7de15 | 625 | return 0; |
3412391d CMN |
626 | |
627 | cleanup: | |
b7c44096 | 628 | git_indexer_free(idx); |
3412391d | 629 | |
3f93e16c | 630 | return -1; |
3412391d CMN |
631 | } |
632 | ||
97769280 | 633 | static int index_path(git_buf *path, git_indexer *idx) |
f23c4a66 | 634 | { |
72d6a20b | 635 | const char prefix[] = "pack-", suffix[] = ".idx"; |
97769280 | 636 | size_t slash = (size_t)path->size; |
b7c44096 | 637 | |
97769280 RB |
638 | /* search backwards for '/' */ |
639 | while (slash > 0 && path->ptr[slash - 1] != '/') | |
640 | slash--; | |
b7c44096 | 641 | |
97769280 | 642 | if (git_buf_grow(path, slash + 1 + strlen(prefix) + |
4aa7de15 RB |
643 | GIT_OID_HEXSZ + strlen(suffix) + 1) < 0) |
644 | return -1; | |
97769280 | 645 | |
d0ec3fb8 | 646 | git_buf_truncate(path, slash); |
97769280 | 647 | git_buf_puts(path, prefix); |
fa6420f7 | 648 | git_oid_fmt(path->ptr + git_buf_len(path), &idx->hash); |
97769280 RB |
649 | path->size += GIT_OID_HEXSZ; |
650 | git_buf_puts(path, suffix); | |
651 | ||
4aa7de15 | 652 | return git_buf_oom(path) ? -1 : 0; |
b7c44096 CMN |
653 | } |
654 | ||
48b3ad4f | 655 | int git_indexer_write(git_indexer *idx) |
b7c44096 CMN |
656 | { |
657 | git_mwindow *w = NULL; | |
26e74c6a | 658 | int error; |
f6867e63 | 659 | unsigned int i, long_offsets = 0, left; |
b7c44096 | 660 | struct git_pack_idx_header hdr; |
97769280 | 661 | git_buf filename = GIT_BUF_INIT; |
b7c44096 CMN |
662 | struct entry *entry; |
663 | void *packfile_hash; | |
664 | git_oid file_hash; | |
665 | SHA_CTX ctx; | |
666 | ||
667 | git_vector_sort(&idx->objects); | |
668 | ||
97769280 RB |
669 | git_buf_sets(&filename, idx->pack->pack_name); |
670 | git_buf_truncate(&filename, filename.size - strlen("pack")); | |
671 | git_buf_puts(&filename, "idx"); | |
cb8a7961 | 672 | if (git_buf_oom(&filename)) |
4aa7de15 | 673 | return -1; |
97769280 RB |
674 | |
675 | error = git_filebuf_open(&idx->file, filename.ptr, GIT_FILEBUF_HASH_CONTENTS); | |
4aa7de15 | 676 | if (error < 0) |
97769280 | 677 | goto cleanup; |
b7c44096 CMN |
678 | |
679 | /* Write out the header */ | |
680 | hdr.idx_signature = htonl(PACK_IDX_SIGNATURE); | |
681 | hdr.idx_version = htonl(2); | |
682 | error = git_filebuf_write(&idx->file, &hdr, sizeof(hdr)); | |
4aa7de15 | 683 | if (error < 0) |
97769280 | 684 | goto cleanup; |
b7c44096 CMN |
685 | |
686 | /* Write out the fanout table */ | |
687 | for (i = 0; i < 256; ++i) { | |
688 | uint32_t n = htonl(idx->fanout[i]); | |
689 | error = git_filebuf_write(&idx->file, &n, sizeof(n)); | |
4aa7de15 | 690 | if (error < 0) |
b7c44096 CMN |
691 | goto cleanup; |
692 | } | |
693 | ||
694 | /* Write out the object names (SHA-1 hashes) */ | |
695 | SHA1_Init(&ctx); | |
696 | git_vector_foreach(&idx->objects, i, entry) { | |
697 | error = git_filebuf_write(&idx->file, &entry->oid, sizeof(git_oid)); | |
698 | SHA1_Update(&ctx, &entry->oid, GIT_OID_RAWSZ); | |
4aa7de15 | 699 | if (error < 0) |
b7c44096 CMN |
700 | goto cleanup; |
701 | } | |
702 | SHA1_Final(idx->hash.id, &ctx); | |
703 | ||
704 | /* Write out the CRC32 values */ | |
705 | git_vector_foreach(&idx->objects, i, entry) { | |
706 | error = git_filebuf_write(&idx->file, &entry->crc, sizeof(uint32_t)); | |
4aa7de15 | 707 | if (error < 0) |
b7c44096 CMN |
708 | goto cleanup; |
709 | } | |
710 | ||
711 | /* Write out the offsets */ | |
712 | git_vector_foreach(&idx->objects, i, entry) { | |
713 | uint32_t n; | |
714 | ||
715 | if (entry->offset == UINT32_MAX) | |
716 | n = htonl(0x80000000 | long_offsets++); | |
717 | else | |
718 | n = htonl(entry->offset); | |
719 | ||
720 | error = git_filebuf_write(&idx->file, &n, sizeof(uint32_t)); | |
4aa7de15 | 721 | if (error < 0) |
b7c44096 CMN |
722 | goto cleanup; |
723 | } | |
724 | ||
725 | /* Write out the long offsets */ | |
726 | git_vector_foreach(&idx->objects, i, entry) { | |
727 | uint32_t split[2]; | |
728 | ||
729 | if (entry->offset != UINT32_MAX) | |
730 | continue; | |
731 | ||
732 | split[0] = htonl(entry->offset_long >> 32); | |
733 | split[1] = htonl(entry->offset_long & 0xffffffff); | |
734 | ||
735 | error = git_filebuf_write(&idx->file, &split, sizeof(uint32_t) * 2); | |
4aa7de15 | 736 | if (error < 0) |
b7c44096 CMN |
737 | goto cleanup; |
738 | } | |
739 | ||
740 | /* Write out the packfile trailer */ | |
741 | ||
3f93e16c | 742 | packfile_hash = git_mwindow_open(&idx->pack->mwf, &w, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ, &left); |
c1af5a39 | 743 | git_mwindow_close(&w); |
b7c44096 | 744 | if (packfile_hash == NULL) { |
4aa7de15 | 745 | error = -1; |
b7c44096 CMN |
746 | goto cleanup; |
747 | } | |
748 | ||
749 | memcpy(&file_hash, packfile_hash, GIT_OID_RAWSZ); | |
750 | ||
751 | git_mwindow_close(&w); | |
752 | ||
753 | error = git_filebuf_write(&idx->file, &file_hash, sizeof(git_oid)); | |
4aa7de15 RB |
754 | if (error < 0) |
755 | goto cleanup; | |
b7c44096 CMN |
756 | |
757 | /* Write out the index sha */ | |
758 | error = git_filebuf_hash(&file_hash, &idx->file); | |
4aa7de15 | 759 | if (error < 0) |
b7c44096 CMN |
760 | goto cleanup; |
761 | ||
762 | error = git_filebuf_write(&idx->file, &file_hash, sizeof(git_oid)); | |
4aa7de15 | 763 | if (error < 0) |
b7c44096 CMN |
764 | goto cleanup; |
765 | ||
766 | /* Figure out what the final name should be */ | |
97769280 | 767 | error = index_path(&filename, idx); |
4aa7de15 | 768 | if (error < 0) |
97769280 RB |
769 | goto cleanup; |
770 | ||
b7c44096 | 771 | /* Commit file */ |
97769280 | 772 | error = git_filebuf_commit_at(&idx->file, filename.ptr, GIT_PACK_FILE_MODE); |
b7c44096 CMN |
773 | |
774 | cleanup: | |
c1af5a39 | 775 | git_mwindow_free_all(&idx->pack->mwf); |
4aa7de15 | 776 | if (error < 0) |
b7c44096 | 777 | git_filebuf_cleanup(&idx->file); |
97769280 | 778 | git_buf_free(&filename); |
b7c44096 CMN |
779 | |
780 | return error; | |
781 | } | |
782 | ||
783 | int git_indexer_run(git_indexer *idx, git_indexer_stats *stats) | |
784 | { | |
785 | git_mwindow_file *mwf; | |
e1de726c | 786 | git_off_t off = sizeof(struct git_pack_header); |
f23c4a66 | 787 | int error; |
b7c44096 CMN |
788 | struct entry *entry; |
789 | unsigned int left, processed; | |
f23c4a66 | 790 | |
b7c44096 | 791 | assert(idx && stats); |
b5b474dd | 792 | |
b7c44096 | 793 | mwf = &idx->pack->mwf; |
f23c4a66 | 794 | error = git_mwindow_file_register(mwf); |
4aa7de15 RB |
795 | if (error < 0) |
796 | return error; | |
f23c4a66 | 797 | |
deafee7b | 798 | stats->total = (unsigned int)idx->nr_objects; |
b7c44096 | 799 | stats->processed = processed = 0; |
f23c4a66 | 800 | |
b7c44096 | 801 | while (processed < idx->nr_objects) { |
b5b474dd CMN |
802 | git_rawobj obj; |
803 | git_oid oid; | |
c1af5a39 | 804 | struct git_pack_entry *pentry; |
b7c44096 | 805 | git_mwindow *w = NULL; |
c85e08b1 | 806 | int i; |
e1de726c | 807 | git_off_t entry_start = off; |
b7c44096 CMN |
808 | void *packed; |
809 | size_t entry_size; | |
3f93e16c | 810 | char fmt[GIT_OID_HEXSZ] = {0}; |
b5b474dd | 811 | |
e1de726c RB |
812 | entry = git__calloc(1, sizeof(*entry)); |
813 | GITERR_CHECK_ALLOC(entry); | |
b5b474dd CMN |
814 | |
815 | if (off > UINT31_MAX) { | |
b7c44096 CMN |
816 | entry->offset = UINT32_MAX; |
817 | entry->offset_long = off; | |
b5b474dd | 818 | } else { |
e1de726c | 819 | entry->offset = (uint32_t)off; |
b5b474dd CMN |
820 | } |
821 | ||
822 | error = git_packfile_unpack(&obj, idx->pack, &off); | |
4aa7de15 | 823 | if (error < 0) |
b5b474dd | 824 | goto cleanup; |
b5b474dd | 825 | |
c1af5a39 | 826 | /* FIXME: Parse the object instead of hashing it */ |
18e5b854 | 827 | error = git_odb__hashobj(&oid, &obj); |
4aa7de15 | 828 | if (error < 0) { |
73d87a09 | 829 | giterr_set(GITERR_INDEXER, "Failed to hash object"); |
ab525a74 CMN |
830 | goto cleanup; |
831 | } | |
832 | ||
c1af5a39 CMN |
833 | pentry = git__malloc(sizeof(struct git_pack_entry)); |
834 | if (pentry == NULL) { | |
4aa7de15 | 835 | error = -1; |
c1af5a39 CMN |
836 | goto cleanup; |
837 | } | |
4aa7de15 | 838 | |
c1af5a39 CMN |
839 | git_oid_cpy(&pentry->sha1, &oid); |
840 | pentry->offset = entry_start; | |
3f93e16c | 841 | git_oid_fmt(fmt, &oid); |
c1af5a39 | 842 | error = git_vector_insert(&idx->pack->cache, pentry); |
4aa7de15 | 843 | if (error < 0) |
c1af5a39 CMN |
844 | goto cleanup; |
845 | ||
b7c44096 CMN |
846 | git_oid_cpy(&entry->oid, &oid); |
847 | entry->crc = crc32(0L, Z_NULL, 0); | |
848 | ||
e1de726c | 849 | entry_size = (size_t)(off - entry_start); |
b7c44096 CMN |
850 | packed = git_mwindow_open(mwf, &w, entry_start, entry_size, &left); |
851 | if (packed == NULL) { | |
4aa7de15 | 852 | error = -1; |
b7c44096 CMN |
853 | goto cleanup; |
854 | } | |
deafee7b | 855 | entry->crc = htonl(crc32(entry->crc, packed, (uInt)entry_size)); |
b7c44096 | 856 | git_mwindow_close(&w); |
ab525a74 | 857 | |
b5b474dd | 858 | /* Add the object to the list */ |
b7c44096 | 859 | error = git_vector_insert(&idx->objects, entry); |
4aa7de15 | 860 | if (error < 0) |
b7c44096 | 861 | goto cleanup; |
b5b474dd CMN |
862 | |
863 | for (i = oid.id[0]; i < 256; ++i) { | |
b7c44096 | 864 | idx->fanout[i]++; |
b5b474dd CMN |
865 | } |
866 | ||
3286c408 | 867 | git__free(obj.data); |
ab525a74 | 868 | |
b7c44096 | 869 | stats->processed = ++processed; |
ab525a74 CMN |
870 | } |
871 | ||
872 | cleanup: | |
873 | git_mwindow_free_all(mwf); | |
874 | ||
f23c4a66 | 875 | return error; |
ab525a74 | 876 | |
f23c4a66 CMN |
877 | } |
878 | ||
ab525a74 | 879 | void git_indexer_free(git_indexer *idx) |
3412391d | 880 | { |
b7c44096 CMN |
881 | unsigned int i; |
882 | struct entry *e; | |
c1af5a39 | 883 | struct git_pack_entry *pe; |
b7c44096 | 884 | |
92be7908 CMN |
885 | if (idx == NULL) |
886 | return; | |
887 | ||
7d0cdf82 | 888 | p_close(idx->pack->mwf.fd); |
b7c44096 | 889 | git_vector_foreach(&idx->objects, i, e) |
3286c408 | 890 | git__free(e); |
b7c44096 | 891 | git_vector_free(&idx->objects); |
c1af5a39 | 892 | git_vector_foreach(&idx->pack->cache, i, pe) |
3286c408 | 893 | git__free(pe); |
c1af5a39 | 894 | git_vector_free(&idx->pack->cache); |
3286c408 VM |
895 | git__free(idx->pack); |
896 | git__free(idx); | |
3412391d | 897 | } |
ab525a74 | 898 |