]> git.proxmox.com Git - libgit2.git/blame - src/indexer.c
Merge pull request #791 from carlosmn/index-path
[libgit2.git] / src / indexer.c
CommitLineData
3412391d 1/*
5e0de328 2 * Copyright (C) 2009-2012 the libgit2 contributors
3412391d 3 *
bb742ede
VM
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
3412391d
CMN
6 */
7
0c3bae62
VM
8#include <zlib.h>
9
f23c4a66 10#include "git2/indexer.h"
7d0cdf82 11#include "git2/object.h"
b7c44096 12#include "git2/oid.h"
f23c4a66 13
3412391d
CMN
14#include "common.h"
15#include "pack.h"
f23c4a66 16#include "mwindow.h"
3412391d 17#include "posix.h"
b7c44096
CMN
18#include "pack.h"
19#include "filebuf.h"
20#include "sha1.h"
21
22#define UINT31_MAX (0x7FFFFFFF)
3412391d 23
b5b474dd 24struct entry {
b7c44096 25 git_oid oid;
b5b474dd
CMN
26 uint32_t crc;
27 uint32_t offset;
28 uint64_t offset_long;
29};
30
2d3e417e 31struct git_indexer {
a070f152 32 struct git_pack_file *pack;
b7c44096
CMN
33 size_t nr_objects;
34 git_vector objects;
35 git_filebuf file;
36 unsigned int fanout[256];
37 git_oid hash;
2d3e417e 38};
f23c4a66 39
3f93e16c
CMN
40struct git_indexer_stream {
41 unsigned int parsed_header :1,
42 opened_pack;
43 struct git_pack_file *pack;
44 git_filebuf pack_file;
45 git_filebuf index_file;
46 git_off_t off;
47 size_t nr_objects;
48 git_vector objects;
49 git_vector deltas;
50 unsigned int fanout[256];
51 git_oid hash;
52};
53
54struct delta_info {
453ab98d 55 git_off_t delta_off;
3f93e16c
CMN
56};
57
b7c44096
CMN
58const git_oid *git_indexer_hash(git_indexer *idx)
59{
60 return &idx->hash;
61}
62
1c9c081a
CMN
63const git_oid *git_indexer_stream_hash(git_indexer_stream *idx)
64{
65 return &idx->hash;
66}
67
3f93e16c
CMN
68static int open_pack(struct git_pack_file **out, const char *filename)
69{
70 size_t namelen;
71 struct git_pack_file *pack;
72 struct stat st;
73 int fd;
74
75 namelen = strlen(filename);
76 pack = git__calloc(1, sizeof(struct git_pack_file) + namelen + 1);
77 GITERR_CHECK_ALLOC(pack);
78
79 memcpy(pack->pack_name, filename, namelen + 1);
80
81 if (p_stat(filename, &st) < 0) {
82 giterr_set(GITERR_OS, "Failed to stat packfile.");
83 goto cleanup;
84 }
85
86 if ((fd = p_open(pack->pack_name, O_RDONLY)) < 0) {
87 giterr_set(GITERR_OS, "Failed to open packfile.");
88 goto cleanup;
89 }
90
91 pack->mwf.fd = fd;
92 pack->mwf.size = (git_off_t)st.st_size;
93
94 *out = pack;
95 return 0;
96
97cleanup:
98 git__free(pack);
99 return -1;
100}
101
102static int parse_header(struct git_pack_header *hdr, struct git_pack_file *pack)
3412391d 103{
3412391d
CMN
104 int error;
105
106 /* Verify we recognize this pack file format. */
3f93e16c 107 if ((error = p_read(pack->mwf.fd, hdr, sizeof(*hdr))) < 0) {
4aa7de15
RB
108 giterr_set(GITERR_OS, "Failed to read in pack header");
109 return error;
110 }
3412391d 111
3f93e16c 112 if (hdr->hdr_signature != ntohl(PACK_SIGNATURE)) {
73d87a09 113 giterr_set(GITERR_INDEXER, "Wrong pack signature");
4aa7de15
RB
114 return -1;
115 }
3412391d 116
3f93e16c 117 if (!pack_version_ok(hdr->hdr_version)) {
73d87a09 118 giterr_set(GITERR_INDEXER, "Wrong pack version");
4aa7de15
RB
119 return -1;
120 }
f23c4a66 121
4aa7de15 122 return 0;
3412391d
CMN
123}
124
c1af5a39 125static int objects_cmp(const void *a, const void *b)
b7c44096
CMN
126{
127 const struct entry *entrya = a;
128 const struct entry *entryb = b;
129
130 return git_oid_cmp(&entrya->oid, &entryb->oid);
131}
132
c1af5a39
CMN
133static int cache_cmp(const void *a, const void *b)
134{
135 const struct git_pack_entry *ea = a;
136 const struct git_pack_entry *eb = b;
137
138 return git_oid_cmp(&ea->sha1, &eb->sha1);
139}
140
3f93e16c
CMN
141int git_indexer_stream_new(git_indexer_stream **out, const char *prefix)
142{
143 git_indexer_stream *idx;
144 git_buf path = GIT_BUF_INIT;
37159957 145 static const char suff[] = "/pack";
3f93e16c 146 int error;
c1af5a39 147
3f93e16c
CMN
148 idx = git__calloc(1, sizeof(git_indexer_stream));
149 GITERR_CHECK_ALLOC(idx);
150
151 error = git_buf_joinpath(&path, prefix, suff);
152 if (error < 0)
153 goto cleanup;
154
155 error = git_filebuf_open(&idx->pack_file, path.ptr,
156 GIT_FILEBUF_TEMPORARY | GIT_FILEBUF_DO_NOT_BUFFER);
157 git_buf_free(&path);
158 if (error < 0)
159 goto cleanup;
160
161 *out = idx;
162 return 0;
163
164cleanup:
165 git_buf_free(&path);
166 git_filebuf_cleanup(&idx->pack_file);
167 git__free(idx);
168 return -1;
169}
170
171/* Try to store the delta so we can try to resolve it later */
172static int store_delta(git_indexer_stream *idx)
3412391d 173{
3f93e16c
CMN
174 git_otype type;
175 git_mwindow *w = NULL;
176 git_mwindow_file *mwf = &idx->pack->mwf;
177 git_off_t entry_start = idx->off;
178 struct delta_info *delta;
179 size_t entry_size;
453ab98d 180 git_rawobj obj;
3f93e16c 181 int error;
3412391d 182
3f93e16c
CMN
183 /*
184 * ref-delta objects can refer to object that we haven't
185 * found yet, so give it another opportunity
186 */
187 if (git_packfile_unpack_header(&entry_size, &type, mwf, &w, &idx->off) < 0)
188 return -1;
ade3c9bb 189
3f93e16c
CMN
190 git_mwindow_close(&w);
191
192 /* If it's not a delta, mark it as failure, we can't do anything with it */
193 if (type != GIT_OBJ_REF_DELTA && type != GIT_OBJ_OFS_DELTA)
4aa7de15 194 return -1;
3f93e16c 195
3f93e16c 196 if (type == GIT_OBJ_REF_DELTA) {
3f93e16c
CMN
197 idx->off += GIT_OID_RAWSZ;
198 } else {
453ab98d
CMN
199 git_off_t base_off;
200
201 base_off = get_delta_base(idx->pack, &w, &idx->off, type, entry_start);
3f93e16c 202 git_mwindow_close(&w);
453ab98d
CMN
203 if (base_off < 0)
204 return (int)base_off;
4aa7de15 205 }
3412391d 206
453ab98d 207 error = packfile_unpack_compressed(&obj, idx->pack, &w, &idx->off, entry_size, type);
904b67e6 208 if (error == GIT_EBUFS) {
3f93e16c 209 idx->off = entry_start;
904b67e6 210 return GIT_EBUFS;
3f93e16c 211 } else if (error < 0){
453ab98d 212 return -1;
3f93e16c
CMN
213 }
214
453ab98d
CMN
215 delta = git__calloc(1, sizeof(struct delta_info));
216 GITERR_CHECK_ALLOC(delta);
217 delta->delta_off = entry_start;
218
219 git__free(obj.data);
220
3f93e16c 221 if (git_vector_insert(&idx->deltas, delta) < 0)
453ab98d
CMN
222 return -1;
223
224 return 0;
225}
226
227static int hash_and_save(git_indexer_stream *idx, git_rawobj *obj, git_off_t entry_start)
228{
229 int i;
230 git_oid oid;
231 void *packed;
232 size_t entry_size;
233 unsigned int left;
234 struct entry *entry;
235 git_mwindow *w = NULL;
236 git_mwindow_file *mwf = &idx->pack->mwf;
237 struct git_pack_entry *pentry;
238
239 entry = git__calloc(1, sizeof(*entry));
240 GITERR_CHECK_ALLOC(entry);
241
242 if (entry_start > UINT31_MAX) {
243 entry->offset = UINT32_MAX;
244 entry->offset_long = entry_start;
245 } else {
246 entry->offset = (uint32_t)entry_start;
247 }
248
249 /* FIXME: Parse the object instead of hashing it */
250 if (git_odb__hashobj(&oid, obj) < 0) {
73d87a09 251 giterr_set(GITERR_INDEXER, "Failed to hash object");
453ab98d
CMN
252 return -1;
253 }
254
255 pentry = git__malloc(sizeof(struct git_pack_entry));
256 GITERR_CHECK_ALLOC(pentry);
257
258 git_oid_cpy(&pentry->sha1, &oid);
259 pentry->offset = entry_start;
260 if (git_vector_insert(&idx->pack->cache, pentry) < 0)
3f93e16c
CMN
261 goto on_error;
262
453ab98d
CMN
263 git_oid_cpy(&entry->oid, &oid);
264 entry->crc = crc32(0L, Z_NULL, 0);
265
266 entry_size = (size_t)(idx->off - entry_start);
267 packed = git_mwindow_open(mwf, &w, entry_start, entry_size, &left);
268 if (packed == NULL)
269 goto on_error;
270
271 entry->crc = htonl(crc32(entry->crc, packed, (uInt)entry_size));
272 git_mwindow_close(&w);
273
274 /* Add the object to the list */
275 if (git_vector_insert(&idx->objects, entry) < 0)
276 goto on_error;
277
278 for (i = oid.id[0]; i < 256; ++i) {
279 idx->fanout[i]++;
280 }
281
3f93e16c 282 return 0;
3412391d 283
3f93e16c 284on_error:
453ab98d
CMN
285 git__free(entry);
286 git__free(pentry);
287 git__free(obj->data);
3f93e16c
CMN
288 return -1;
289}
3412391d 290
dee5515a 291int git_indexer_stream_add(git_indexer_stream *idx, const void *data, size_t size, git_indexer_stats *stats)
3f93e16c
CMN
292{
293 int error;
294 struct git_pack_header hdr;
3f035860 295 size_t processed;
3f93e16c 296 git_mwindow_file *mwf = &idx->pack->mwf;
3412391d 297
3f93e16c
CMN
298 assert(idx && data && stats);
299
3f035860
VM
300 processed = stats->processed;
301
3f93e16c
CMN
302 if (git_filebuf_write(&idx->pack_file, data, size) < 0)
303 return -1;
304
305 /* Make sure we set the new size of the pack */
306 if (idx->opened_pack) {
307 idx->pack->mwf.size += size;
308 //printf("\nadding %zu for %zu\n", size, idx->pack->mwf.size);
309 } else {
310 if (open_pack(&idx->pack, idx->pack_file.path_lock) < 0)
311 return -1;
312 idx->opened_pack = 1;
313 mwf = &idx->pack->mwf;
314 if (git_mwindow_file_register(&idx->pack->mwf) < 0)
315 return -1;
3f93e16c
CMN
316 }
317
318 if (!idx->parsed_header) {
319 if ((unsigned)idx->pack->mwf.size < sizeof(hdr))
320 return 0;
321
322 if (parse_header(&hdr, idx->pack) < 0)
323 return -1;
324
325 idx->parsed_header = 1;
326 idx->nr_objects = ntohl(hdr.hdr_entries);
327 idx->off = sizeof(struct git_pack_header);
328
329 /* for now, limit to 2^32 objects */
330 assert(idx->nr_objects == (size_t)((unsigned int)idx->nr_objects));
331
332 if (git_vector_init(&idx->pack->cache, (unsigned int)idx->nr_objects, cache_cmp) < 0)
333 return -1;
334
335 idx->pack->has_cache = 1;
336 if (git_vector_init(&idx->objects, (unsigned int)idx->nr_objects, objects_cmp) < 0)
337 return -1;
338
339 if (git_vector_init(&idx->deltas, (unsigned int)(idx->nr_objects / 2), NULL) < 0)
340 return -1;
341
342 stats->total = (unsigned int)idx->nr_objects;
343 stats->processed = 0;
344 }
345
346 /* Now that we have data in the pack, let's try to parse it */
347
348 /* As the file grows any windows we try to use will be out of date */
349 git_mwindow_free_all(mwf);
350 while (processed < idx->nr_objects) {
351 git_rawobj obj;
3f93e16c 352 git_off_t entry_start = idx->off;
3412391d 353
3f93e16c
CMN
354 if (idx->pack->mwf.size <= idx->off + 20)
355 return 0;
356
357 error = git_packfile_unpack(&obj, idx->pack, &idx->off);
904b67e6 358 if (error == GIT_EBUFS) {
3f93e16c
CMN
359 idx->off = entry_start;
360 return 0;
361 }
362
363 if (error < 0) {
364 idx->off = entry_start;
365 error = store_delta(idx);
2aeadb9c 366
904b67e6 367 if (error == GIT_EBUFS)
3f93e16c
CMN
368 return 0;
369 if (error < 0)
370 return error;
3f93e16c
CMN
371 continue;
372 }
373
453ab98d 374 if (hash_and_save(idx, &obj, entry_start) < 0)
3f93e16c 375 goto on_error;
3f93e16c 376
453ab98d 377 git__free(obj.data);
3f93e16c 378
821f6bc7 379 stats->processed = (unsigned int)++processed;
453ab98d 380 }
3f93e16c 381
453ab98d 382 return 0;
3f93e16c 383
453ab98d
CMN
384on_error:
385 git_mwindow_free_all(mwf);
386 return -1;
387}
3f93e16c 388
453ab98d
CMN
389static int index_path_stream(git_buf *path, git_indexer_stream *idx, const char *suffix)
390{
391 const char prefix[] = "pack-";
392 size_t slash = (size_t)path->size;
3f93e16c 393
453ab98d
CMN
394 /* search backwards for '/' */
395 while (slash > 0 && path->ptr[slash - 1] != '/')
396 slash--;
3f93e16c 397
453ab98d
CMN
398 if (git_buf_grow(path, slash + 1 + strlen(prefix) +
399 GIT_OID_HEXSZ + strlen(suffix) + 1) < 0)
400 return -1;
401
402 git_buf_truncate(path, slash);
403 git_buf_puts(path, prefix);
fa6420f7 404 git_oid_fmt(path->ptr + git_buf_len(path), &idx->hash);
453ab98d
CMN
405 path->size += GIT_OID_HEXSZ;
406 git_buf_puts(path, suffix);
407
408 return git_buf_oom(path) ? -1 : 0;
409}
410
411static int resolve_deltas(git_indexer_stream *idx, git_indexer_stats *stats)
412{
413 unsigned int i;
414 struct delta_info *delta;
415
416 git_vector_foreach(&idx->deltas, i, delta) {
417 git_rawobj obj;
418
419 idx->off = delta->delta_off;
420 if (git_packfile_unpack(&obj, idx->pack, &idx->off) < 0)
421 return -1;
422
423 if (hash_and_save(idx, &obj, delta->delta_off) < 0)
424 return -1;
3f93e16c
CMN
425
426 git__free(obj.data);
453ab98d
CMN
427 stats->processed++;
428 }
3f93e16c 429
453ab98d
CMN
430 return 0;
431}
432
433int git_indexer_stream_finalize(git_indexer_stream *idx, git_indexer_stats *stats)
434{
435 git_mwindow *w = NULL;
436 unsigned int i, long_offsets = 0, left;
437 struct git_pack_idx_header hdr;
438 git_buf filename = GIT_BUF_INIT;
439 struct entry *entry;
440 void *packfile_hash;
441 git_oid file_hash;
442 SHA_CTX ctx;
443
6a9d61ef
CMN
444 /* Test for this before resolve_deltas(), as it plays with idx->off */
445 if (idx->off < idx->pack->mwf.size - GIT_OID_RAWSZ) {
446 giterr_set(GITERR_INDEXER, "Indexing error: junk at the end of the pack");
447 return -1;
448 }
449
453ab98d
CMN
450 if (idx->deltas.length > 0)
451 if (resolve_deltas(idx, stats) < 0)
452 return -1;
453
6a9d61ef
CMN
454 if (stats->processed != stats->total) {
455 giterr_set(GITERR_INDEXER, "Indexing error: early EOF");
456 return -1;
457 }
458
453ab98d
CMN
459 git_vector_sort(&idx->objects);
460
461 git_buf_sets(&filename, idx->pack->pack_name);
462 git_buf_truncate(&filename, filename.size - strlen("pack"));
463 git_buf_puts(&filename, "idx");
464 if (git_buf_oom(&filename))
465 return -1;
466
467 if (git_filebuf_open(&idx->index_file, filename.ptr, GIT_FILEBUF_HASH_CONTENTS) < 0)
468 goto on_error;
469
470 /* Write out the header */
471 hdr.idx_signature = htonl(PACK_IDX_SIGNATURE);
472 hdr.idx_version = htonl(2);
473 git_filebuf_write(&idx->index_file, &hdr, sizeof(hdr));
474
475 /* Write out the fanout table */
476 for (i = 0; i < 256; ++i) {
477 uint32_t n = htonl(idx->fanout[i]);
478 git_filebuf_write(&idx->index_file, &n, sizeof(n));
3412391d
CMN
479 }
480
453ab98d
CMN
481 /* Write out the object names (SHA-1 hashes) */
482 SHA1_Init(&ctx);
483 git_vector_foreach(&idx->objects, i, entry) {
484 git_filebuf_write(&idx->index_file, &entry->oid, sizeof(git_oid));
485 SHA1_Update(&ctx, &entry->oid, GIT_OID_RAWSZ);
486 }
487 SHA1_Final(idx->hash.id, &ctx);
488
489 /* Write out the CRC32 values */
490 git_vector_foreach(&idx->objects, i, entry) {
491 git_filebuf_write(&idx->index_file, &entry->crc, sizeof(uint32_t));
492 }
493
494 /* Write out the offsets */
495 git_vector_foreach(&idx->objects, i, entry) {
496 uint32_t n;
497
498 if (entry->offset == UINT32_MAX)
499 n = htonl(0x80000000 | long_offsets++);
500 else
501 n = htonl(entry->offset);
502
503 git_filebuf_write(&idx->index_file, &n, sizeof(uint32_t));
504 }
505
506 /* Write out the long offsets */
507 git_vector_foreach(&idx->objects, i, entry) {
508 uint32_t split[2];
509
510 if (entry->offset != UINT32_MAX)
511 continue;
512
513 split[0] = htonl(entry->offset_long >> 32);
514 split[1] = htonl(entry->offset_long & 0xffffffff);
515
516 git_filebuf_write(&idx->index_file, &split, sizeof(uint32_t) * 2);
517 }
518
519 /* Write out the packfile trailer */
520 packfile_hash = git_mwindow_open(&idx->pack->mwf, &w, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ, &left);
521 if (packfile_hash == NULL) {
522 git_mwindow_close(&w);
523 goto on_error;
524 }
525
526 memcpy(&file_hash, packfile_hash, GIT_OID_RAWSZ);
527 git_mwindow_close(&w);
528
529 git_filebuf_write(&idx->index_file, &file_hash, sizeof(git_oid));
530
531 /* Write out the packfile trailer to the idx file as well */
532 if (git_filebuf_hash(&file_hash, &idx->index_file) < 0)
533 goto on_error;
534
535 git_filebuf_write(&idx->index_file, &file_hash, sizeof(git_oid));
536
537 /* Figure out what the final name should be */
538 if (index_path_stream(&filename, idx, ".idx") < 0)
539 goto on_error;
540
541 /* Commit file */
542 if (git_filebuf_commit_at(&idx->index_file, filename.ptr, GIT_PACK_FILE_MODE) < 0)
543 goto on_error;
544
545 git_mwindow_free_all(&idx->pack->mwf);
a640d79e 546 p_close(idx->pack->mwf.fd);
453ab98d
CMN
547
548 if (index_path_stream(&filename, idx, ".pack") < 0)
549 goto on_error;
550 /* And don't forget to rename the packfile to its new place. */
551 if (git_filebuf_commit_at(&idx->pack_file, filename.ptr, GIT_PACK_FILE_MODE) < 0)
552 return -1;
553
554 git_buf_free(&filename);
3f93e16c
CMN
555 return 0;
556
557on_error:
453ab98d 558 git_mwindow_free_all(&idx->pack->mwf);
a640d79e 559 p_close(idx->pack->mwf.fd);
453ab98d
CMN
560 git_filebuf_cleanup(&idx->index_file);
561 git_buf_free(&filename);
3f93e16c
CMN
562 return -1;
563}
564
1c9c081a
CMN
565void git_indexer_stream_free(git_indexer_stream *idx)
566{
567 unsigned int i;
568 struct entry *e;
569 struct git_pack_entry *pe;
570 struct delta_info *delta;
571
572 if (idx == NULL)
573 return;
574
1c9c081a
CMN
575 git_vector_foreach(&idx->objects, i, e)
576 git__free(e);
577 git_vector_free(&idx->objects);
578 git_vector_foreach(&idx->pack->cache, i, pe)
579 git__free(pe);
580 git_vector_free(&idx->pack->cache);
581 git_vector_foreach(&idx->deltas, i, delta)
582 git__free(delta);
583 git_vector_free(&idx->deltas);
584 git__free(idx->pack);
585 git__free(idx);
586}
587
3f93e16c
CMN
588int git_indexer_new(git_indexer **out, const char *packname)
589{
590 git_indexer *idx;
591 struct git_pack_header hdr;
592 int error;
593
594 assert(out && packname);
595
596 if (git_path_root(packname) < 0) {
73d87a09 597 giterr_set(GITERR_INDEXER, "Path is not absolute");
3f93e16c 598 return -1;
3412391d
CMN
599 }
600
3f93e16c
CMN
601 idx = git__calloc(1, sizeof(git_indexer));
602 GITERR_CHECK_ALLOC(idx);
3412391d 603
3f93e16c
CMN
604 open_pack(&idx->pack, packname);
605
606 if ((error = parse_header(&hdr, idx->pack)) < 0)
3412391d 607 goto cleanup;
3412391d 608
3f93e16c 609 idx->nr_objects = ntohl(hdr.hdr_entries);
b7c44096 610
deafee7b
RB
611 /* for now, limit to 2^32 objects */
612 assert(idx->nr_objects == (size_t)((unsigned int)idx->nr_objects));
613
614 error = git_vector_init(&idx->pack->cache, (unsigned int)idx->nr_objects, cache_cmp);
4aa7de15 615 if (error < 0)
c1af5a39
CMN
616 goto cleanup;
617
618 idx->pack->has_cache = 1;
deafee7b 619 error = git_vector_init(&idx->objects, (unsigned int)idx->nr_objects, objects_cmp);
4aa7de15 620 if (error < 0)
b5b474dd 621 goto cleanup;
b5b474dd 622
3412391d
CMN
623 *out = idx;
624
4aa7de15 625 return 0;
3412391d
CMN
626
627cleanup:
b7c44096 628 git_indexer_free(idx);
3412391d 629
3f93e16c 630 return -1;
3412391d
CMN
631}
632
97769280 633static int index_path(git_buf *path, git_indexer *idx)
f23c4a66 634{
72d6a20b 635 const char prefix[] = "pack-", suffix[] = ".idx";
97769280 636 size_t slash = (size_t)path->size;
b7c44096 637
97769280
RB
638 /* search backwards for '/' */
639 while (slash > 0 && path->ptr[slash - 1] != '/')
640 slash--;
b7c44096 641
97769280 642 if (git_buf_grow(path, slash + 1 + strlen(prefix) +
4aa7de15
RB
643 GIT_OID_HEXSZ + strlen(suffix) + 1) < 0)
644 return -1;
97769280 645
d0ec3fb8 646 git_buf_truncate(path, slash);
97769280 647 git_buf_puts(path, prefix);
fa6420f7 648 git_oid_fmt(path->ptr + git_buf_len(path), &idx->hash);
97769280
RB
649 path->size += GIT_OID_HEXSZ;
650 git_buf_puts(path, suffix);
651
4aa7de15 652 return git_buf_oom(path) ? -1 : 0;
b7c44096
CMN
653}
654
48b3ad4f 655int git_indexer_write(git_indexer *idx)
b7c44096
CMN
656{
657 git_mwindow *w = NULL;
26e74c6a 658 int error;
f6867e63 659 unsigned int i, long_offsets = 0, left;
b7c44096 660 struct git_pack_idx_header hdr;
97769280 661 git_buf filename = GIT_BUF_INIT;
b7c44096
CMN
662 struct entry *entry;
663 void *packfile_hash;
664 git_oid file_hash;
665 SHA_CTX ctx;
666
667 git_vector_sort(&idx->objects);
668
97769280
RB
669 git_buf_sets(&filename, idx->pack->pack_name);
670 git_buf_truncate(&filename, filename.size - strlen("pack"));
671 git_buf_puts(&filename, "idx");
cb8a7961 672 if (git_buf_oom(&filename))
4aa7de15 673 return -1;
97769280
RB
674
675 error = git_filebuf_open(&idx->file, filename.ptr, GIT_FILEBUF_HASH_CONTENTS);
4aa7de15 676 if (error < 0)
97769280 677 goto cleanup;
b7c44096
CMN
678
679 /* Write out the header */
680 hdr.idx_signature = htonl(PACK_IDX_SIGNATURE);
681 hdr.idx_version = htonl(2);
682 error = git_filebuf_write(&idx->file, &hdr, sizeof(hdr));
4aa7de15 683 if (error < 0)
97769280 684 goto cleanup;
b7c44096
CMN
685
686 /* Write out the fanout table */
687 for (i = 0; i < 256; ++i) {
688 uint32_t n = htonl(idx->fanout[i]);
689 error = git_filebuf_write(&idx->file, &n, sizeof(n));
4aa7de15 690 if (error < 0)
b7c44096
CMN
691 goto cleanup;
692 }
693
694 /* Write out the object names (SHA-1 hashes) */
695 SHA1_Init(&ctx);
696 git_vector_foreach(&idx->objects, i, entry) {
697 error = git_filebuf_write(&idx->file, &entry->oid, sizeof(git_oid));
698 SHA1_Update(&ctx, &entry->oid, GIT_OID_RAWSZ);
4aa7de15 699 if (error < 0)
b7c44096
CMN
700 goto cleanup;
701 }
702 SHA1_Final(idx->hash.id, &ctx);
703
704 /* Write out the CRC32 values */
705 git_vector_foreach(&idx->objects, i, entry) {
706 error = git_filebuf_write(&idx->file, &entry->crc, sizeof(uint32_t));
4aa7de15 707 if (error < 0)
b7c44096
CMN
708 goto cleanup;
709 }
710
711 /* Write out the offsets */
712 git_vector_foreach(&idx->objects, i, entry) {
713 uint32_t n;
714
715 if (entry->offset == UINT32_MAX)
716 n = htonl(0x80000000 | long_offsets++);
717 else
718 n = htonl(entry->offset);
719
720 error = git_filebuf_write(&idx->file, &n, sizeof(uint32_t));
4aa7de15 721 if (error < 0)
b7c44096
CMN
722 goto cleanup;
723 }
724
725 /* Write out the long offsets */
726 git_vector_foreach(&idx->objects, i, entry) {
727 uint32_t split[2];
728
729 if (entry->offset != UINT32_MAX)
730 continue;
731
732 split[0] = htonl(entry->offset_long >> 32);
733 split[1] = htonl(entry->offset_long & 0xffffffff);
734
735 error = git_filebuf_write(&idx->file, &split, sizeof(uint32_t) * 2);
4aa7de15 736 if (error < 0)
b7c44096
CMN
737 goto cleanup;
738 }
739
740 /* Write out the packfile trailer */
741
3f93e16c 742 packfile_hash = git_mwindow_open(&idx->pack->mwf, &w, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ, &left);
c1af5a39 743 git_mwindow_close(&w);
b7c44096 744 if (packfile_hash == NULL) {
4aa7de15 745 error = -1;
b7c44096
CMN
746 goto cleanup;
747 }
748
749 memcpy(&file_hash, packfile_hash, GIT_OID_RAWSZ);
750
751 git_mwindow_close(&w);
752
753 error = git_filebuf_write(&idx->file, &file_hash, sizeof(git_oid));
4aa7de15
RB
754 if (error < 0)
755 goto cleanup;
b7c44096
CMN
756
757 /* Write out the index sha */
758 error = git_filebuf_hash(&file_hash, &idx->file);
4aa7de15 759 if (error < 0)
b7c44096
CMN
760 goto cleanup;
761
762 error = git_filebuf_write(&idx->file, &file_hash, sizeof(git_oid));
4aa7de15 763 if (error < 0)
b7c44096
CMN
764 goto cleanup;
765
766 /* Figure out what the final name should be */
97769280 767 error = index_path(&filename, idx);
4aa7de15 768 if (error < 0)
97769280
RB
769 goto cleanup;
770
b7c44096 771 /* Commit file */
97769280 772 error = git_filebuf_commit_at(&idx->file, filename.ptr, GIT_PACK_FILE_MODE);
b7c44096
CMN
773
774cleanup:
c1af5a39 775 git_mwindow_free_all(&idx->pack->mwf);
4aa7de15 776 if (error < 0)
b7c44096 777 git_filebuf_cleanup(&idx->file);
97769280 778 git_buf_free(&filename);
b7c44096
CMN
779
780 return error;
781}
782
783int git_indexer_run(git_indexer *idx, git_indexer_stats *stats)
784{
785 git_mwindow_file *mwf;
e1de726c 786 git_off_t off = sizeof(struct git_pack_header);
f23c4a66 787 int error;
b7c44096
CMN
788 struct entry *entry;
789 unsigned int left, processed;
f23c4a66 790
b7c44096 791 assert(idx && stats);
b5b474dd 792
b7c44096 793 mwf = &idx->pack->mwf;
f23c4a66 794 error = git_mwindow_file_register(mwf);
4aa7de15
RB
795 if (error < 0)
796 return error;
f23c4a66 797
deafee7b 798 stats->total = (unsigned int)idx->nr_objects;
b7c44096 799 stats->processed = processed = 0;
f23c4a66 800
b7c44096 801 while (processed < idx->nr_objects) {
b5b474dd
CMN
802 git_rawobj obj;
803 git_oid oid;
c1af5a39 804 struct git_pack_entry *pentry;
b7c44096 805 git_mwindow *w = NULL;
c85e08b1 806 int i;
e1de726c 807 git_off_t entry_start = off;
b7c44096
CMN
808 void *packed;
809 size_t entry_size;
3f93e16c 810 char fmt[GIT_OID_HEXSZ] = {0};
b5b474dd 811
e1de726c
RB
812 entry = git__calloc(1, sizeof(*entry));
813 GITERR_CHECK_ALLOC(entry);
b5b474dd
CMN
814
815 if (off > UINT31_MAX) {
b7c44096
CMN
816 entry->offset = UINT32_MAX;
817 entry->offset_long = off;
b5b474dd 818 } else {
e1de726c 819 entry->offset = (uint32_t)off;
b5b474dd
CMN
820 }
821
822 error = git_packfile_unpack(&obj, idx->pack, &off);
4aa7de15 823 if (error < 0)
b5b474dd 824 goto cleanup;
b5b474dd 825
c1af5a39 826 /* FIXME: Parse the object instead of hashing it */
18e5b854 827 error = git_odb__hashobj(&oid, &obj);
4aa7de15 828 if (error < 0) {
73d87a09 829 giterr_set(GITERR_INDEXER, "Failed to hash object");
ab525a74
CMN
830 goto cleanup;
831 }
832
c1af5a39
CMN
833 pentry = git__malloc(sizeof(struct git_pack_entry));
834 if (pentry == NULL) {
4aa7de15 835 error = -1;
c1af5a39
CMN
836 goto cleanup;
837 }
4aa7de15 838
c1af5a39
CMN
839 git_oid_cpy(&pentry->sha1, &oid);
840 pentry->offset = entry_start;
3f93e16c 841 git_oid_fmt(fmt, &oid);
c1af5a39 842 error = git_vector_insert(&idx->pack->cache, pentry);
4aa7de15 843 if (error < 0)
c1af5a39
CMN
844 goto cleanup;
845
b7c44096
CMN
846 git_oid_cpy(&entry->oid, &oid);
847 entry->crc = crc32(0L, Z_NULL, 0);
848
e1de726c 849 entry_size = (size_t)(off - entry_start);
b7c44096
CMN
850 packed = git_mwindow_open(mwf, &w, entry_start, entry_size, &left);
851 if (packed == NULL) {
4aa7de15 852 error = -1;
b7c44096
CMN
853 goto cleanup;
854 }
deafee7b 855 entry->crc = htonl(crc32(entry->crc, packed, (uInt)entry_size));
b7c44096 856 git_mwindow_close(&w);
ab525a74 857
b5b474dd 858 /* Add the object to the list */
b7c44096 859 error = git_vector_insert(&idx->objects, entry);
4aa7de15 860 if (error < 0)
b7c44096 861 goto cleanup;
b5b474dd
CMN
862
863 for (i = oid.id[0]; i < 256; ++i) {
b7c44096 864 idx->fanout[i]++;
b5b474dd
CMN
865 }
866
3286c408 867 git__free(obj.data);
ab525a74 868
b7c44096 869 stats->processed = ++processed;
ab525a74
CMN
870 }
871
872cleanup:
873 git_mwindow_free_all(mwf);
874
f23c4a66 875 return error;
ab525a74 876
f23c4a66
CMN
877}
878
ab525a74 879void git_indexer_free(git_indexer *idx)
3412391d 880{
b7c44096
CMN
881 unsigned int i;
882 struct entry *e;
c1af5a39 883 struct git_pack_entry *pe;
b7c44096 884
92be7908
CMN
885 if (idx == NULL)
886 return;
887
7d0cdf82 888 p_close(idx->pack->mwf.fd);
b7c44096 889 git_vector_foreach(&idx->objects, i, e)
3286c408 890 git__free(e);
b7c44096 891 git_vector_free(&idx->objects);
c1af5a39 892 git_vector_foreach(&idx->pack->cache, i, pe)
3286c408 893 git__free(pe);
c1af5a39 894 git_vector_free(&idx->pack->cache);
3286c408
VM
895 git__free(idx->pack);
896 git__free(idx);
3412391d 897}
ab525a74 898