]> git.proxmox.com Git - libgit2.git/blame - src/indexer.c
Merge branch 'development' into clar2
[libgit2.git] / src / indexer.c
CommitLineData
3412391d 1/*
5e0de328 2 * Copyright (C) 2009-2012 the libgit2 contributors
3412391d 3 *
bb742ede
VM
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
3412391d
CMN
6 */
7
0c3bae62
VM
8#include <zlib.h>
9
f23c4a66 10#include "git2/indexer.h"
7d0cdf82 11#include "git2/object.h"
b7c44096 12#include "git2/oid.h"
f23c4a66 13
3412391d
CMN
14#include "common.h"
15#include "pack.h"
f23c4a66 16#include "mwindow.h"
3412391d 17#include "posix.h"
b7c44096
CMN
18#include "pack.h"
19#include "filebuf.h"
b7c44096
CMN
20
21#define UINT31_MAX (0x7FFFFFFF)
3412391d 22
b5b474dd 23struct entry {
b7c44096 24 git_oid oid;
b5b474dd
CMN
25 uint32_t crc;
26 uint32_t offset;
27 uint64_t offset_long;
28};
29
2d3e417e 30struct git_indexer {
a070f152 31 struct git_pack_file *pack;
b7c44096
CMN
32 size_t nr_objects;
33 git_vector objects;
34 git_filebuf file;
35 unsigned int fanout[256];
36 git_oid hash;
2d3e417e 37};
f23c4a66 38
3f93e16c
CMN
39struct git_indexer_stream {
40 unsigned int parsed_header :1,
f56f8585 41 opened_pack :1,
5a3ad89d
CMN
42 have_stream :1,
43 have_delta :1;
3f93e16c
CMN
44 struct git_pack_file *pack;
45 git_filebuf pack_file;
46 git_filebuf index_file;
47 git_off_t off;
f56f8585
CMN
48 git_off_t entry_start;
49 git_packfile_stream stream;
3f93e16c
CMN
50 size_t nr_objects;
51 git_vector objects;
52 git_vector deltas;
53 unsigned int fanout[256];
f56f8585 54 git_hash_ctx hash_ctx;
3f93e16c 55 git_oid hash;
7d222e13 56 git_transfer_progress_callback progress_cb;
216863c4 57 void *progress_payload;
6481a68d 58 char objbuf[8*1024];
3f93e16c
CMN
59};
60
61struct delta_info {
453ab98d 62 git_off_t delta_off;
3f93e16c
CMN
63};
64
839c5f57 65const git_oid *git_indexer_hash(const git_indexer *idx)
b7c44096
CMN
66{
67 return &idx->hash;
68}
69
839c5f57 70const git_oid *git_indexer_stream_hash(const git_indexer_stream *idx)
1c9c081a
CMN
71{
72 return &idx->hash;
73}
74
3f93e16c
CMN
75static int open_pack(struct git_pack_file **out, const char *filename)
76{
77 size_t namelen;
78 struct git_pack_file *pack;
79 struct stat st;
80 int fd;
81
82 namelen = strlen(filename);
83 pack = git__calloc(1, sizeof(struct git_pack_file) + namelen + 1);
84 GITERR_CHECK_ALLOC(pack);
85
86 memcpy(pack->pack_name, filename, namelen + 1);
87
88 if (p_stat(filename, &st) < 0) {
89 giterr_set(GITERR_OS, "Failed to stat packfile.");
90 goto cleanup;
91 }
92
93 if ((fd = p_open(pack->pack_name, O_RDONLY)) < 0) {
94 giterr_set(GITERR_OS, "Failed to open packfile.");
95 goto cleanup;
96 }
97
98 pack->mwf.fd = fd;
99 pack->mwf.size = (git_off_t)st.st_size;
100
101 *out = pack;
102 return 0;
103
104cleanup:
105 git__free(pack);
106 return -1;
107}
108
109static int parse_header(struct git_pack_header *hdr, struct git_pack_file *pack)
3412391d 110{
3412391d
CMN
111 int error;
112
113 /* Verify we recognize this pack file format. */
3f93e16c 114 if ((error = p_read(pack->mwf.fd, hdr, sizeof(*hdr))) < 0) {
4aa7de15
RB
115 giterr_set(GITERR_OS, "Failed to read in pack header");
116 return error;
117 }
3412391d 118
3f93e16c 119 if (hdr->hdr_signature != ntohl(PACK_SIGNATURE)) {
73d87a09 120 giterr_set(GITERR_INDEXER, "Wrong pack signature");
4aa7de15
RB
121 return -1;
122 }
3412391d 123
3f93e16c 124 if (!pack_version_ok(hdr->hdr_version)) {
73d87a09 125 giterr_set(GITERR_INDEXER, "Wrong pack version");
4aa7de15
RB
126 return -1;
127 }
f23c4a66 128
4aa7de15 129 return 0;
3412391d
CMN
130}
131
c1af5a39 132static int objects_cmp(const void *a, const void *b)
b7c44096
CMN
133{
134 const struct entry *entrya = a;
135 const struct entry *entryb = b;
136
137 return git_oid_cmp(&entrya->oid, &entryb->oid);
138}
139
c1af5a39
CMN
140static int cache_cmp(const void *a, const void *b)
141{
142 const struct git_pack_entry *ea = a;
143 const struct git_pack_entry *eb = b;
144
145 return git_oid_cmp(&ea->sha1, &eb->sha1);
146}
147
216863c4
BS
148int git_indexer_stream_new(
149 git_indexer_stream **out,
150 const char *prefix,
7d222e13 151 git_transfer_progress_callback progress_cb,
216863c4 152 void *progress_payload)
3f93e16c
CMN
153{
154 git_indexer_stream *idx;
155 git_buf path = GIT_BUF_INIT;
37159957 156 static const char suff[] = "/pack";
3f93e16c 157 int error;
c1af5a39 158
3f93e16c
CMN
159 idx = git__calloc(1, sizeof(git_indexer_stream));
160 GITERR_CHECK_ALLOC(idx);
216863c4
BS
161 idx->progress_cb = progress_cb;
162 idx->progress_payload = progress_payload;
3f93e16c
CMN
163
164 error = git_buf_joinpath(&path, prefix, suff);
165 if (error < 0)
166 goto cleanup;
167
168 error = git_filebuf_open(&idx->pack_file, path.ptr,
169 GIT_FILEBUF_TEMPORARY | GIT_FILEBUF_DO_NOT_BUFFER);
170 git_buf_free(&path);
171 if (error < 0)
172 goto cleanup;
173
174 *out = idx;
175 return 0;
176
177cleanup:
178 git_buf_free(&path);
179 git_filebuf_cleanup(&idx->pack_file);
180 git__free(idx);
181 return -1;
182}
183
184/* Try to store the delta so we can try to resolve it later */
5a3ad89d 185static int store_delta(git_indexer_stream *idx)
3412391d 186{
3f93e16c 187 struct delta_info *delta;
3f93e16c 188
453ab98d
CMN
189 delta = git__calloc(1, sizeof(struct delta_info));
190 GITERR_CHECK_ALLOC(delta);
5a3ad89d 191 delta->delta_off = idx->entry_start;
453ab98d 192
3f93e16c 193 if (git_vector_insert(&idx->deltas, delta) < 0)
453ab98d
CMN
194 return -1;
195
196 return 0;
197}
198
f56f8585
CMN
199static void hash_header(git_hash_ctx *ctx, git_off_t len, git_otype type)
200{
201 char buffer[64];
202 size_t hdrlen;
203
bdb94c21 204 hdrlen = git_odb__format_object_header(buffer, sizeof(buffer), (size_t)len, type);
f56f8585
CMN
205 git_hash_update(ctx, buffer, hdrlen);
206}
207
6481a68d 208static int hash_object_stream(git_indexer_stream *idx, git_packfile_stream *stream)
f56f8585 209{
f56f8585
CMN
210 ssize_t read;
211
6481a68d 212 assert(idx && stream);
f56f8585
CMN
213
214 do {
6481a68d 215 if ((read = git_packfile_stream_read(stream, idx->objbuf, sizeof(idx->objbuf))) < 0)
f56f8585
CMN
216 break;
217
6481a68d 218 git_hash_update(&idx->hash_ctx, idx->objbuf, read);
f56f8585
CMN
219 } while (read > 0);
220
221 if (read < 0)
222 return (int)read;
223
224 return 0;
225}
226
5a3ad89d
CMN
227/* In order to create the packfile stream, we need to skip over the delta base description */
228static int advance_delta_offset(git_indexer_stream *idx, git_otype type)
229{
230 git_mwindow *w = NULL;
231
232 assert(type == GIT_OBJ_REF_DELTA || type == GIT_OBJ_OFS_DELTA);
233
234 if (type == GIT_OBJ_REF_DELTA) {
235 idx->off += GIT_OID_RAWSZ;
236 } else {
237 git_off_t base_off = get_delta_base(idx->pack, &w, &idx->off, type, idx->entry_start);
238 git_mwindow_close(&w);
239 if (base_off < 0)
240 return (int)base_off;
241 }
242
243 return 0;
244}
245
246/* Read from the stream and discard any output */
6481a68d 247static int read_object_stream(git_indexer_stream *idx, git_packfile_stream *stream)
5a3ad89d 248{
5a3ad89d
CMN
249 ssize_t read;
250
251 assert(stream);
252
253 do {
6481a68d 254 read = git_packfile_stream_read(stream, idx->objbuf, sizeof(idx->objbuf));
5a3ad89d
CMN
255 } while (read > 0);
256
257 if (read < 0)
258 return (int)read;
259
260 return 0;
261}
262
3908c254
CMN
263static int crc_object(uint32_t *crc_out, git_mwindow_file *mwf, git_off_t start, git_off_t size)
264{
265 void *ptr;
266 uint32_t crc;
267 unsigned int left, len;
268 git_mwindow *w = NULL;
269
270 crc = crc32(0L, Z_NULL, 0);
271 while (size) {
bdb94c21 272 ptr = git_mwindow_open(mwf, &w, start, (size_t)size, &left);
3908c254
CMN
273 if (ptr == NULL)
274 return -1;
275
bdb94c21 276 len = min(left, (size_t)size);
3908c254
CMN
277 crc = crc32(crc, ptr, len);
278 size -= len;
279 start += len;
280 git_mwindow_close(&w);
281 }
282
283 *crc_out = htonl(crc);
284 return 0;
285}
286
5a3ad89d 287static int store_object(git_indexer_stream *idx)
f56f8585
CMN
288{
289 int i;
290 git_oid oid;
f56f8585
CMN
291 struct entry *entry;
292 git_off_t entry_size;
f56f8585 293 struct git_pack_entry *pentry;
5a3ad89d 294 git_hash_ctx *ctx = &idx->hash_ctx;
5a3ad89d 295 git_off_t entry_start = idx->entry_start;
f56f8585
CMN
296
297 entry = git__calloc(1, sizeof(*entry));
298 GITERR_CHECK_ALLOC(entry);
299
300 pentry = git__malloc(sizeof(struct git_pack_entry));
301 GITERR_CHECK_ALLOC(pentry);
302
303 git_hash_final(&oid, ctx);
304 entry_size = idx->off - entry_start;
305 if (entry_start > UINT31_MAX) {
306 entry->offset = UINT32_MAX;
307 entry->offset_long = entry_start;
308 } else {
309 entry->offset = (uint32_t)entry_start;
310 }
311
312 git_oid_cpy(&pentry->sha1, &oid);
313 pentry->offset = entry_start;
5a3ad89d
CMN
314 if (git_vector_insert(&idx->pack->cache, pentry) < 0) {
315 git__free(pentry);
f56f8585 316 goto on_error;
5a3ad89d 317 }
f56f8585
CMN
318
319 git_oid_cpy(&entry->oid, &oid);
f56f8585 320
3908c254 321 if (crc_object(&entry->crc, &idx->pack->mwf, entry_start, entry_size) < 0)
f56f8585
CMN
322 goto on_error;
323
f56f8585
CMN
324 /* Add the object to the list */
325 if (git_vector_insert(&idx->objects, entry) < 0)
326 goto on_error;
327
328 for (i = oid.id[0]; i < 256; ++i) {
329 idx->fanout[i]++;
330 }
331
332 return 0;
333
334on_error:
f56f8585
CMN
335 git__free(entry);
336
337 return -1;
338}
339
453ab98d
CMN
340static int hash_and_save(git_indexer_stream *idx, git_rawobj *obj, git_off_t entry_start)
341{
342 int i;
343 git_oid oid;
453ab98d 344 size_t entry_size;
453ab98d 345 struct entry *entry;
453ab98d
CMN
346 struct git_pack_entry *pentry;
347
348 entry = git__calloc(1, sizeof(*entry));
349 GITERR_CHECK_ALLOC(entry);
350
351 if (entry_start > UINT31_MAX) {
352 entry->offset = UINT32_MAX;
353 entry->offset_long = entry_start;
354 } else {
355 entry->offset = (uint32_t)entry_start;
356 }
357
358 /* FIXME: Parse the object instead of hashing it */
359 if (git_odb__hashobj(&oid, obj) < 0) {
73d87a09 360 giterr_set(GITERR_INDEXER, "Failed to hash object");
453ab98d
CMN
361 return -1;
362 }
363
364 pentry = git__malloc(sizeof(struct git_pack_entry));
365 GITERR_CHECK_ALLOC(pentry);
366
367 git_oid_cpy(&pentry->sha1, &oid);
368 pentry->offset = entry_start;
4cc7342e
SC
369 if (git_vector_insert(&idx->pack->cache, pentry) < 0) {
370 git__free(pentry);
3f93e16c 371 goto on_error;
4cc7342e 372 }
3f93e16c 373
453ab98d
CMN
374 git_oid_cpy(&entry->oid, &oid);
375 entry->crc = crc32(0L, Z_NULL, 0);
376
377 entry_size = (size_t)(idx->off - entry_start);
3908c254 378 if (crc_object(&entry->crc, &idx->pack->mwf, entry_start, entry_size) < 0)
453ab98d
CMN
379 goto on_error;
380
453ab98d
CMN
381 /* Add the object to the list */
382 if (git_vector_insert(&idx->objects, entry) < 0)
383 goto on_error;
384
385 for (i = oid.id[0]; i < 256; ++i) {
386 idx->fanout[i]++;
387 }
388
3f93e16c 389 return 0;
3412391d 390
3f93e16c 391on_error:
453ab98d 392 git__free(entry);
453ab98d 393 git__free(obj->data);
3f93e16c
CMN
394 return -1;
395}
3412391d 396
7d222e13 397static void do_progress_callback(git_indexer_stream *idx, git_transfer_progress *stats)
216863c4
BS
398{
399 if (!idx->progress_cb) return;
400 idx->progress_cb(stats, idx->progress_payload);
401}
402
7d222e13 403int git_indexer_stream_add(git_indexer_stream *idx, const void *data, size_t size, git_transfer_progress *stats)
3f93e16c
CMN
404{
405 int error;
406 struct git_pack_header hdr;
3f035860 407 size_t processed;
3f93e16c 408 git_mwindow_file *mwf = &idx->pack->mwf;
3412391d 409
3f93e16c
CMN
410 assert(idx && data && stats);
411
7d222e13 412 processed = stats->indexed_objects;
3f035860 413
3f93e16c
CMN
414 if (git_filebuf_write(&idx->pack_file, data, size) < 0)
415 return -1;
416
417 /* Make sure we set the new size of the pack */
418 if (idx->opened_pack) {
419 idx->pack->mwf.size += size;
420 //printf("\nadding %zu for %zu\n", size, idx->pack->mwf.size);
421 } else {
422 if (open_pack(&idx->pack, idx->pack_file.path_lock) < 0)
423 return -1;
424 idx->opened_pack = 1;
425 mwf = &idx->pack->mwf;
426 if (git_mwindow_file_register(&idx->pack->mwf) < 0)
427 return -1;
3f93e16c
CMN
428 }
429
430 if (!idx->parsed_header) {
431 if ((unsigned)idx->pack->mwf.size < sizeof(hdr))
432 return 0;
433
434 if (parse_header(&hdr, idx->pack) < 0)
435 return -1;
436
437 idx->parsed_header = 1;
438 idx->nr_objects = ntohl(hdr.hdr_entries);
439 idx->off = sizeof(struct git_pack_header);
440
441 /* for now, limit to 2^32 objects */
442 assert(idx->nr_objects == (size_t)((unsigned int)idx->nr_objects));
443
444 if (git_vector_init(&idx->pack->cache, (unsigned int)idx->nr_objects, cache_cmp) < 0)
445 return -1;
446
447 idx->pack->has_cache = 1;
448 if (git_vector_init(&idx->objects, (unsigned int)idx->nr_objects, objects_cmp) < 0)
449 return -1;
450
451 if (git_vector_init(&idx->deltas, (unsigned int)(idx->nr_objects / 2), NULL) < 0)
452 return -1;
453
81eecc34
BS
454 stats->received_objects = 0;
455 stats->indexed_objects = 0;
7d222e13 456 stats->total_objects = (unsigned int)idx->nr_objects;
216863c4 457 do_progress_callback(idx, stats);
3f93e16c
CMN
458 }
459
460 /* Now that we have data in the pack, let's try to parse it */
461
462 /* As the file grows any windows we try to use will be out of date */
463 git_mwindow_free_all(mwf);
464 while (processed < idx->nr_objects) {
f56f8585 465 git_packfile_stream *stream = &idx->stream;
3f93e16c 466 git_off_t entry_start = idx->off;
d1af70b0
CMN
467 size_t entry_size;
468 git_otype type;
469 git_mwindow *w = NULL;
3412391d 470
3f93e16c
CMN
471 if (idx->pack->mwf.size <= idx->off + 20)
472 return 0;
473
f56f8585
CMN
474 if (!idx->have_stream) {
475 error = git_packfile_unpack_header(&entry_size, &type, mwf, &w, &idx->off);
d1af70b0
CMN
476 if (error == GIT_EBUFS) {
477 idx->off = entry_start;
3f93e16c 478 return 0;
d1af70b0 479 }
3f93e16c 480 if (error < 0)
f56f8585
CMN
481 return -1;
482
483 git_mwindow_close(&w);
484 idx->entry_start = entry_start;
5a3ad89d 485 git_hash_ctx_init(&idx->hash_ctx);
f56f8585
CMN
486
487 if (type == GIT_OBJ_REF_DELTA || type == GIT_OBJ_OFS_DELTA) {
5a3ad89d 488 error = advance_delta_offset(idx, type);
f56f8585
CMN
489 if (error == GIT_EBUFS) {
490 idx->off = entry_start;
491 return 0;
492 }
493 if (error < 0)
5a3ad89d 494 return -1;
f56f8585 495
5a3ad89d
CMN
496 idx->have_delta = 1;
497 } else {
498 idx->have_delta = 0;
499 hash_header(&idx->hash_ctx, entry_size, type);
f56f8585 500 }
d1af70b0 501
f56f8585 502 idx->have_stream = 1;
f56f8585
CMN
503 if (git_packfile_stream_open(stream, idx->pack, idx->off) < 0)
504 goto on_error;
5a3ad89d
CMN
505
506 }
507
508 if (idx->have_delta) {
6481a68d 509 error = read_object_stream(idx, stream);
5a3ad89d 510 } else {
6481a68d 511 error = hash_object_stream(idx, stream);
3f93e16c
CMN
512 }
513
5a3ad89d 514 idx->off = stream->curpos;
f56f8585 515 if (error == GIT_EBUFS)
d1af70b0 516 return 0;
5a3ad89d
CMN
517
518 /* We want to free the stream reasorces no matter what here */
519 idx->have_stream = 0;
520 git_packfile_stream_free(stream);
521
d1af70b0 522 if (error < 0)
3f93e16c 523 goto on_error;
3f93e16c 524
5a3ad89d
CMN
525 if (idx->have_delta) {
526 error = store_delta(idx);
527 } else {
528 error = store_object(idx);
529 }
530
531 if (error < 0)
f56f8585 532 goto on_error;
3f93e16c 533
5a3ad89d
CMN
534 if (!idx->have_delta) {
535 stats->indexed_objects = (unsigned int)++processed;
536 }
7d222e13 537 stats->received_objects++;
f56f8585 538
216863c4 539 do_progress_callback(idx, stats);
453ab98d 540 }
3f93e16c 541
453ab98d 542 return 0;
3f93e16c 543
453ab98d
CMN
544on_error:
545 git_mwindow_free_all(mwf);
546 return -1;
547}
3f93e16c 548
453ab98d
CMN
549static int index_path_stream(git_buf *path, git_indexer_stream *idx, const char *suffix)
550{
551 const char prefix[] = "pack-";
552 size_t slash = (size_t)path->size;
3f93e16c 553
453ab98d
CMN
554 /* search backwards for '/' */
555 while (slash > 0 && path->ptr[slash - 1] != '/')
556 slash--;
3f93e16c 557
453ab98d
CMN
558 if (git_buf_grow(path, slash + 1 + strlen(prefix) +
559 GIT_OID_HEXSZ + strlen(suffix) + 1) < 0)
560 return -1;
561
562 git_buf_truncate(path, slash);
563 git_buf_puts(path, prefix);
fa6420f7 564 git_oid_fmt(path->ptr + git_buf_len(path), &idx->hash);
453ab98d
CMN
565 path->size += GIT_OID_HEXSZ;
566 git_buf_puts(path, suffix);
567
568 return git_buf_oom(path) ? -1 : 0;
569}
570
7d222e13 571static int resolve_deltas(git_indexer_stream *idx, git_transfer_progress *stats)
453ab98d
CMN
572{
573 unsigned int i;
574 struct delta_info *delta;
575
576 git_vector_foreach(&idx->deltas, i, delta) {
577 git_rawobj obj;
578
579 idx->off = delta->delta_off;
580 if (git_packfile_unpack(&obj, idx->pack, &idx->off) < 0)
581 return -1;
582
583 if (hash_and_save(idx, &obj, delta->delta_off) < 0)
584 return -1;
3f93e16c
CMN
585
586 git__free(obj.data);
7d222e13 587 stats->indexed_objects++;
909f6265 588 do_progress_callback(idx, stats);
453ab98d 589 }
3f93e16c 590
453ab98d
CMN
591 return 0;
592}
593
7d222e13 594int git_indexer_stream_finalize(git_indexer_stream *idx, git_transfer_progress *stats)
453ab98d
CMN
595{
596 git_mwindow *w = NULL;
597 unsigned int i, long_offsets = 0, left;
598 struct git_pack_idx_header hdr;
599 git_buf filename = GIT_BUF_INIT;
600 struct entry *entry;
601 void *packfile_hash;
602 git_oid file_hash;
603bee07 603 git_hash_ctx ctx;
d6fb0924 604
603bee07
ET
605 if (git_hash_ctx_init(&ctx) < 0)
606 return -1;
453ab98d 607
6a9d61ef
CMN
608 /* Test for this before resolve_deltas(), as it plays with idx->off */
609 if (idx->off < idx->pack->mwf.size - GIT_OID_RAWSZ) {
826bc4a8 610 giterr_set(GITERR_INDEXER, "Indexing error: unexpected data at the end of the pack");
6a9d61ef
CMN
611 return -1;
612 }
613
453ab98d
CMN
614 if (idx->deltas.length > 0)
615 if (resolve_deltas(idx, stats) < 0)
616 return -1;
617
7d222e13 618 if (stats->indexed_objects != stats->total_objects) {
6a9d61ef
CMN
619 giterr_set(GITERR_INDEXER, "Indexing error: early EOF");
620 return -1;
621 }
622
453ab98d
CMN
623 git_vector_sort(&idx->objects);
624
625 git_buf_sets(&filename, idx->pack->pack_name);
626 git_buf_truncate(&filename, filename.size - strlen("pack"));
627 git_buf_puts(&filename, "idx");
628 if (git_buf_oom(&filename))
629 return -1;
630
631 if (git_filebuf_open(&idx->index_file, filename.ptr, GIT_FILEBUF_HASH_CONTENTS) < 0)
632 goto on_error;
633
634 /* Write out the header */
635 hdr.idx_signature = htonl(PACK_IDX_SIGNATURE);
636 hdr.idx_version = htonl(2);
637 git_filebuf_write(&idx->index_file, &hdr, sizeof(hdr));
638
639 /* Write out the fanout table */
640 for (i = 0; i < 256; ++i) {
641 uint32_t n = htonl(idx->fanout[i]);
642 git_filebuf_write(&idx->index_file, &n, sizeof(n));
3412391d
CMN
643 }
644
453ab98d 645 /* Write out the object names (SHA-1 hashes) */
453ab98d
CMN
646 git_vector_foreach(&idx->objects, i, entry) {
647 git_filebuf_write(&idx->index_file, &entry->oid, sizeof(git_oid));
603bee07 648 git_hash_update(&ctx, &entry->oid, GIT_OID_RAWSZ);
453ab98d 649 }
603bee07 650 git_hash_final(&idx->hash, &ctx);
453ab98d
CMN
651
652 /* Write out the CRC32 values */
653 git_vector_foreach(&idx->objects, i, entry) {
654 git_filebuf_write(&idx->index_file, &entry->crc, sizeof(uint32_t));
655 }
656
657 /* Write out the offsets */
658 git_vector_foreach(&idx->objects, i, entry) {
659 uint32_t n;
660
661 if (entry->offset == UINT32_MAX)
662 n = htonl(0x80000000 | long_offsets++);
663 else
664 n = htonl(entry->offset);
665
666 git_filebuf_write(&idx->index_file, &n, sizeof(uint32_t));
667 }
668
669 /* Write out the long offsets */
670 git_vector_foreach(&idx->objects, i, entry) {
671 uint32_t split[2];
672
673 if (entry->offset != UINT32_MAX)
674 continue;
675
676 split[0] = htonl(entry->offset_long >> 32);
677 split[1] = htonl(entry->offset_long & 0xffffffff);
678
679 git_filebuf_write(&idx->index_file, &split, sizeof(uint32_t) * 2);
680 }
681
682 /* Write out the packfile trailer */
683 packfile_hash = git_mwindow_open(&idx->pack->mwf, &w, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ, &left);
684 if (packfile_hash == NULL) {
685 git_mwindow_close(&w);
686 goto on_error;
687 }
688
689 memcpy(&file_hash, packfile_hash, GIT_OID_RAWSZ);
690 git_mwindow_close(&w);
691
692 git_filebuf_write(&idx->index_file, &file_hash, sizeof(git_oid));
693
694 /* Write out the packfile trailer to the idx file as well */
695 if (git_filebuf_hash(&file_hash, &idx->index_file) < 0)
696 goto on_error;
697
698 git_filebuf_write(&idx->index_file, &file_hash, sizeof(git_oid));
699
700 /* Figure out what the final name should be */
701 if (index_path_stream(&filename, idx, ".idx") < 0)
702 goto on_error;
703
704 /* Commit file */
705 if (git_filebuf_commit_at(&idx->index_file, filename.ptr, GIT_PACK_FILE_MODE) < 0)
706 goto on_error;
707
708 git_mwindow_free_all(&idx->pack->mwf);
a640d79e 709 p_close(idx->pack->mwf.fd);
453ab98d
CMN
710
711 if (index_path_stream(&filename, idx, ".pack") < 0)
712 goto on_error;
713 /* And don't forget to rename the packfile to its new place. */
714 if (git_filebuf_commit_at(&idx->pack_file, filename.ptr, GIT_PACK_FILE_MODE) < 0)
715 return -1;
716
717 git_buf_free(&filename);
3f93e16c
CMN
718 return 0;
719
720on_error:
453ab98d 721 git_mwindow_free_all(&idx->pack->mwf);
a640d79e 722 p_close(idx->pack->mwf.fd);
453ab98d
CMN
723 git_filebuf_cleanup(&idx->index_file);
724 git_buf_free(&filename);
603bee07 725 git_hash_ctx_cleanup(&ctx);
3f93e16c
CMN
726 return -1;
727}
728
1c9c081a
CMN
729void git_indexer_stream_free(git_indexer_stream *idx)
730{
731 unsigned int i;
732 struct entry *e;
733 struct git_pack_entry *pe;
734 struct delta_info *delta;
735
736 if (idx == NULL)
737 return;
738
1c9c081a
CMN
739 git_vector_foreach(&idx->objects, i, e)
740 git__free(e);
741 git_vector_free(&idx->objects);
7a57ae54
CMN
742 if (idx->pack) {
743 git_vector_foreach(&idx->pack->cache, i, pe)
744 git__free(pe);
745 git_vector_free(&idx->pack->cache);
746 }
1c9c081a
CMN
747 git_vector_foreach(&idx->deltas, i, delta)
748 git__free(delta);
749 git_vector_free(&idx->deltas);
750 git__free(idx->pack);
751 git__free(idx);
752}
753
3f93e16c
CMN
754int git_indexer_new(git_indexer **out, const char *packname)
755{
756 git_indexer *idx;
757 struct git_pack_header hdr;
758 int error;
759
760 assert(out && packname);
761
3f93e16c
CMN
762 idx = git__calloc(1, sizeof(git_indexer));
763 GITERR_CHECK_ALLOC(idx);
3412391d 764
3f93e16c
CMN
765 open_pack(&idx->pack, packname);
766
767 if ((error = parse_header(&hdr, idx->pack)) < 0)
3412391d 768 goto cleanup;
3412391d 769
3f93e16c 770 idx->nr_objects = ntohl(hdr.hdr_entries);
b7c44096 771
deafee7b
RB
772 /* for now, limit to 2^32 objects */
773 assert(idx->nr_objects == (size_t)((unsigned int)idx->nr_objects));
774
775 error = git_vector_init(&idx->pack->cache, (unsigned int)idx->nr_objects, cache_cmp);
4aa7de15 776 if (error < 0)
c1af5a39
CMN
777 goto cleanup;
778
779 idx->pack->has_cache = 1;
deafee7b 780 error = git_vector_init(&idx->objects, (unsigned int)idx->nr_objects, objects_cmp);
4aa7de15 781 if (error < 0)
b5b474dd 782 goto cleanup;
b5b474dd 783
3412391d
CMN
784 *out = idx;
785
4aa7de15 786 return 0;
3412391d
CMN
787
788cleanup:
b7c44096 789 git_indexer_free(idx);
3412391d 790
3f93e16c 791 return -1;
3412391d
CMN
792}
793
97769280 794static int index_path(git_buf *path, git_indexer *idx)
f23c4a66 795{
72d6a20b 796 const char prefix[] = "pack-", suffix[] = ".idx";
97769280 797 size_t slash = (size_t)path->size;
b7c44096 798
97769280
RB
799 /* search backwards for '/' */
800 while (slash > 0 && path->ptr[slash - 1] != '/')
801 slash--;
b7c44096 802
97769280 803 if (git_buf_grow(path, slash + 1 + strlen(prefix) +
4aa7de15
RB
804 GIT_OID_HEXSZ + strlen(suffix) + 1) < 0)
805 return -1;
97769280 806
d0ec3fb8 807 git_buf_truncate(path, slash);
97769280 808 git_buf_puts(path, prefix);
fa6420f7 809 git_oid_fmt(path->ptr + git_buf_len(path), &idx->hash);
97769280
RB
810 path->size += GIT_OID_HEXSZ;
811 git_buf_puts(path, suffix);
812
4aa7de15 813 return git_buf_oom(path) ? -1 : 0;
b7c44096
CMN
814}
815
48b3ad4f 816int git_indexer_write(git_indexer *idx)
b7c44096
CMN
817{
818 git_mwindow *w = NULL;
26e74c6a 819 int error;
f6867e63 820 unsigned int i, long_offsets = 0, left;
b7c44096 821 struct git_pack_idx_header hdr;
97769280 822 git_buf filename = GIT_BUF_INIT;
b7c44096
CMN
823 struct entry *entry;
824 void *packfile_hash;
825 git_oid file_hash;
603bee07 826 git_hash_ctx ctx;
d6fb0924 827
603bee07
ET
828 if (git_hash_ctx_init(&ctx) < 0)
829 return -1;
b7c44096
CMN
830
831 git_vector_sort(&idx->objects);
832
97769280
RB
833 git_buf_sets(&filename, idx->pack->pack_name);
834 git_buf_truncate(&filename, filename.size - strlen("pack"));
835 git_buf_puts(&filename, "idx");
cb8a7961 836 if (git_buf_oom(&filename))
4aa7de15 837 return -1;
97769280
RB
838
839 error = git_filebuf_open(&idx->file, filename.ptr, GIT_FILEBUF_HASH_CONTENTS);
4aa7de15 840 if (error < 0)
97769280 841 goto cleanup;
b7c44096
CMN
842
843 /* Write out the header */
844 hdr.idx_signature = htonl(PACK_IDX_SIGNATURE);
845 hdr.idx_version = htonl(2);
846 error = git_filebuf_write(&idx->file, &hdr, sizeof(hdr));
4aa7de15 847 if (error < 0)
97769280 848 goto cleanup;
b7c44096
CMN
849
850 /* Write out the fanout table */
851 for (i = 0; i < 256; ++i) {
852 uint32_t n = htonl(idx->fanout[i]);
853 error = git_filebuf_write(&idx->file, &n, sizeof(n));
4aa7de15 854 if (error < 0)
b7c44096
CMN
855 goto cleanup;
856 }
857
858 /* Write out the object names (SHA-1 hashes) */
b7c44096 859 git_vector_foreach(&idx->objects, i, entry) {
d6fb0924 860 if ((error = git_filebuf_write(&idx->file, &entry->oid, sizeof(git_oid))) < 0 ||
603bee07 861 (error = git_hash_update(&ctx, &entry->oid, GIT_OID_RAWSZ)) < 0)
b7c44096
CMN
862 goto cleanup;
863 }
d6fb0924 864
603bee07 865 if ((error = git_hash_final(&idx->hash, &ctx)) < 0)
d6fb0924 866 goto cleanup;
b7c44096
CMN
867
868 /* Write out the CRC32 values */
869 git_vector_foreach(&idx->objects, i, entry) {
870 error = git_filebuf_write(&idx->file, &entry->crc, sizeof(uint32_t));
4aa7de15 871 if (error < 0)
b7c44096
CMN
872 goto cleanup;
873 }
874
875 /* Write out the offsets */
876 git_vector_foreach(&idx->objects, i, entry) {
877 uint32_t n;
878
879 if (entry->offset == UINT32_MAX)
880 n = htonl(0x80000000 | long_offsets++);
881 else
882 n = htonl(entry->offset);
883
884 error = git_filebuf_write(&idx->file, &n, sizeof(uint32_t));
4aa7de15 885 if (error < 0)
b7c44096
CMN
886 goto cleanup;
887 }
888
889 /* Write out the long offsets */
890 git_vector_foreach(&idx->objects, i, entry) {
891 uint32_t split[2];
892
893 if (entry->offset != UINT32_MAX)
894 continue;
895
896 split[0] = htonl(entry->offset_long >> 32);
897 split[1] = htonl(entry->offset_long & 0xffffffff);
898
899 error = git_filebuf_write(&idx->file, &split, sizeof(uint32_t) * 2);
4aa7de15 900 if (error < 0)
b7c44096
CMN
901 goto cleanup;
902 }
903
904 /* Write out the packfile trailer */
905
3f93e16c 906 packfile_hash = git_mwindow_open(&idx->pack->mwf, &w, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ, &left);
c1af5a39 907 git_mwindow_close(&w);
b7c44096 908 if (packfile_hash == NULL) {
4aa7de15 909 error = -1;
b7c44096
CMN
910 goto cleanup;
911 }
912
913 memcpy(&file_hash, packfile_hash, GIT_OID_RAWSZ);
914
915 git_mwindow_close(&w);
916
917 error = git_filebuf_write(&idx->file, &file_hash, sizeof(git_oid));
4aa7de15
RB
918 if (error < 0)
919 goto cleanup;
b7c44096
CMN
920
921 /* Write out the index sha */
922 error = git_filebuf_hash(&file_hash, &idx->file);
4aa7de15 923 if (error < 0)
b7c44096
CMN
924 goto cleanup;
925
926 error = git_filebuf_write(&idx->file, &file_hash, sizeof(git_oid));
4aa7de15 927 if (error < 0)
b7c44096
CMN
928 goto cleanup;
929
930 /* Figure out what the final name should be */
97769280 931 error = index_path(&filename, idx);
4aa7de15 932 if (error < 0)
97769280
RB
933 goto cleanup;
934
b7c44096 935 /* Commit file */
97769280 936 error = git_filebuf_commit_at(&idx->file, filename.ptr, GIT_PACK_FILE_MODE);
b7c44096
CMN
937
938cleanup:
c1af5a39 939 git_mwindow_free_all(&idx->pack->mwf);
1d8943c6 940 git_mwindow_file_deregister(&idx->pack->mwf);
4aa7de15 941 if (error < 0)
b7c44096 942 git_filebuf_cleanup(&idx->file);
97769280 943 git_buf_free(&filename);
603bee07 944 git_hash_ctx_cleanup(&ctx);
b7c44096
CMN
945
946 return error;
947}
948
7d222e13 949int git_indexer_run(git_indexer *idx, git_transfer_progress *stats)
b7c44096
CMN
950{
951 git_mwindow_file *mwf;
e1de726c 952 git_off_t off = sizeof(struct git_pack_header);
f23c4a66 953 int error;
b7c44096
CMN
954 struct entry *entry;
955 unsigned int left, processed;
f23c4a66 956
b7c44096 957 assert(idx && stats);
b5b474dd 958
b7c44096 959 mwf = &idx->pack->mwf;
f23c4a66 960 error = git_mwindow_file_register(mwf);
4aa7de15
RB
961 if (error < 0)
962 return error;
f23c4a66 963
7d222e13
BS
964 stats->total_objects = (unsigned int)idx->nr_objects;
965 stats->indexed_objects = processed = 0;
f23c4a66 966
b7c44096 967 while (processed < idx->nr_objects) {
b5b474dd
CMN
968 git_rawobj obj;
969 git_oid oid;
c1af5a39 970 struct git_pack_entry *pentry;
b7c44096 971 git_mwindow *w = NULL;
c85e08b1 972 int i;
e1de726c 973 git_off_t entry_start = off;
b7c44096
CMN
974 void *packed;
975 size_t entry_size;
3f93e16c 976 char fmt[GIT_OID_HEXSZ] = {0};
b5b474dd 977
e1de726c
RB
978 entry = git__calloc(1, sizeof(*entry));
979 GITERR_CHECK_ALLOC(entry);
b5b474dd
CMN
980
981 if (off > UINT31_MAX) {
b7c44096
CMN
982 entry->offset = UINT32_MAX;
983 entry->offset_long = off;
b5b474dd 984 } else {
e1de726c 985 entry->offset = (uint32_t)off;
b5b474dd
CMN
986 }
987
988 error = git_packfile_unpack(&obj, idx->pack, &off);
4aa7de15 989 if (error < 0)
b5b474dd 990 goto cleanup;
b5b474dd 991
c1af5a39 992 /* FIXME: Parse the object instead of hashing it */
18e5b854 993 error = git_odb__hashobj(&oid, &obj);
4aa7de15 994 if (error < 0) {
73d87a09 995 giterr_set(GITERR_INDEXER, "Failed to hash object");
ab525a74
CMN
996 goto cleanup;
997 }
998
c1af5a39
CMN
999 pentry = git__malloc(sizeof(struct git_pack_entry));
1000 if (pentry == NULL) {
4aa7de15 1001 error = -1;
c1af5a39
CMN
1002 goto cleanup;
1003 }
4aa7de15 1004
c1af5a39
CMN
1005 git_oid_cpy(&pentry->sha1, &oid);
1006 pentry->offset = entry_start;
3f93e16c 1007 git_oid_fmt(fmt, &oid);
c1af5a39 1008 error = git_vector_insert(&idx->pack->cache, pentry);
4aa7de15 1009 if (error < 0)
c1af5a39
CMN
1010 goto cleanup;
1011
b7c44096
CMN
1012 git_oid_cpy(&entry->oid, &oid);
1013 entry->crc = crc32(0L, Z_NULL, 0);
1014
e1de726c 1015 entry_size = (size_t)(off - entry_start);
b7c44096
CMN
1016 packed = git_mwindow_open(mwf, &w, entry_start, entry_size, &left);
1017 if (packed == NULL) {
4aa7de15 1018 error = -1;
b7c44096
CMN
1019 goto cleanup;
1020 }
deafee7b 1021 entry->crc = htonl(crc32(entry->crc, packed, (uInt)entry_size));
b7c44096 1022 git_mwindow_close(&w);
ab525a74 1023
b5b474dd 1024 /* Add the object to the list */
b7c44096 1025 error = git_vector_insert(&idx->objects, entry);
4aa7de15 1026 if (error < 0)
b7c44096 1027 goto cleanup;
b5b474dd
CMN
1028
1029 for (i = oid.id[0]; i < 256; ++i) {
b7c44096 1030 idx->fanout[i]++;
b5b474dd
CMN
1031 }
1032
3286c408 1033 git__free(obj.data);
ab525a74 1034
7d222e13 1035 stats->indexed_objects = ++processed;
ab525a74
CMN
1036 }
1037
1038cleanup:
1039 git_mwindow_free_all(mwf);
1040
f23c4a66 1041 return error;
ab525a74 1042
f23c4a66
CMN
1043}
1044
ab525a74 1045void git_indexer_free(git_indexer *idx)
3412391d 1046{
b7c44096
CMN
1047 unsigned int i;
1048 struct entry *e;
c1af5a39 1049 struct git_pack_entry *pe;
b7c44096 1050
92be7908
CMN
1051 if (idx == NULL)
1052 return;
1053
7d0cdf82 1054 p_close(idx->pack->mwf.fd);
1d8943c6 1055 git_mwindow_file_deregister(&idx->pack->mwf);
b7c44096 1056 git_vector_foreach(&idx->objects, i, e)
3286c408 1057 git__free(e);
b7c44096 1058 git_vector_free(&idx->objects);
c1af5a39 1059 git_vector_foreach(&idx->pack->cache, i, pe)
3286c408 1060 git__free(pe);
c1af5a39 1061 git_vector_free(&idx->pack->cache);
3286c408
VM
1062 git__free(idx->pack);
1063 git__free(idx);
3412391d 1064}
ab525a74 1065