git.proxmox.com Git - libgit2.git/blob - src/indexer.c
Extend packfile in increments of page_size.
[libgit2.git] / src / indexer.c
1 /*
2 * Copyright (C) the libgit2 contributors. All rights reserved.
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8 #include "git2/indexer.h"
9 #include "git2/object.h"
10
11 #include "common.h"
12 #include "pack.h"
13 #include "mwindow.h"
14 #include "posix.h"
15 #include "pack.h"
16 #include "filebuf.h"
17 #include "oid.h"
18 #include "oidmap.h"
19 #include "zstream.h"
20
/*
 * NOTE(review): presumably instantiates the khash helpers for the `oid`
 * map that kh_put(oid, ...) / kh_get(oid, ...) below rely on -- confirm
 * against oidmap.h.
 */
GIT__USE_OIDMAP

/* Lock taken in git_indexer_free() around the release of the packfile. */
extern git_mutex git__mwindow_mutex;

/*
 * Largest entry offset stored directly in `struct entry::offset`; anything
 * above it is flagged with UINT32_MAX and kept in `offset_long` instead.
 */
#define UINT31_MAX (0x7FFFFFFF)
26
/* One indexed object, as it will be written into the .idx file. */
struct entry {
	git_oid oid;          /* id of the object */
	uint32_t crc;         /* CRC32 of the packed entry bytes, already passed through htonl() */
	uint32_t offset;      /* offset of the entry in the pack, or UINT32_MAX if offset_long is used */
	uint64_t offset_long; /* full offset; only meaningful when offset == UINT32_MAX */
};
33
/* State of a streaming pack indexer: the pack being written plus parse progress. */
struct git_indexer {
	unsigned int parsed_header :1, /* set once the pack header has been read and validated */
		opened_pack :1,        /* not referenced by the code visible in this file */
		have_stream :1,        /* an object stream is open and may need to be resumed */
		have_delta :1;         /* the entry currently being read is a REF/OFS delta */
	struct git_pack_header hdr;    /* copy of the pack header */
	struct git_pack_file *pack;    /* the (temporary) packfile being filled */
	unsigned int mode;             /* file mode used for the pack and index files */
	git_off_t off;                 /* current parse position in the pack */
	git_off_t entry_start;         /* offset at which the current entry begins */
	git_packfile_stream stream;    /* stream used to inflate the current entry */
	size_t nr_objects;             /* object count announced by the pack header */
	git_vector objects;            /* struct entry * for every indexed object */
	git_vector deltas;             /* struct delta_info * still waiting to be resolved */
	unsigned int fanout[256];      /* fanout[i] = number of objects whose first oid byte is <= i */
	git_hash_ctx hash_ctx;         /* hashes the object currently being streamed */
	git_oid hash;                  /* hash over the sorted object ids; used to name the final files */
	git_transfer_progress_cb progress_cb; /* optional progress callback */
	void *progress_payload;        /* opaque pointer handed to progress_cb */
	char objbuf[8*1024];           /* scratch buffer for stream reads */

	/* Needed to look up objects which we want to inject to fix a thin pack */
	git_odb *odb;

	/* Fields for calculating the packfile trailer (hash of everything before it) */
	char inbuf[GIT_OID_RAWSZ];     /* most recent bytes, held back from the trailer hash */
	size_t inbuf_len;              /* number of valid bytes in inbuf */
	git_hash_ctx trailer;          /* running hash of all pack data except the held-back tail */
};
63
/* A delta entry whose resolution is postponed until git_indexer_commit(). */
struct delta_info {
	git_off_t delta_off; /* offset in the pack at which the delta entry starts */
};
67
68 const git_oid *git_indexer_hash(const git_indexer *idx)
69 {
70 return &idx->hash;
71 }
72
73 static int parse_header(struct git_pack_header *hdr, struct git_pack_file *pack)
74 {
75 int error;
76 git_map map;
77
78 if ((error = p_mmap(&map, sizeof(*hdr), GIT_PROT_READ, GIT_MAP_SHARED, pack->mwf.fd, 0)) < 0)
79 return error;
80
81 memcpy(hdr, map.data, sizeof(*hdr));
82 p_munmap(&map);
83
84 /* Verify we recognize this pack file format. */
85 if (hdr->hdr_signature != ntohl(PACK_SIGNATURE)) {
86 giterr_set(GITERR_INDEXER, "Wrong pack signature");
87 return -1;
88 }
89
90 if (!pack_version_ok(hdr->hdr_version)) {
91 giterr_set(GITERR_INDEXER, "Wrong pack version");
92 return -1;
93 }
94
95 return 0;
96 }
97
98 static int objects_cmp(const void *a, const void *b)
99 {
100 const struct entry *entrya = a;
101 const struct entry *entryb = b;
102
103 return git_oid__cmp(&entrya->oid, &entryb->oid);
104 }
105
106 int git_indexer_new(
107 git_indexer **out,
108 const char *prefix,
109 unsigned int mode,
110 git_odb *odb,
111 git_transfer_progress_cb progress_cb,
112 void *progress_payload)
113 {
114 git_indexer *idx;
115 git_buf path = GIT_BUF_INIT, tmp_path = GIT_BUF_INIT;
116 static const char suff[] = "/pack";
117 int error, fd = -1;
118
119 idx = git__calloc(1, sizeof(git_indexer));
120 GITERR_CHECK_ALLOC(idx);
121 idx->odb = odb;
122 idx->progress_cb = progress_cb;
123 idx->progress_payload = progress_payload;
124 idx->mode = mode ? mode : GIT_PACK_FILE_MODE;
125 git_hash_ctx_init(&idx->hash_ctx);
126 git_hash_ctx_init(&idx->trailer);
127
128 error = git_buf_joinpath(&path, prefix, suff);
129 if (error < 0)
130 goto cleanup;
131
132 fd = git_futils_mktmp(&tmp_path, git_buf_cstr(&path), idx->mode);
133 git_buf_free(&path);
134 if (fd < 0)
135 goto cleanup;
136
137 error = git_packfile_alloc(&idx->pack, git_buf_cstr(&tmp_path));
138 git_buf_free(&tmp_path);
139
140 if (error < 0)
141 goto cleanup;
142
143 idx->pack->mwf.fd = fd;
144 if ((error = git_mwindow_file_register(&idx->pack->mwf)) < 0)
145 goto cleanup;
146
147 *out = idx;
148 return 0;
149
150 cleanup:
151 if (fd != -1)
152 p_close(fd);
153
154 git_buf_free(&path);
155 git_buf_free(&tmp_path);
156 git__free(idx);
157 return -1;
158 }
159
160 /* Try to store the delta so we can try to resolve it later */
161 static int store_delta(git_indexer *idx)
162 {
163 struct delta_info *delta;
164
165 delta = git__calloc(1, sizeof(struct delta_info));
166 GITERR_CHECK_ALLOC(delta);
167 delta->delta_off = idx->entry_start;
168
169 if (git_vector_insert(&idx->deltas, delta) < 0)
170 return -1;
171
172 return 0;
173 }
174
175 static void hash_header(git_hash_ctx *ctx, git_off_t len, git_otype type)
176 {
177 char buffer[64];
178 size_t hdrlen;
179
180 hdrlen = git_odb__format_object_header(buffer, sizeof(buffer), (size_t)len, type);
181 git_hash_update(ctx, buffer, hdrlen);
182 }
183
184 static int hash_object_stream(git_indexer*idx, git_packfile_stream *stream)
185 {
186 ssize_t read;
187
188 assert(idx && stream);
189
190 do {
191 if ((read = git_packfile_stream_read(stream, idx->objbuf, sizeof(idx->objbuf))) < 0)
192 break;
193
194 git_hash_update(&idx->hash_ctx, idx->objbuf, read);
195 } while (read > 0);
196
197 if (read < 0)
198 return (int)read;
199
200 return 0;
201 }
202
203 /* In order to create the packfile stream, we need to skip over the delta base description */
204 static int advance_delta_offset(git_indexer *idx, git_otype type)
205 {
206 git_mwindow *w = NULL;
207
208 assert(type == GIT_OBJ_REF_DELTA || type == GIT_OBJ_OFS_DELTA);
209
210 if (type == GIT_OBJ_REF_DELTA) {
211 idx->off += GIT_OID_RAWSZ;
212 } else {
213 git_off_t base_off = get_delta_base(idx->pack, &w, &idx->off, type, idx->entry_start);
214 git_mwindow_close(&w);
215 if (base_off < 0)
216 return (int)base_off;
217 }
218
219 return 0;
220 }
221
222 /* Read from the stream and discard any output */
223 static int read_object_stream(git_indexer *idx, git_packfile_stream *stream)
224 {
225 ssize_t read;
226
227 assert(stream);
228
229 do {
230 read = git_packfile_stream_read(stream, idx->objbuf, sizeof(idx->objbuf));
231 } while (read > 0);
232
233 if (read < 0)
234 return (int)read;
235
236 return 0;
237 }
238
239 static int crc_object(uint32_t *crc_out, git_mwindow_file *mwf, git_off_t start, git_off_t size)
240 {
241 void *ptr;
242 uint32_t crc;
243 unsigned int left, len;
244 git_mwindow *w = NULL;
245
246 crc = crc32(0L, Z_NULL, 0);
247 while (size) {
248 ptr = git_mwindow_open(mwf, &w, start, (size_t)size, &left);
249 if (ptr == NULL)
250 return -1;
251
252 len = min(left, (unsigned int)size);
253 crc = crc32(crc, ptr, len);
254 size -= len;
255 start += len;
256 git_mwindow_close(&w);
257 }
258
259 *crc_out = htonl(crc);
260 return 0;
261 }
262
263 static int store_object(git_indexer *idx)
264 {
265 int i, error;
266 khiter_t k;
267 git_oid oid;
268 struct entry *entry;
269 git_off_t entry_size;
270 struct git_pack_entry *pentry;
271 git_off_t entry_start = idx->entry_start;
272
273 entry = git__calloc(1, sizeof(*entry));
274 GITERR_CHECK_ALLOC(entry);
275
276 pentry = git__calloc(1, sizeof(struct git_pack_entry));
277 GITERR_CHECK_ALLOC(pentry);
278
279 git_hash_final(&oid, &idx->hash_ctx);
280 entry_size = idx->off - entry_start;
281 if (entry_start > UINT31_MAX) {
282 entry->offset = UINT32_MAX;
283 entry->offset_long = entry_start;
284 } else {
285 entry->offset = (uint32_t)entry_start;
286 }
287
288 git_oid_cpy(&pentry->sha1, &oid);
289 pentry->offset = entry_start;
290
291 k = kh_put(oid, idx->pack->idx_cache, &pentry->sha1, &error);
292 if (error == -1) {
293 git__free(pentry);
294 giterr_set_oom();
295 goto on_error;
296 }
297
298 if (error == 0) {
299 giterr_set(GITERR_INDEXER, "duplicate object %s found in pack", git_oid_tostr_s(&pentry->sha1));
300 git__free(pentry);
301 goto on_error;
302 }
303
304
305 kh_value(idx->pack->idx_cache, k) = pentry;
306
307 git_oid_cpy(&entry->oid, &oid);
308
309 if (crc_object(&entry->crc, &idx->pack->mwf, entry_start, entry_size) < 0)
310 goto on_error;
311
312 /* Add the object to the list */
313 if (git_vector_insert(&idx->objects, entry) < 0)
314 goto on_error;
315
316 for (i = oid.id[0]; i < 256; ++i) {
317 idx->fanout[i]++;
318 }
319
320 return 0;
321
322 on_error:
323 git__free(entry);
324
325 return -1;
326 }
327
328 GIT_INLINE(bool) has_entry(git_indexer *idx, git_oid *id)
329 {
330 khiter_t k;
331 k = kh_get(oid, idx->pack->idx_cache, id);
332 return (k != kh_end(idx->pack->idx_cache));
333 }
334
335 static int save_entry(git_indexer *idx, struct entry *entry, struct git_pack_entry *pentry, git_off_t entry_start)
336 {
337 int i, error;
338 khiter_t k;
339
340 if (entry_start > UINT31_MAX) {
341 entry->offset = UINT32_MAX;
342 entry->offset_long = entry_start;
343 } else {
344 entry->offset = (uint32_t)entry_start;
345 }
346
347 pentry->offset = entry_start;
348 k = kh_put(oid, idx->pack->idx_cache, &pentry->sha1, &error);
349
350 if (error <= 0) {
351 giterr_set(GITERR_INDEXER, "cannot insert object into pack");
352 return -1;
353 }
354
355 kh_value(idx->pack->idx_cache, k) = pentry;
356
357 /* Add the object to the list */
358 if (git_vector_insert(&idx->objects, entry) < 0)
359 return -1;
360
361 for (i = entry->oid.id[0]; i < 256; ++i) {
362 idx->fanout[i]++;
363 }
364
365 return 0;
366 }
367
368 static int hash_and_save(git_indexer *idx, git_rawobj *obj, git_off_t entry_start)
369 {
370 git_oid oid;
371 size_t entry_size;
372 struct entry *entry;
373 struct git_pack_entry *pentry = NULL;
374
375 entry = git__calloc(1, sizeof(*entry));
376 GITERR_CHECK_ALLOC(entry);
377
378 if (git_odb__hashobj(&oid, obj) < 0) {
379 giterr_set(GITERR_INDEXER, "Failed to hash object");
380 goto on_error;
381 }
382
383 pentry = git__calloc(1, sizeof(struct git_pack_entry));
384 GITERR_CHECK_ALLOC(pentry);
385
386 git_oid_cpy(&pentry->sha1, &oid);
387 git_oid_cpy(&entry->oid, &oid);
388 entry->crc = crc32(0L, Z_NULL, 0);
389
390 entry_size = (size_t)(idx->off - entry_start);
391 if (crc_object(&entry->crc, &idx->pack->mwf, entry_start, entry_size) < 0)
392 goto on_error;
393
394 return save_entry(idx, entry, pentry, entry_start);
395
396 on_error:
397 git__free(pentry);
398 git__free(entry);
399 git__free(obj->data);
400 return -1;
401 }
402
403 static int do_progress_callback(git_indexer *idx, git_transfer_progress *stats)
404 {
405 if (idx->progress_cb)
406 return giterr_set_after_callback_function(
407 idx->progress_cb(stats, idx->progress_payload),
408 "indexer progress");
409 return 0;
410 }
411
412 /* Hash everything but the last 20B of input */
413 static void hash_partially(git_indexer *idx, const uint8_t *data, size_t size)
414 {
415 size_t to_expell, to_keep;
416
417 if (size == 0)
418 return;
419
420 /* Easy case, dump the buffer and the data minus the last 20 bytes */
421 if (size >= GIT_OID_RAWSZ) {
422 git_hash_update(&idx->trailer, idx->inbuf, idx->inbuf_len);
423 git_hash_update(&idx->trailer, data, size - GIT_OID_RAWSZ);
424
425 data += size - GIT_OID_RAWSZ;
426 memcpy(idx->inbuf, data, GIT_OID_RAWSZ);
427 idx->inbuf_len = GIT_OID_RAWSZ;
428 return;
429 }
430
431 /* We can just append */
432 if (idx->inbuf_len + size <= GIT_OID_RAWSZ) {
433 memcpy(idx->inbuf + idx->inbuf_len, data, size);
434 idx->inbuf_len += size;
435 return;
436 }
437
438 /* We need to partially drain the buffer and then append */
439 to_keep = GIT_OID_RAWSZ - size;
440 to_expell = idx->inbuf_len - to_keep;
441
442 git_hash_update(&idx->trailer, idx->inbuf, to_expell);
443
444 memmove(idx->inbuf, idx->inbuf + to_expell, to_keep);
445 memcpy(idx->inbuf + to_keep, data, size);
446 idx->inbuf_len += size - to_expell;
447 }
448
449 static int write_at(git_indexer *idx, const void *data, git_off_t offset, size_t size)
450 {
451 git_file fd = idx->pack->mwf.fd;
452 size_t mmap_alignment;
453 size_t page_offset;
454 git_off_t page_start;
455 unsigned char *map_data;
456 git_map map;
457 int error;
458
459 assert(data && size);
460
461 if ((error = git__mmap_alignment(&mmap_alignment)) < 0)
462 return error;
463
464 /* the offset needs to be at the mmap boundary for the platform */
465 page_offset = offset % mmap_alignment;
466 page_start = offset - page_offset;
467
468 if ((error = p_mmap(&map, page_offset + size, GIT_PROT_WRITE, GIT_MAP_SHARED, fd, page_start)) < 0)
469 return error;
470
471 map_data = (unsigned char *)map.data;
472 memcpy(map_data + page_offset, data, size);
473 p_munmap(&map);
474
475 return 0;
476 }
477
478 static int append_to_pack(git_indexer *idx, const void *data, size_t size)
479 {
480 git_off_t new_size;
481 size_t mmap_alignment;
482 size_t page_offset;
483 git_off_t page_start;
484 git_off_t current_size = idx->pack->mwf.size;
485 int fd = idx->pack->mwf.fd;
486 int error;
487
488 if (!size)
489 return 0;
490
491 if ((error = git__mmap_alignment(&mmap_alignment)) < 0)
492 return error;
493
494 /* Write a single byte to force the file system to allocate space now or
495 * report an error, since we can't report errors when writing using mmap.
496 * Round the size up to the nearest page so that we only need to perform file
497 * I/O when we add a page, instead of whenever we write even a single byte. */
498 new_size = current_size + size;
499 page_offset = new_size % mmap_alignment;
500 page_start = new_size - page_offset;
501
502 if (p_lseek(fd, page_start + mmap_alignment - 1, SEEK_SET) < 0 ||
503 p_write(idx->pack->mwf.fd, data, 1) < 0) {
504 giterr_set(GITERR_OS, "cannot extend packfile '%s'", idx->pack->pack_name);
505 return -1;
506 }
507
508 return write_at(idx, data, idx->pack->mwf.size, size);
509 }
510
/*
 * Add `size` bytes of pack data to the indexer and index as many complete
 * objects as the data received so far allows.
 *
 * The function is resumable: when an entry is not completely available
 * yet (GIT_EBUFS) it returns 0 and picks up again on the next call, using
 * idx->off, idx->have_stream and idx->have_delta to remember where it was.
 *
 * Returns 0 on success (including "need more data"), or a negative error /
 * the non-zero result of the progress callback.
 *
 * NOTE(review): `idx` is dereferenced in the initializers below before the
 * assert() that checks it.
 */
int git_indexer_append(git_indexer *idx, const void *data, size_t size, git_transfer_progress *stats)
{
	int error = -1;
	size_t processed;
	struct git_pack_header *hdr = &idx->hdr;
	git_mwindow_file *mwf = &idx->pack->mwf;

	assert(idx && data && stats);

	processed = stats->indexed_objects;

	if ((error = append_to_pack(idx, data, size)) < 0)
		return error;

	/* NOTE(review): hash_partially() takes a size_t; the (int) cast looks unnecessary -- confirm. */
	hash_partially(idx, data, (int)size);

	/* Make sure we set the new size of the pack */
	idx->pack->mwf.size += size;

	if (!idx->parsed_header) {
		unsigned int total_objects;

		/* Wait until the whole pack header has arrived. */
		if ((unsigned)idx->pack->mwf.size < sizeof(struct git_pack_header))
			return 0;

		if ((error = parse_header(&idx->hdr, idx->pack)) < 0)
			return error;

		idx->parsed_header = 1;
		idx->nr_objects = ntohl(hdr->hdr_entries);
		idx->off = sizeof(struct git_pack_header);

		/* for now, limit to 2^32 objects */
		assert(idx->nr_objects == (size_t)((unsigned int)idx->nr_objects));
		if (idx->nr_objects == (size_t)((unsigned int)idx->nr_objects))
			total_objects = (unsigned int)idx->nr_objects;
		else
			total_objects = UINT_MAX;

		idx->pack->idx_cache = git_oidmap_alloc();
		GITERR_CHECK_ALLOC(idx->pack->idx_cache);

		idx->pack->has_cache = 1;
		if (git_vector_init(&idx->objects, total_objects, objects_cmp) < 0)
			return -1;

		if (git_vector_init(&idx->deltas, total_objects / 2, NULL) < 0)
			return -1;

		/* Start the statistics from scratch now that the object count is known. */
		stats->received_objects = 0;
		stats->local_objects = 0;
		stats->total_deltas = 0;
		stats->indexed_deltas = 0;
		processed = stats->indexed_objects = 0;
		stats->total_objects = total_objects;

		if ((error = do_progress_callback(idx, stats)) != 0)
			return error;
	}

	/* Now that we have data in the pack, let's try to parse it */

	/* As the file grows any windows we try to use will be out of date */
	git_mwindow_free_all(mwf);

	while (processed < idx->nr_objects) {
		git_packfile_stream *stream = &idx->stream;
		git_off_t entry_start = idx->off;
		size_t entry_size;
		git_otype type;
		git_mwindow *w = NULL;

		/* Never read into the last 20 bytes received: they may be the pack trailer. */
		if (idx->pack->mwf.size <= idx->off + 20)
			return 0;

		/* Only parse a new entry header if we are not resuming a half-read stream. */
		if (!idx->have_stream) {
			error = git_packfile_unpack_header(&entry_size, &type, mwf, &w, &idx->off);
			if (error == GIT_EBUFS) {
				/* Header incomplete: rewind and wait for more data. */
				idx->off = entry_start;
				return 0;
			}
			if (error < 0)
				goto on_error;

			git_mwindow_close(&w);
			idx->entry_start = entry_start;
			git_hash_init(&idx->hash_ctx);

			if (type == GIT_OBJ_REF_DELTA || type == GIT_OBJ_OFS_DELTA) {
				error = advance_delta_offset(idx, type);
				if (error == GIT_EBUFS) {
					idx->off = entry_start;
					return 0;
				}
				if (error < 0)
					goto on_error;

				idx->have_delta = 1;
			} else {
				/* Non-delta objects are hashed as they stream by, header first. */
				idx->have_delta = 0;
				hash_header(&idx->hash_ctx, entry_size, type);
			}

			idx->have_stream = 1;

			error = git_packfile_stream_open(stream, idx->pack, idx->off);
			if (error < 0)
				goto on_error;
		}

		/* Deltas are only read past; full objects are hashed while reading. */
		if (idx->have_delta) {
			error = read_object_stream(idx, stream);
		} else {
			error = hash_object_stream(idx, stream);
		}

		/* Remember how far the stream got, so the next call can resume. */
		idx->off = stream->curpos;
		if (error == GIT_EBUFS)
			return 0;

		/* We want to free the stream resources no matter what here */
		idx->have_stream = 0;
		git_packfile_stream_free(stream);

		if (error < 0)
			goto on_error;

		if (idx->have_delta) {
			error = store_delta(idx);
		} else {
			error = store_object(idx);
		}

		if (error < 0)
			goto on_error;

		/* Deltas count as received, but are only indexed once resolved at commit time. */
		if (!idx->have_delta) {
			stats->indexed_objects = (unsigned int)++processed;
		}
		stats->received_objects++;

		if ((error = do_progress_callback(idx, stats)) != 0)
			goto on_error;
	}

	return 0;

on_error:
	git_mwindow_free_all(mwf);
	return error;
}
662
663 static int index_path(git_buf *path, git_indexer *idx, const char *suffix)
664 {
665 const char prefix[] = "pack-";
666 size_t slash = (size_t)path->size;
667
668 /* search backwards for '/' */
669 while (slash > 0 && path->ptr[slash - 1] != '/')
670 slash--;
671
672 if (git_buf_grow(path, slash + 1 + strlen(prefix) +
673 GIT_OID_HEXSZ + strlen(suffix) + 1) < 0)
674 return -1;
675
676 git_buf_truncate(path, slash);
677 git_buf_puts(path, prefix);
678 git_oid_fmt(path->ptr + git_buf_len(path), &idx->hash);
679 path->size += GIT_OID_HEXSZ;
680 git_buf_puts(path, suffix);
681
682 return git_buf_oom(path) ? -1 : 0;
683 }
684
685 /**
686 * Rewind the packfile by the trailer, as we might need to fix the
687 * packfile by injecting objects at the tail and must overwrite it.
688 */
689 static void seek_back_trailer(git_indexer *idx)
690 {
691 idx->pack->mwf.size -= GIT_OID_RAWSZ;
692 git_mwindow_free_all(&idx->pack->mwf);
693 }
694
695 static int inject_object(git_indexer *idx, git_oid *id)
696 {
697 git_odb_object *obj;
698 struct entry *entry;
699 struct git_pack_entry *pentry = NULL;
700 git_oid foo = {{0}};
701 unsigned char hdr[64];
702 git_buf buf = GIT_BUF_INIT;
703 git_off_t entry_start;
704 const void *data;
705 size_t len, hdr_len;
706 int error;
707
708 seek_back_trailer(idx);
709 entry_start = idx->pack->mwf.size;
710
711 if (git_odb_read(&obj, idx->odb, id) < 0) {
712 giterr_set(GITERR_INDEXER, "missing delta bases");
713 return -1;
714 }
715
716 data = git_odb_object_data(obj);
717 len = git_odb_object_size(obj);
718
719 entry = git__calloc(1, sizeof(*entry));
720 GITERR_CHECK_ALLOC(entry);
721
722 entry->crc = crc32(0L, Z_NULL, 0);
723
724 /* Write out the object header */
725 hdr_len = git_packfile__object_header(hdr, len, git_odb_object_type(obj));
726 if ((error = append_to_pack(idx, hdr, hdr_len)) < 0)
727 goto cleanup;
728
729 idx->pack->mwf.size += hdr_len;
730 entry->crc = crc32(entry->crc, hdr, (uInt)hdr_len);
731
732 if ((error = git_zstream_deflatebuf(&buf, data, len)) < 0)
733 goto cleanup;
734
735 /* And then the compressed object */
736 if ((error = append_to_pack(idx, buf.ptr, buf.size)) < 0)
737 goto cleanup;
738
739 idx->pack->mwf.size += buf.size;
740 entry->crc = htonl(crc32(entry->crc, (unsigned char *)buf.ptr, (uInt)buf.size));
741 git_buf_free(&buf);
742
743 /* Write a fake trailer so the pack functions play ball */
744
745 if ((error = append_to_pack(idx, &foo, GIT_OID_RAWSZ)) < 0)
746 goto cleanup;
747
748 idx->pack->mwf.size += GIT_OID_RAWSZ;
749
750 pentry = git__calloc(1, sizeof(struct git_pack_entry));
751 GITERR_CHECK_ALLOC(pentry);
752
753 git_oid_cpy(&pentry->sha1, id);
754 git_oid_cpy(&entry->oid, id);
755 idx->off = entry_start + hdr_len + len;
756
757 error = save_entry(idx, entry, pentry, entry_start);
758
759 cleanup:
760 if (error) {
761 git__free(entry);
762 git__free(pentry);
763 }
764
765 git_odb_object_free(obj);
766 return error;
767 }
768
769 static int fix_thin_pack(git_indexer *idx, git_transfer_progress *stats)
770 {
771 int error, found_ref_delta = 0;
772 unsigned int i;
773 struct delta_info *delta;
774 size_t size;
775 git_otype type;
776 git_mwindow *w = NULL;
777 git_off_t curpos = 0;
778 unsigned char *base_info;
779 unsigned int left = 0;
780 git_oid base;
781
782 assert(git_vector_length(&idx->deltas) > 0);
783
784 if (idx->odb == NULL) {
785 giterr_set(GITERR_INDEXER, "cannot fix a thin pack without an ODB");
786 return -1;
787 }
788
789 /* Loop until we find the first REF delta */
790 git_vector_foreach(&idx->deltas, i, delta) {
791 if (!delta)
792 continue;
793
794 curpos = delta->delta_off;
795 error = git_packfile_unpack_header(&size, &type, &idx->pack->mwf, &w, &curpos);
796 if (error < 0)
797 return error;
798
799 if (type == GIT_OBJ_REF_DELTA) {
800 found_ref_delta = 1;
801 break;
802 }
803 }
804
805 if (!found_ref_delta) {
806 giterr_set(GITERR_INDEXER, "no REF_DELTA found, cannot inject object");
807 return -1;
808 }
809
810 /* curpos now points to the base information, which is an OID */
811 base_info = git_mwindow_open(&idx->pack->mwf, &w, curpos, GIT_OID_RAWSZ, &left);
812 if (base_info == NULL) {
813 giterr_set(GITERR_INDEXER, "failed to map delta information");
814 return -1;
815 }
816
817 git_oid_fromraw(&base, base_info);
818 git_mwindow_close(&w);
819
820 if (has_entry(idx, &base))
821 return 0;
822
823 if (inject_object(idx, &base) < 0)
824 return -1;
825
826 stats->local_objects++;
827
828 return 0;
829 }
830
831 static int resolve_deltas(git_indexer *idx, git_transfer_progress *stats)
832 {
833 unsigned int i;
834 struct delta_info *delta;
835 int progressed = 0, non_null = 0, progress_cb_result;
836
837 while (idx->deltas.length > 0) {
838 progressed = 0;
839 non_null = 0;
840 git_vector_foreach(&idx->deltas, i, delta) {
841 git_rawobj obj = {NULL};
842
843 if (!delta)
844 continue;
845
846 non_null = 1;
847 idx->off = delta->delta_off;
848 if (git_packfile_unpack(&obj, idx->pack, &idx->off) < 0)
849 continue;
850
851 if (hash_and_save(idx, &obj, delta->delta_off) < 0)
852 continue;
853
854 git__free(obj.data);
855 stats->indexed_objects++;
856 stats->indexed_deltas++;
857 progressed = 1;
858 if ((progress_cb_result = do_progress_callback(idx, stats)) < 0)
859 return progress_cb_result;
860
861 /* remove from the list */
862 git_vector_set(NULL, &idx->deltas, i, NULL);
863 git__free(delta);
864 }
865
866 /* if none were actually set, we're done */
867 if (!non_null)
868 break;
869
870 if (!progressed && (fix_thin_pack(idx, stats) < 0)) {
871 return -1;
872 }
873 }
874
875 return 0;
876 }
877
878 static int update_header_and_rehash(git_indexer *idx, git_transfer_progress *stats)
879 {
880 void *ptr;
881 size_t chunk = 1024*1024;
882 git_off_t hashed = 0;
883 git_mwindow *w = NULL;
884 git_mwindow_file *mwf;
885 unsigned int left;
886
887 mwf = &idx->pack->mwf;
888
889 git_hash_init(&idx->trailer);
890
891
892 /* Update the header to include the numer of local objects we injected */
893 idx->hdr.hdr_entries = htonl(stats->total_objects + stats->local_objects);
894 if (write_at(idx, &idx->hdr, 0, sizeof(struct git_pack_header)) < 0)
895 return -1;
896
897 /*
898 * We now use the same technique as before to determine the
899 * hash. We keep reading up to the end and let
900 * hash_partially() keep the existing trailer out of the
901 * calculation.
902 */
903 git_mwindow_free_all(mwf);
904 idx->inbuf_len = 0;
905 while (hashed < mwf->size) {
906 ptr = git_mwindow_open(mwf, &w, hashed, chunk, &left);
907 if (ptr == NULL)
908 return -1;
909
910 hash_partially(idx, ptr, left);
911 hashed += left;
912
913 git_mwindow_close(&w);
914 }
915
916 return 0;
917 }
918
919 int git_indexer_commit(git_indexer *idx, git_transfer_progress *stats)
920 {
921 git_mwindow *w = NULL;
922 unsigned int i, long_offsets = 0, left;
923 int error;
924 struct git_pack_idx_header hdr;
925 git_buf filename = GIT_BUF_INIT;
926 struct entry *entry;
927 git_oid trailer_hash, file_hash;
928 git_hash_ctx ctx;
929 git_filebuf index_file = {0};
930 void *packfile_trailer;
931
932 if (!idx->parsed_header) {
933 giterr_set(GITERR_INDEXER, "incomplete pack header");
934 return -1;
935 }
936
937 if (git_hash_ctx_init(&ctx) < 0)
938 return -1;
939
940 /* Test for this before resolve_deltas(), as it plays with idx->off */
941 if (idx->off + 20 < idx->pack->mwf.size) {
942 giterr_set(GITERR_INDEXER, "unexpected data at the end of the pack");
943 return -1;
944 }
945
946 packfile_trailer = git_mwindow_open(&idx->pack->mwf, &w, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ, &left);
947 if (packfile_trailer == NULL) {
948 git_mwindow_close(&w);
949 goto on_error;
950 }
951
952 /* Compare the packfile trailer as it was sent to us and what we calculated */
953 git_oid_fromraw(&file_hash, packfile_trailer);
954 git_mwindow_close(&w);
955
956 git_hash_final(&trailer_hash, &idx->trailer);
957 if (git_oid_cmp(&file_hash, &trailer_hash)) {
958 giterr_set(GITERR_INDEXER, "packfile trailer mismatch");
959 return -1;
960 }
961
962 /* Freeze the number of deltas */
963 stats->total_deltas = stats->total_objects - stats->indexed_objects;
964
965 if ((error = resolve_deltas(idx, stats)) < 0)
966 return error;
967
968 if (stats->indexed_objects != stats->total_objects) {
969 giterr_set(GITERR_INDEXER, "early EOF");
970 return -1;
971 }
972
973 if (stats->local_objects > 0) {
974 if (update_header_and_rehash(idx, stats) < 0)
975 return -1;
976
977 git_hash_final(&trailer_hash, &idx->trailer);
978 write_at(idx, &trailer_hash, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ);
979 }
980
981 git_vector_sort(&idx->objects);
982
983 git_buf_sets(&filename, idx->pack->pack_name);
984 git_buf_shorten(&filename, strlen("pack"));
985 git_buf_puts(&filename, "idx");
986 if (git_buf_oom(&filename))
987 return -1;
988
989 if (git_filebuf_open(&index_file, filename.ptr,
990 GIT_FILEBUF_HASH_CONTENTS, idx->mode) < 0)
991 goto on_error;
992
993 /* Write out the header */
994 hdr.idx_signature = htonl(PACK_IDX_SIGNATURE);
995 hdr.idx_version = htonl(2);
996 git_filebuf_write(&index_file, &hdr, sizeof(hdr));
997
998 /* Write out the fanout table */
999 for (i = 0; i < 256; ++i) {
1000 uint32_t n = htonl(idx->fanout[i]);
1001 git_filebuf_write(&index_file, &n, sizeof(n));
1002 }
1003
1004 /* Write out the object names (SHA-1 hashes) */
1005 git_vector_foreach(&idx->objects, i, entry) {
1006 git_filebuf_write(&index_file, &entry->oid, sizeof(git_oid));
1007 git_hash_update(&ctx, &entry->oid, GIT_OID_RAWSZ);
1008 }
1009 git_hash_final(&idx->hash, &ctx);
1010
1011 /* Write out the CRC32 values */
1012 git_vector_foreach(&idx->objects, i, entry) {
1013 git_filebuf_write(&index_file, &entry->crc, sizeof(uint32_t));
1014 }
1015
1016 /* Write out the offsets */
1017 git_vector_foreach(&idx->objects, i, entry) {
1018 uint32_t n;
1019
1020 if (entry->offset == UINT32_MAX)
1021 n = htonl(0x80000000 | long_offsets++);
1022 else
1023 n = htonl(entry->offset);
1024
1025 git_filebuf_write(&index_file, &n, sizeof(uint32_t));
1026 }
1027
1028 /* Write out the long offsets */
1029 git_vector_foreach(&idx->objects, i, entry) {
1030 uint32_t split[2];
1031
1032 if (entry->offset != UINT32_MAX)
1033 continue;
1034
1035 split[0] = htonl(entry->offset_long >> 32);
1036 split[1] = htonl(entry->offset_long & 0xffffffff);
1037
1038 git_filebuf_write(&index_file, &split, sizeof(uint32_t) * 2);
1039 }
1040
1041 /* Write out the packfile trailer to the index */
1042 if (git_filebuf_write(&index_file, &trailer_hash, GIT_OID_RAWSZ) < 0)
1043 goto on_error;
1044
1045 /* Write out the hash of the idx */
1046 if (git_filebuf_hash(&trailer_hash, &index_file) < 0)
1047 goto on_error;
1048
1049 git_filebuf_write(&index_file, &trailer_hash, sizeof(git_oid));
1050
1051 /* Figure out what the final name should be */
1052 if (index_path(&filename, idx, ".idx") < 0)
1053 goto on_error;
1054
1055 /* Commit file */
1056 if (git_filebuf_commit_at(&index_file, filename.ptr) < 0)
1057 goto on_error;
1058
1059 git_mwindow_free_all(&idx->pack->mwf);
1060
1061 /* Truncate file to undo rounding up to next page_size in append_to_pack */
1062 if (p_ftruncate(idx->pack->mwf.fd, idx->pack->mwf.size) < 0) {
1063 giterr_set(GITERR_OS, "failed to truncate pack file '%s'", idx->pack->pack_name);
1064 return -1;
1065 }
1066
1067 /* We need to close the descriptor here so Windows doesn't choke on commit_at */
1068 if (p_close(idx->pack->mwf.fd) < 0) {
1069 giterr_set(GITERR_OS, "failed to close packfile");
1070 goto on_error;
1071 }
1072
1073 idx->pack->mwf.fd = -1;
1074
1075 if (index_path(&filename, idx, ".pack") < 0)
1076 goto on_error;
1077
1078 /* And don't forget to rename the packfile to its new place. */
1079 p_rename(idx->pack->pack_name, git_buf_cstr(&filename));
1080
1081 git_buf_free(&filename);
1082 git_hash_ctx_cleanup(&ctx);
1083 return 0;
1084
1085 on_error:
1086 git_mwindow_free_all(&idx->pack->mwf);
1087 git_filebuf_cleanup(&index_file);
1088 git_buf_free(&filename);
1089 git_hash_ctx_cleanup(&ctx);
1090 return -1;
1091 }
1092
1093 void git_indexer_free(git_indexer *idx)
1094 {
1095 if (idx == NULL)
1096 return;
1097
1098 git_vector_free_deep(&idx->objects);
1099
1100 if (idx->pack && idx->pack->idx_cache) {
1101 struct git_pack_entry *pentry;
1102 kh_foreach_value(
1103 idx->pack->idx_cache, pentry, { git__free(pentry); });
1104
1105 git_oidmap_free(idx->pack->idx_cache);
1106 }
1107
1108 git_vector_free_deep(&idx->deltas);
1109
1110 if (!git_mutex_lock(&git__mwindow_mutex)) {
1111 git_packfile_free(idx->pack);
1112 git_mutex_unlock(&git__mwindow_mutex);
1113 }
1114
1115 git_hash_ctx_cleanup(&idx->trailer);
1116 git_hash_ctx_cleanup(&idx->hash_ctx);
1117 git__free(idx);
1118 }