]> git.proxmox.com Git - libgit2.git/blob - src/indexer.c
settings: rename `GIT_OPT_ENABLE_SYNCHRONOUS_OBJECT_CREATION`
[libgit2.git] / src / indexer.c
1 /*
2 * Copyright (C) the libgit2 contributors. All rights reserved.
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8 #include "git2/indexer.h"
9 #include "git2/object.h"
10
11 #include "common.h"
12 #include "pack.h"
13 #include "mwindow.h"
14 #include "posix.h"
15 #include "pack.h"
16 #include "filebuf.h"
17 #include "oid.h"
18 #include "oidmap.h"
19 #include "zstream.h"
20 #include "object.h"
21
22 extern git_mutex git__mwindow_mutex;
23
24 #define UINT31_MAX (0x7FFFFFFF)
25
26 struct entry {
27 git_oid oid;
28 uint32_t crc;
29 uint32_t offset;
30 uint64_t offset_long;
31 };
32
33 struct git_indexer {
34 unsigned int parsed_header :1,
35 pack_committed :1,
36 have_stream :1,
37 have_delta :1,
38 do_fsync :1;
39 struct git_pack_header hdr;
40 struct git_pack_file *pack;
41 unsigned int mode;
42 git_off_t off;
43 git_off_t entry_start;
44 git_packfile_stream stream;
45 size_t nr_objects;
46 git_vector objects;
47 git_vector deltas;
48 unsigned int fanout[256];
49 git_hash_ctx hash_ctx;
50 git_oid hash;
51 git_transfer_progress_cb progress_cb;
52 void *progress_payload;
53 char objbuf[8*1024];
54
55 /* Needed to look up objects which we want to inject to fix a thin pack */
56 git_odb *odb;
57
58 /* Fields for calculating the packfile trailer (hash of everything before it) */
59 char inbuf[GIT_OID_RAWSZ];
60 size_t inbuf_len;
61 git_hash_ctx trailer;
62 };
63
64 struct delta_info {
65 git_off_t delta_off;
66 };
67
68 const git_oid *git_indexer_hash(const git_indexer *idx)
69 {
70 return &idx->hash;
71 }
72
73 static int parse_header(struct git_pack_header *hdr, struct git_pack_file *pack)
74 {
75 int error;
76 git_map map;
77
78 if ((error = p_mmap(&map, sizeof(*hdr), GIT_PROT_READ, GIT_MAP_SHARED, pack->mwf.fd, 0)) < 0)
79 return error;
80
81 memcpy(hdr, map.data, sizeof(*hdr));
82 p_munmap(&map);
83
84 /* Verify we recognize this pack file format. */
85 if (hdr->hdr_signature != ntohl(PACK_SIGNATURE)) {
86 giterr_set(GITERR_INDEXER, "wrong pack signature");
87 return -1;
88 }
89
90 if (!pack_version_ok(hdr->hdr_version)) {
91 giterr_set(GITERR_INDEXER, "wrong pack version");
92 return -1;
93 }
94
95 return 0;
96 }
97
98 static int objects_cmp(const void *a, const void *b)
99 {
100 const struct entry *entrya = a;
101 const struct entry *entryb = b;
102
103 return git_oid__cmp(&entrya->oid, &entryb->oid);
104 }
105
106 int git_indexer_new(
107 git_indexer **out,
108 const char *prefix,
109 unsigned int mode,
110 git_odb *odb,
111 git_transfer_progress_cb progress_cb,
112 void *progress_payload)
113 {
114 git_indexer *idx;
115 git_buf path = GIT_BUF_INIT, tmp_path = GIT_BUF_INIT;
116 static const char suff[] = "/pack";
117 int error, fd = -1;
118
119 idx = git__calloc(1, sizeof(git_indexer));
120 GITERR_CHECK_ALLOC(idx);
121 idx->odb = odb;
122 idx->progress_cb = progress_cb;
123 idx->progress_payload = progress_payload;
124 idx->mode = mode ? mode : GIT_PACK_FILE_MODE;
125 git_hash_ctx_init(&idx->hash_ctx);
126 git_hash_ctx_init(&idx->trailer);
127
128 if (git_repository__fsync_gitdir)
129 idx->do_fsync = 1;
130
131 error = git_buf_joinpath(&path, prefix, suff);
132 if (error < 0)
133 goto cleanup;
134
135 fd = git_futils_mktmp(&tmp_path, git_buf_cstr(&path), idx->mode);
136 git_buf_free(&path);
137 if (fd < 0)
138 goto cleanup;
139
140 error = git_packfile_alloc(&idx->pack, git_buf_cstr(&tmp_path));
141 git_buf_free(&tmp_path);
142
143 if (error < 0)
144 goto cleanup;
145
146 idx->pack->mwf.fd = fd;
147 if ((error = git_mwindow_file_register(&idx->pack->mwf)) < 0)
148 goto cleanup;
149
150 *out = idx;
151 return 0;
152
153 cleanup:
154 if (fd != -1)
155 p_close(fd);
156
157 if (git_buf_len(&tmp_path) > 0)
158 p_unlink(git_buf_cstr(&tmp_path));
159
160 if (idx->pack != NULL)
161 p_unlink(idx->pack->pack_name);
162
163 git_buf_free(&path);
164 git_buf_free(&tmp_path);
165 git__free(idx);
166 return -1;
167 }
168
169 void git_indexer__set_fsync(git_indexer *idx, int do_fsync)
170 {
171 idx->do_fsync = !!do_fsync;
172 }
173
174 /* Try to store the delta so we can try to resolve it later */
175 static int store_delta(git_indexer *idx)
176 {
177 struct delta_info *delta;
178
179 delta = git__calloc(1, sizeof(struct delta_info));
180 GITERR_CHECK_ALLOC(delta);
181 delta->delta_off = idx->entry_start;
182
183 if (git_vector_insert(&idx->deltas, delta) < 0)
184 return -1;
185
186 return 0;
187 }
188
189 static void hash_header(git_hash_ctx *ctx, git_off_t len, git_otype type)
190 {
191 char buffer[64];
192 size_t hdrlen;
193
194 hdrlen = git_odb__format_object_header(buffer, sizeof(buffer), (size_t)len, type);
195 git_hash_update(ctx, buffer, hdrlen);
196 }
197
198 static int hash_object_stream(git_indexer*idx, git_packfile_stream *stream)
199 {
200 ssize_t read;
201
202 assert(idx && stream);
203
204 do {
205 if ((read = git_packfile_stream_read(stream, idx->objbuf, sizeof(idx->objbuf))) < 0)
206 break;
207
208 git_hash_update(&idx->hash_ctx, idx->objbuf, read);
209 } while (read > 0);
210
211 if (read < 0)
212 return (int)read;
213
214 return 0;
215 }
216
217 /* In order to create the packfile stream, we need to skip over the delta base description */
218 static int advance_delta_offset(git_indexer *idx, git_otype type)
219 {
220 git_mwindow *w = NULL;
221
222 assert(type == GIT_OBJ_REF_DELTA || type == GIT_OBJ_OFS_DELTA);
223
224 if (type == GIT_OBJ_REF_DELTA) {
225 idx->off += GIT_OID_RAWSZ;
226 } else {
227 git_off_t base_off = get_delta_base(idx->pack, &w, &idx->off, type, idx->entry_start);
228 git_mwindow_close(&w);
229 if (base_off < 0)
230 return (int)base_off;
231 }
232
233 return 0;
234 }
235
236 /* Read from the stream and discard any output */
237 static int read_object_stream(git_indexer *idx, git_packfile_stream *stream)
238 {
239 ssize_t read;
240
241 assert(stream);
242
243 do {
244 read = git_packfile_stream_read(stream, idx->objbuf, sizeof(idx->objbuf));
245 } while (read > 0);
246
247 if (read < 0)
248 return (int)read;
249
250 return 0;
251 }
252
253 static int crc_object(uint32_t *crc_out, git_mwindow_file *mwf, git_off_t start, git_off_t size)
254 {
255 void *ptr;
256 uint32_t crc;
257 unsigned int left, len;
258 git_mwindow *w = NULL;
259
260 crc = crc32(0L, Z_NULL, 0);
261 while (size) {
262 ptr = git_mwindow_open(mwf, &w, start, (size_t)size, &left);
263 if (ptr == NULL)
264 return -1;
265
266 len = min(left, (unsigned int)size);
267 crc = crc32(crc, ptr, len);
268 size -= len;
269 start += len;
270 git_mwindow_close(&w);
271 }
272
273 *crc_out = htonl(crc);
274 return 0;
275 }
276
277 static int store_object(git_indexer *idx)
278 {
279 int i, error;
280 khiter_t k;
281 git_oid oid;
282 struct entry *entry;
283 git_off_t entry_size;
284 struct git_pack_entry *pentry;
285 git_off_t entry_start = idx->entry_start;
286
287 entry = git__calloc(1, sizeof(*entry));
288 GITERR_CHECK_ALLOC(entry);
289
290 pentry = git__calloc(1, sizeof(struct git_pack_entry));
291 GITERR_CHECK_ALLOC(pentry);
292
293 git_hash_final(&oid, &idx->hash_ctx);
294 entry_size = idx->off - entry_start;
295 if (entry_start > UINT31_MAX) {
296 entry->offset = UINT32_MAX;
297 entry->offset_long = entry_start;
298 } else {
299 entry->offset = (uint32_t)entry_start;
300 }
301
302 git_oid_cpy(&pentry->sha1, &oid);
303 pentry->offset = entry_start;
304
305 k = git_oidmap_put(idx->pack->idx_cache, &pentry->sha1, &error);
306 if (error == -1) {
307 git__free(pentry);
308 giterr_set_oom();
309 goto on_error;
310 }
311
312 if (error == 0) {
313 giterr_set(GITERR_INDEXER, "duplicate object %s found in pack", git_oid_tostr_s(&pentry->sha1));
314 git__free(pentry);
315 goto on_error;
316 }
317
318
319 git_oidmap_set_value_at(idx->pack->idx_cache, k, pentry);
320
321 git_oid_cpy(&entry->oid, &oid);
322
323 if (crc_object(&entry->crc, &idx->pack->mwf, entry_start, entry_size) < 0)
324 goto on_error;
325
326 /* Add the object to the list */
327 if (git_vector_insert(&idx->objects, entry) < 0)
328 goto on_error;
329
330 for (i = oid.id[0]; i < 256; ++i) {
331 idx->fanout[i]++;
332 }
333
334 return 0;
335
336 on_error:
337 git__free(entry);
338
339 return -1;
340 }
341
342 GIT_INLINE(bool) has_entry(git_indexer *idx, git_oid *id)
343 {
344 return git_oidmap_exists(idx->pack->idx_cache, id);
345 }
346
347 static int save_entry(git_indexer *idx, struct entry *entry, struct git_pack_entry *pentry, git_off_t entry_start)
348 {
349 int i, error;
350 khiter_t k;
351
352 if (entry_start > UINT31_MAX) {
353 entry->offset = UINT32_MAX;
354 entry->offset_long = entry_start;
355 } else {
356 entry->offset = (uint32_t)entry_start;
357 }
358
359 pentry->offset = entry_start;
360 k = git_oidmap_put(idx->pack->idx_cache, &pentry->sha1, &error);
361
362 if (error <= 0) {
363 giterr_set(GITERR_INDEXER, "cannot insert object into pack");
364 return -1;
365 }
366
367 git_oidmap_set_value_at(idx->pack->idx_cache, k, pentry);
368
369 /* Add the object to the list */
370 if (git_vector_insert(&idx->objects, entry) < 0)
371 return -1;
372
373 for (i = entry->oid.id[0]; i < 256; ++i) {
374 idx->fanout[i]++;
375 }
376
377 return 0;
378 }
379
380 static int hash_and_save(git_indexer *idx, git_rawobj *obj, git_off_t entry_start)
381 {
382 git_oid oid;
383 size_t entry_size;
384 struct entry *entry;
385 struct git_pack_entry *pentry = NULL;
386
387 entry = git__calloc(1, sizeof(*entry));
388 GITERR_CHECK_ALLOC(entry);
389
390 if (git_odb__hashobj(&oid, obj) < 0) {
391 giterr_set(GITERR_INDEXER, "failed to hash object");
392 goto on_error;
393 }
394
395 pentry = git__calloc(1, sizeof(struct git_pack_entry));
396 GITERR_CHECK_ALLOC(pentry);
397
398 git_oid_cpy(&pentry->sha1, &oid);
399 git_oid_cpy(&entry->oid, &oid);
400 entry->crc = crc32(0L, Z_NULL, 0);
401
402 entry_size = (size_t)(idx->off - entry_start);
403 if (crc_object(&entry->crc, &idx->pack->mwf, entry_start, entry_size) < 0)
404 goto on_error;
405
406 return save_entry(idx, entry, pentry, entry_start);
407
408 on_error:
409 git__free(pentry);
410 git__free(entry);
411 git__free(obj->data);
412 return -1;
413 }
414
415 static int do_progress_callback(git_indexer *idx, git_transfer_progress *stats)
416 {
417 if (idx->progress_cb)
418 return giterr_set_after_callback_function(
419 idx->progress_cb(stats, idx->progress_payload),
420 "indexer progress");
421 return 0;
422 }
423
424 /* Hash everything but the last 20B of input */
425 static void hash_partially(git_indexer *idx, const uint8_t *data, size_t size)
426 {
427 size_t to_expell, to_keep;
428
429 if (size == 0)
430 return;
431
432 /* Easy case, dump the buffer and the data minus the last 20 bytes */
433 if (size >= GIT_OID_RAWSZ) {
434 git_hash_update(&idx->trailer, idx->inbuf, idx->inbuf_len);
435 git_hash_update(&idx->trailer, data, size - GIT_OID_RAWSZ);
436
437 data += size - GIT_OID_RAWSZ;
438 memcpy(idx->inbuf, data, GIT_OID_RAWSZ);
439 idx->inbuf_len = GIT_OID_RAWSZ;
440 return;
441 }
442
443 /* We can just append */
444 if (idx->inbuf_len + size <= GIT_OID_RAWSZ) {
445 memcpy(idx->inbuf + idx->inbuf_len, data, size);
446 idx->inbuf_len += size;
447 return;
448 }
449
450 /* We need to partially drain the buffer and then append */
451 to_keep = GIT_OID_RAWSZ - size;
452 to_expell = idx->inbuf_len - to_keep;
453
454 git_hash_update(&idx->trailer, idx->inbuf, to_expell);
455
456 memmove(idx->inbuf, idx->inbuf + to_expell, to_keep);
457 memcpy(idx->inbuf + to_keep, data, size);
458 idx->inbuf_len += size - to_expell;
459 }
460
461 static int write_at(git_indexer *idx, const void *data, git_off_t offset, size_t size)
462 {
463 git_file fd = idx->pack->mwf.fd;
464 size_t mmap_alignment;
465 size_t page_offset;
466 git_off_t page_start;
467 unsigned char *map_data;
468 git_map map;
469 int error;
470
471 assert(data && size);
472
473 if ((error = git__mmap_alignment(&mmap_alignment)) < 0)
474 return error;
475
476 /* the offset needs to be at the mmap boundary for the platform */
477 page_offset = offset % mmap_alignment;
478 page_start = offset - page_offset;
479
480 if ((error = p_mmap(&map, page_offset + size, GIT_PROT_WRITE, GIT_MAP_SHARED, fd, page_start)) < 0)
481 return error;
482
483 map_data = (unsigned char *)map.data;
484 memcpy(map_data + page_offset, data, size);
485 p_munmap(&map);
486
487 return 0;
488 }
489
490 static int append_to_pack(git_indexer *idx, const void *data, size_t size)
491 {
492 git_off_t new_size;
493 size_t mmap_alignment;
494 size_t page_offset;
495 git_off_t page_start;
496 git_off_t current_size = idx->pack->mwf.size;
497 int fd = idx->pack->mwf.fd;
498 int error;
499
500 if (!size)
501 return 0;
502
503 if ((error = git__mmap_alignment(&mmap_alignment)) < 0)
504 return error;
505
506 /* Write a single byte to force the file system to allocate space now or
507 * report an error, since we can't report errors when writing using mmap.
508 * Round the size up to the nearest page so that we only need to perform file
509 * I/O when we add a page, instead of whenever we write even a single byte. */
510 new_size = current_size + size;
511 page_offset = new_size % mmap_alignment;
512 page_start = new_size - page_offset;
513
514 if (p_lseek(fd, page_start + mmap_alignment - 1, SEEK_SET) < 0 ||
515 p_write(idx->pack->mwf.fd, data, 1) < 0) {
516 giterr_set(GITERR_OS, "cannot extend packfile '%s'", idx->pack->pack_name);
517 return -1;
518 }
519
520 return write_at(idx, data, idx->pack->mwf.size, size);
521 }
522
523 int git_indexer_append(git_indexer *idx, const void *data, size_t size, git_transfer_progress *stats)
524 {
525 int error = -1;
526 size_t processed;
527 struct git_pack_header *hdr = &idx->hdr;
528 git_mwindow_file *mwf = &idx->pack->mwf;
529
530 assert(idx && data && stats);
531
532 processed = stats->indexed_objects;
533
534 if ((error = append_to_pack(idx, data, size)) < 0)
535 return error;
536
537 hash_partially(idx, data, (int)size);
538
539 /* Make sure we set the new size of the pack */
540 idx->pack->mwf.size += size;
541
542 if (!idx->parsed_header) {
543 unsigned int total_objects;
544
545 if ((unsigned)idx->pack->mwf.size < sizeof(struct git_pack_header))
546 return 0;
547
548 if ((error = parse_header(&idx->hdr, idx->pack)) < 0)
549 return error;
550
551 idx->parsed_header = 1;
552 idx->nr_objects = ntohl(hdr->hdr_entries);
553 idx->off = sizeof(struct git_pack_header);
554
555 /* for now, limit to 2^32 objects */
556 assert(idx->nr_objects == (size_t)((unsigned int)idx->nr_objects));
557 if (idx->nr_objects == (size_t)((unsigned int)idx->nr_objects))
558 total_objects = (unsigned int)idx->nr_objects;
559 else
560 total_objects = UINT_MAX;
561
562 idx->pack->idx_cache = git_oidmap_alloc();
563 GITERR_CHECK_ALLOC(idx->pack->idx_cache);
564
565 idx->pack->has_cache = 1;
566 if (git_vector_init(&idx->objects, total_objects, objects_cmp) < 0)
567 return -1;
568
569 if (git_vector_init(&idx->deltas, total_objects / 2, NULL) < 0)
570 return -1;
571
572 stats->received_objects = 0;
573 stats->local_objects = 0;
574 stats->total_deltas = 0;
575 stats->indexed_deltas = 0;
576 processed = stats->indexed_objects = 0;
577 stats->total_objects = total_objects;
578
579 if ((error = do_progress_callback(idx, stats)) != 0)
580 return error;
581 }
582
583 /* Now that we have data in the pack, let's try to parse it */
584
585 /* As the file grows any windows we try to use will be out of date */
586 git_mwindow_free_all(mwf);
587
588 while (processed < idx->nr_objects) {
589 git_packfile_stream *stream = &idx->stream;
590 git_off_t entry_start = idx->off;
591 size_t entry_size;
592 git_otype type;
593 git_mwindow *w = NULL;
594
595 if (idx->pack->mwf.size <= idx->off + 20)
596 return 0;
597
598 if (!idx->have_stream) {
599 error = git_packfile_unpack_header(&entry_size, &type, mwf, &w, &idx->off);
600 if (error == GIT_EBUFS) {
601 idx->off = entry_start;
602 return 0;
603 }
604 if (error < 0)
605 goto on_error;
606
607 git_mwindow_close(&w);
608 idx->entry_start = entry_start;
609 git_hash_init(&idx->hash_ctx);
610
611 if (type == GIT_OBJ_REF_DELTA || type == GIT_OBJ_OFS_DELTA) {
612 error = advance_delta_offset(idx, type);
613 if (error == GIT_EBUFS) {
614 idx->off = entry_start;
615 return 0;
616 }
617 if (error < 0)
618 goto on_error;
619
620 idx->have_delta = 1;
621 } else {
622 idx->have_delta = 0;
623 hash_header(&idx->hash_ctx, entry_size, type);
624 }
625
626 idx->have_stream = 1;
627
628 error = git_packfile_stream_open(stream, idx->pack, idx->off);
629 if (error < 0)
630 goto on_error;
631 }
632
633 if (idx->have_delta) {
634 error = read_object_stream(idx, stream);
635 } else {
636 error = hash_object_stream(idx, stream);
637 }
638
639 idx->off = stream->curpos;
640 if (error == GIT_EBUFS)
641 return 0;
642
643 /* We want to free the stream reasorces no matter what here */
644 idx->have_stream = 0;
645 git_packfile_stream_free(stream);
646
647 if (error < 0)
648 goto on_error;
649
650 if (idx->have_delta) {
651 error = store_delta(idx);
652 } else {
653 error = store_object(idx);
654 }
655
656 if (error < 0)
657 goto on_error;
658
659 if (!idx->have_delta) {
660 stats->indexed_objects = (unsigned int)++processed;
661 }
662 stats->received_objects++;
663
664 if ((error = do_progress_callback(idx, stats)) != 0)
665 goto on_error;
666 }
667
668 return 0;
669
670 on_error:
671 git_mwindow_free_all(mwf);
672 return error;
673 }
674
675 static int index_path(git_buf *path, git_indexer *idx, const char *suffix)
676 {
677 const char prefix[] = "pack-";
678 size_t slash = (size_t)path->size;
679
680 /* search backwards for '/' */
681 while (slash > 0 && path->ptr[slash - 1] != '/')
682 slash--;
683
684 if (git_buf_grow(path, slash + 1 + strlen(prefix) +
685 GIT_OID_HEXSZ + strlen(suffix) + 1) < 0)
686 return -1;
687
688 git_buf_truncate(path, slash);
689 git_buf_puts(path, prefix);
690 git_oid_fmt(path->ptr + git_buf_len(path), &idx->hash);
691 path->size += GIT_OID_HEXSZ;
692 git_buf_puts(path, suffix);
693
694 return git_buf_oom(path) ? -1 : 0;
695 }
696
697 /**
698 * Rewind the packfile by the trailer, as we might need to fix the
699 * packfile by injecting objects at the tail and must overwrite it.
700 */
701 static void seek_back_trailer(git_indexer *idx)
702 {
703 idx->pack->mwf.size -= GIT_OID_RAWSZ;
704 git_mwindow_free_all(&idx->pack->mwf);
705 }
706
707 static int inject_object(git_indexer *idx, git_oid *id)
708 {
709 git_odb_object *obj;
710 struct entry *entry;
711 struct git_pack_entry *pentry = NULL;
712 git_oid foo = {{0}};
713 unsigned char hdr[64];
714 git_buf buf = GIT_BUF_INIT;
715 git_off_t entry_start;
716 const void *data;
717 size_t len, hdr_len;
718 int error;
719
720 seek_back_trailer(idx);
721 entry_start = idx->pack->mwf.size;
722
723 if (git_odb_read(&obj, idx->odb, id) < 0) {
724 giterr_set(GITERR_INDEXER, "missing delta bases");
725 return -1;
726 }
727
728 data = git_odb_object_data(obj);
729 len = git_odb_object_size(obj);
730
731 entry = git__calloc(1, sizeof(*entry));
732 GITERR_CHECK_ALLOC(entry);
733
734 entry->crc = crc32(0L, Z_NULL, 0);
735
736 /* Write out the object header */
737 hdr_len = git_packfile__object_header(hdr, len, git_odb_object_type(obj));
738 if ((error = append_to_pack(idx, hdr, hdr_len)) < 0)
739 goto cleanup;
740
741 idx->pack->mwf.size += hdr_len;
742 entry->crc = crc32(entry->crc, hdr, (uInt)hdr_len);
743
744 if ((error = git_zstream_deflatebuf(&buf, data, len)) < 0)
745 goto cleanup;
746
747 /* And then the compressed object */
748 if ((error = append_to_pack(idx, buf.ptr, buf.size)) < 0)
749 goto cleanup;
750
751 idx->pack->mwf.size += buf.size;
752 entry->crc = htonl(crc32(entry->crc, (unsigned char *)buf.ptr, (uInt)buf.size));
753 git_buf_free(&buf);
754
755 /* Write a fake trailer so the pack functions play ball */
756
757 if ((error = append_to_pack(idx, &foo, GIT_OID_RAWSZ)) < 0)
758 goto cleanup;
759
760 idx->pack->mwf.size += GIT_OID_RAWSZ;
761
762 pentry = git__calloc(1, sizeof(struct git_pack_entry));
763 GITERR_CHECK_ALLOC(pentry);
764
765 git_oid_cpy(&pentry->sha1, id);
766 git_oid_cpy(&entry->oid, id);
767 idx->off = entry_start + hdr_len + len;
768
769 error = save_entry(idx, entry, pentry, entry_start);
770
771 cleanup:
772 if (error) {
773 git__free(entry);
774 git__free(pentry);
775 }
776
777 git_odb_object_free(obj);
778 return error;
779 }
780
781 static int fix_thin_pack(git_indexer *idx, git_transfer_progress *stats)
782 {
783 int error, found_ref_delta = 0;
784 unsigned int i;
785 struct delta_info *delta;
786 size_t size;
787 git_otype type;
788 git_mwindow *w = NULL;
789 git_off_t curpos = 0;
790 unsigned char *base_info;
791 unsigned int left = 0;
792 git_oid base;
793
794 assert(git_vector_length(&idx->deltas) > 0);
795
796 if (idx->odb == NULL) {
797 giterr_set(GITERR_INDEXER, "cannot fix a thin pack without an ODB");
798 return -1;
799 }
800
801 /* Loop until we find the first REF delta */
802 git_vector_foreach(&idx->deltas, i, delta) {
803 if (!delta)
804 continue;
805
806 curpos = delta->delta_off;
807 error = git_packfile_unpack_header(&size, &type, &idx->pack->mwf, &w, &curpos);
808 if (error < 0)
809 return error;
810
811 if (type == GIT_OBJ_REF_DELTA) {
812 found_ref_delta = 1;
813 break;
814 }
815 }
816
817 if (!found_ref_delta) {
818 giterr_set(GITERR_INDEXER, "no REF_DELTA found, cannot inject object");
819 return -1;
820 }
821
822 /* curpos now points to the base information, which is an OID */
823 base_info = git_mwindow_open(&idx->pack->mwf, &w, curpos, GIT_OID_RAWSZ, &left);
824 if (base_info == NULL) {
825 giterr_set(GITERR_INDEXER, "failed to map delta information");
826 return -1;
827 }
828
829 git_oid_fromraw(&base, base_info);
830 git_mwindow_close(&w);
831
832 if (has_entry(idx, &base))
833 return 0;
834
835 if (inject_object(idx, &base) < 0)
836 return -1;
837
838 stats->local_objects++;
839
840 return 0;
841 }
842
843 static int resolve_deltas(git_indexer *idx, git_transfer_progress *stats)
844 {
845 unsigned int i;
846 struct delta_info *delta;
847 int progressed = 0, non_null = 0, progress_cb_result;
848
849 while (idx->deltas.length > 0) {
850 progressed = 0;
851 non_null = 0;
852 git_vector_foreach(&idx->deltas, i, delta) {
853 git_rawobj obj = {NULL};
854
855 if (!delta)
856 continue;
857
858 non_null = 1;
859 idx->off = delta->delta_off;
860 if (git_packfile_unpack(&obj, idx->pack, &idx->off) < 0)
861 continue;
862
863 if (hash_and_save(idx, &obj, delta->delta_off) < 0)
864 continue;
865
866 git__free(obj.data);
867 stats->indexed_objects++;
868 stats->indexed_deltas++;
869 progressed = 1;
870 if ((progress_cb_result = do_progress_callback(idx, stats)) < 0)
871 return progress_cb_result;
872
873 /* remove from the list */
874 git_vector_set(NULL, &idx->deltas, i, NULL);
875 git__free(delta);
876 }
877
878 /* if none were actually set, we're done */
879 if (!non_null)
880 break;
881
882 if (!progressed && (fix_thin_pack(idx, stats) < 0)) {
883 return -1;
884 }
885 }
886
887 return 0;
888 }
889
890 static int update_header_and_rehash(git_indexer *idx, git_transfer_progress *stats)
891 {
892 void *ptr;
893 size_t chunk = 1024*1024;
894 git_off_t hashed = 0;
895 git_mwindow *w = NULL;
896 git_mwindow_file *mwf;
897 unsigned int left;
898
899 mwf = &idx->pack->mwf;
900
901 git_hash_init(&idx->trailer);
902
903
904 /* Update the header to include the numer of local objects we injected */
905 idx->hdr.hdr_entries = htonl(stats->total_objects + stats->local_objects);
906 if (write_at(idx, &idx->hdr, 0, sizeof(struct git_pack_header)) < 0)
907 return -1;
908
909 /*
910 * We now use the same technique as before to determine the
911 * hash. We keep reading up to the end and let
912 * hash_partially() keep the existing trailer out of the
913 * calculation.
914 */
915 git_mwindow_free_all(mwf);
916 idx->inbuf_len = 0;
917 while (hashed < mwf->size) {
918 ptr = git_mwindow_open(mwf, &w, hashed, chunk, &left);
919 if (ptr == NULL)
920 return -1;
921
922 hash_partially(idx, ptr, left);
923 hashed += left;
924
925 git_mwindow_close(&w);
926 }
927
928 return 0;
929 }
930
931 int git_indexer_commit(git_indexer *idx, git_transfer_progress *stats)
932 {
933 git_mwindow *w = NULL;
934 unsigned int i, long_offsets = 0, left;
935 int error;
936 struct git_pack_idx_header hdr;
937 git_buf filename = GIT_BUF_INIT;
938 struct entry *entry;
939 git_oid trailer_hash, file_hash;
940 git_hash_ctx ctx;
941 git_filebuf index_file = {0};
942 void *packfile_trailer;
943
944 if (!idx->parsed_header) {
945 giterr_set(GITERR_INDEXER, "incomplete pack header");
946 return -1;
947 }
948
949 if (git_hash_ctx_init(&ctx) < 0)
950 return -1;
951
952 /* Test for this before resolve_deltas(), as it plays with idx->off */
953 if (idx->off + 20 < idx->pack->mwf.size) {
954 giterr_set(GITERR_INDEXER, "unexpected data at the end of the pack");
955 return -1;
956 }
957
958 packfile_trailer = git_mwindow_open(&idx->pack->mwf, &w, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ, &left);
959 if (packfile_trailer == NULL) {
960 git_mwindow_close(&w);
961 goto on_error;
962 }
963
964 /* Compare the packfile trailer as it was sent to us and what we calculated */
965 git_oid_fromraw(&file_hash, packfile_trailer);
966 git_mwindow_close(&w);
967
968 git_hash_final(&trailer_hash, &idx->trailer);
969 if (git_oid_cmp(&file_hash, &trailer_hash)) {
970 giterr_set(GITERR_INDEXER, "packfile trailer mismatch");
971 return -1;
972 }
973
974 /* Freeze the number of deltas */
975 stats->total_deltas = stats->total_objects - stats->indexed_objects;
976
977 if ((error = resolve_deltas(idx, stats)) < 0)
978 return error;
979
980 if (stats->indexed_objects != stats->total_objects) {
981 giterr_set(GITERR_INDEXER, "early EOF");
982 return -1;
983 }
984
985 if (stats->local_objects > 0) {
986 if (update_header_and_rehash(idx, stats) < 0)
987 return -1;
988
989 git_hash_final(&trailer_hash, &idx->trailer);
990 write_at(idx, &trailer_hash, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ);
991 }
992
993 git_vector_sort(&idx->objects);
994
995 git_buf_sets(&filename, idx->pack->pack_name);
996 git_buf_shorten(&filename, strlen("pack"));
997 git_buf_puts(&filename, "idx");
998 if (git_buf_oom(&filename))
999 return -1;
1000
1001 if (git_filebuf_open(&index_file, filename.ptr,
1002 GIT_FILEBUF_HASH_CONTENTS |
1003 (idx->do_fsync ? GIT_FILEBUF_FSYNC : 0),
1004 idx->mode) < 0)
1005 goto on_error;
1006
1007 /* Write out the header */
1008 hdr.idx_signature = htonl(PACK_IDX_SIGNATURE);
1009 hdr.idx_version = htonl(2);
1010 git_filebuf_write(&index_file, &hdr, sizeof(hdr));
1011
1012 /* Write out the fanout table */
1013 for (i = 0; i < 256; ++i) {
1014 uint32_t n = htonl(idx->fanout[i]);
1015 git_filebuf_write(&index_file, &n, sizeof(n));
1016 }
1017
1018 /* Write out the object names (SHA-1 hashes) */
1019 git_vector_foreach(&idx->objects, i, entry) {
1020 git_filebuf_write(&index_file, &entry->oid, sizeof(git_oid));
1021 git_hash_update(&ctx, &entry->oid, GIT_OID_RAWSZ);
1022 }
1023 git_hash_final(&idx->hash, &ctx);
1024
1025 /* Write out the CRC32 values */
1026 git_vector_foreach(&idx->objects, i, entry) {
1027 git_filebuf_write(&index_file, &entry->crc, sizeof(uint32_t));
1028 }
1029
1030 /* Write out the offsets */
1031 git_vector_foreach(&idx->objects, i, entry) {
1032 uint32_t n;
1033
1034 if (entry->offset == UINT32_MAX)
1035 n = htonl(0x80000000 | long_offsets++);
1036 else
1037 n = htonl(entry->offset);
1038
1039 git_filebuf_write(&index_file, &n, sizeof(uint32_t));
1040 }
1041
1042 /* Write out the long offsets */
1043 git_vector_foreach(&idx->objects, i, entry) {
1044 uint32_t split[2];
1045
1046 if (entry->offset != UINT32_MAX)
1047 continue;
1048
1049 split[0] = htonl(entry->offset_long >> 32);
1050 split[1] = htonl(entry->offset_long & 0xffffffff);
1051
1052 git_filebuf_write(&index_file, &split, sizeof(uint32_t) * 2);
1053 }
1054
1055 /* Write out the packfile trailer to the index */
1056 if (git_filebuf_write(&index_file, &trailer_hash, GIT_OID_RAWSZ) < 0)
1057 goto on_error;
1058
1059 /* Write out the hash of the idx */
1060 if (git_filebuf_hash(&trailer_hash, &index_file) < 0)
1061 goto on_error;
1062
1063 git_filebuf_write(&index_file, &trailer_hash, sizeof(git_oid));
1064
1065 /* Figure out what the final name should be */
1066 if (index_path(&filename, idx, ".idx") < 0)
1067 goto on_error;
1068
1069 /* Commit file */
1070 if (git_filebuf_commit_at(&index_file, filename.ptr) < 0)
1071 goto on_error;
1072
1073 git_mwindow_free_all(&idx->pack->mwf);
1074
1075 /* Truncate file to undo rounding up to next page_size in append_to_pack */
1076 if (p_ftruncate(idx->pack->mwf.fd, idx->pack->mwf.size) < 0) {
1077 giterr_set(GITERR_OS, "failed to truncate pack file '%s'", idx->pack->pack_name);
1078 return -1;
1079 }
1080
1081 if (idx->do_fsync && p_fsync(idx->pack->mwf.fd) < 0) {
1082 giterr_set(GITERR_OS, "failed to fsync packfile");
1083 goto on_error;
1084 }
1085
1086 /* We need to close the descriptor here so Windows doesn't choke on commit_at */
1087 if (p_close(idx->pack->mwf.fd) < 0) {
1088 giterr_set(GITERR_OS, "failed to close packfile");
1089 goto on_error;
1090 }
1091
1092 idx->pack->mwf.fd = -1;
1093
1094 if (index_path(&filename, idx, ".pack") < 0)
1095 goto on_error;
1096
1097 /* And don't forget to rename the packfile to its new place. */
1098 if (p_rename(idx->pack->pack_name, git_buf_cstr(&filename)) < 0)
1099 goto on_error;
1100
1101 /* And fsync the parent directory if we're asked to. */
1102 if (idx->do_fsync &&
1103 git_futils_fsync_parent(git_buf_cstr(&filename)) < 0)
1104 goto on_error;
1105
1106 idx->pack_committed = 1;
1107
1108 git_buf_free(&filename);
1109 git_hash_ctx_cleanup(&ctx);
1110 return 0;
1111
1112 on_error:
1113 git_mwindow_free_all(&idx->pack->mwf);
1114 git_filebuf_cleanup(&index_file);
1115 git_buf_free(&filename);
1116 git_hash_ctx_cleanup(&ctx);
1117 return -1;
1118 }
1119
1120 void git_indexer_free(git_indexer *idx)
1121 {
1122 if (idx == NULL)
1123 return;
1124
1125 git_vector_free_deep(&idx->objects);
1126
1127 if (idx->pack->idx_cache) {
1128 struct git_pack_entry *pentry;
1129 git_oidmap_foreach_value(idx->pack->idx_cache, pentry, {
1130 git__free(pentry);
1131 });
1132
1133 git_oidmap_free(idx->pack->idx_cache);
1134 }
1135
1136 git_vector_free_deep(&idx->deltas);
1137
1138 if (!git_mutex_lock(&git__mwindow_mutex)) {
1139 if (!idx->pack_committed)
1140 git_packfile_close(idx->pack, true);
1141
1142 git_packfile_free(idx->pack);
1143 git_mutex_unlock(&git__mwindow_mutex);
1144 }
1145
1146 git_hash_ctx_cleanup(&idx->trailer);
1147 git_hash_ctx_cleanup(&idx->hash_ctx);
1148 git__free(idx);
1149 }