]> git.proxmox.com Git - libgit2.git/blob - src/indexer.c
New upstream version 0.28.1+dfsg.1
[libgit2.git] / src / indexer.c
1 /*
2 * Copyright (C) the libgit2 contributors. All rights reserved.
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8 #include "indexer.h"
9
10 #include "git2/indexer.h"
11 #include "git2/object.h"
12
13 #include "commit.h"
14 #include "tree.h"
15 #include "tag.h"
16 #include "pack.h"
17 #include "mwindow.h"
18 #include "posix.h"
19 #include "pack.h"
20 #include "filebuf.h"
21 #include "oid.h"
22 #include "oidmap.h"
23 #include "zstream.h"
24 #include "object.h"
25
26 extern git_mutex git__mwindow_mutex;
27
28 size_t git_indexer__max_objects = UINT32_MAX;
29
30 #define UINT31_MAX (0x7FFFFFFF)
31
32 struct entry {
33 git_oid oid;
34 uint32_t crc;
35 uint32_t offset;
36 uint64_t offset_long;
37 };
38
39 struct git_indexer {
40 unsigned int parsed_header :1,
41 pack_committed :1,
42 have_stream :1,
43 have_delta :1,
44 do_fsync :1,
45 do_verify :1;
46 struct git_pack_header hdr;
47 struct git_pack_file *pack;
48 unsigned int mode;
49 git_off_t off;
50 git_off_t entry_start;
51 git_object_t entry_type;
52 git_buf entry_data;
53 git_packfile_stream stream;
54 size_t nr_objects;
55 git_vector objects;
56 git_vector deltas;
57 unsigned int fanout[256];
58 git_hash_ctx hash_ctx;
59 git_oid hash;
60 git_transfer_progress_cb progress_cb;
61 void *progress_payload;
62 char objbuf[8*1024];
63
64 /* OIDs referenced from pack objects. Used for verification. */
65 git_oidmap *expected_oids;
66
67 /* Needed to look up objects which we want to inject to fix a thin pack */
68 git_odb *odb;
69
70 /* Fields for calculating the packfile trailer (hash of everything before it) */
71 char inbuf[GIT_OID_RAWSZ];
72 size_t inbuf_len;
73 git_hash_ctx trailer;
74 };
75
76 struct delta_info {
77 git_off_t delta_off;
78 };
79
80 const git_oid *git_indexer_hash(const git_indexer *idx)
81 {
82 return &idx->hash;
83 }
84
85 static int parse_header(struct git_pack_header *hdr, struct git_pack_file *pack)
86 {
87 int error;
88 git_map map;
89
90 if ((error = p_mmap(&map, sizeof(*hdr), GIT_PROT_READ, GIT_MAP_SHARED, pack->mwf.fd, 0)) < 0)
91 return error;
92
93 memcpy(hdr, map.data, sizeof(*hdr));
94 p_munmap(&map);
95
96 /* Verify we recognize this pack file format. */
97 if (hdr->hdr_signature != ntohl(PACK_SIGNATURE)) {
98 git_error_set(GIT_ERROR_INDEXER, "wrong pack signature");
99 return -1;
100 }
101
102 if (!pack_version_ok(hdr->hdr_version)) {
103 git_error_set(GIT_ERROR_INDEXER, "wrong pack version");
104 return -1;
105 }
106
107 return 0;
108 }
109
110 static int objects_cmp(const void *a, const void *b)
111 {
112 const struct entry *entrya = a;
113 const struct entry *entryb = b;
114
115 return git_oid__cmp(&entrya->oid, &entryb->oid);
116 }
117
118 int git_indexer_init_options(git_indexer_options *opts, unsigned int version)
119 {
120 GIT_INIT_STRUCTURE_FROM_TEMPLATE(
121 opts, version, git_indexer_options, GIT_INDEXER_OPTIONS_INIT);
122 return 0;
123 }
124
125 int git_indexer_new(
126 git_indexer **out,
127 const char *prefix,
128 unsigned int mode,
129 git_odb *odb,
130 git_indexer_options *in_opts)
131 {
132 git_indexer_options opts = GIT_INDEXER_OPTIONS_INIT;
133 git_indexer *idx;
134 git_buf path = GIT_BUF_INIT, tmp_path = GIT_BUF_INIT;
135 static const char suff[] = "/pack";
136 int error, fd = -1;
137
138 if (in_opts)
139 memcpy(&opts, in_opts, sizeof(opts));
140
141 idx = git__calloc(1, sizeof(git_indexer));
142 GIT_ERROR_CHECK_ALLOC(idx);
143 idx->odb = odb;
144 idx->progress_cb = opts.progress_cb;
145 idx->progress_payload = opts.progress_cb_payload;
146 idx->mode = mode ? mode : GIT_PACK_FILE_MODE;
147 git_hash_ctx_init(&idx->hash_ctx);
148 git_hash_ctx_init(&idx->trailer);
149 git_buf_init(&idx->entry_data, 0);
150 idx->expected_oids = git_oidmap_alloc();
151 GIT_ERROR_CHECK_ALLOC(idx->expected_oids);
152
153 idx->do_verify = opts.verify;
154
155 if (git_repository__fsync_gitdir)
156 idx->do_fsync = 1;
157
158 error = git_buf_joinpath(&path, prefix, suff);
159 if (error < 0)
160 goto cleanup;
161
162 fd = git_futils_mktmp(&tmp_path, git_buf_cstr(&path), idx->mode);
163 git_buf_dispose(&path);
164 if (fd < 0)
165 goto cleanup;
166
167 error = git_packfile_alloc(&idx->pack, git_buf_cstr(&tmp_path));
168 git_buf_dispose(&tmp_path);
169
170 if (error < 0)
171 goto cleanup;
172
173 idx->pack->mwf.fd = fd;
174 if ((error = git_mwindow_file_register(&idx->pack->mwf)) < 0)
175 goto cleanup;
176
177 *out = idx;
178 return 0;
179
180 cleanup:
181 if (fd != -1)
182 p_close(fd);
183
184 if (git_buf_len(&tmp_path) > 0)
185 p_unlink(git_buf_cstr(&tmp_path));
186
187 if (idx->pack != NULL)
188 p_unlink(idx->pack->pack_name);
189
190 git_buf_dispose(&path);
191 git_buf_dispose(&tmp_path);
192 git__free(idx);
193 return -1;
194 }
195
196 void git_indexer__set_fsync(git_indexer *idx, int do_fsync)
197 {
198 idx->do_fsync = !!do_fsync;
199 }
200
201 /* Try to store the delta so we can try to resolve it later */
202 static int store_delta(git_indexer *idx)
203 {
204 struct delta_info *delta;
205
206 delta = git__calloc(1, sizeof(struct delta_info));
207 GIT_ERROR_CHECK_ALLOC(delta);
208 delta->delta_off = idx->entry_start;
209
210 if (git_vector_insert(&idx->deltas, delta) < 0)
211 return -1;
212
213 return 0;
214 }
215
216 static int hash_header(git_hash_ctx *ctx, git_off_t len, git_object_t type)
217 {
218 char buffer[64];
219 size_t hdrlen;
220 int error;
221
222 if ((error = git_odb__format_object_header(&hdrlen,
223 buffer, sizeof(buffer), (size_t)len, type)) < 0)
224 return error;
225
226 return git_hash_update(ctx, buffer, hdrlen);
227 }
228
229 static int hash_object_stream(git_indexer*idx, git_packfile_stream *stream)
230 {
231 ssize_t read;
232
233 assert(idx && stream);
234
235 do {
236 if ((read = git_packfile_stream_read(stream, idx->objbuf, sizeof(idx->objbuf))) < 0)
237 break;
238
239 if (idx->do_verify)
240 git_buf_put(&idx->entry_data, idx->objbuf, read);
241
242 git_hash_update(&idx->hash_ctx, idx->objbuf, read);
243 } while (read > 0);
244
245 if (read < 0)
246 return (int)read;
247
248 return 0;
249 }
250
251 /* In order to create the packfile stream, we need to skip over the delta base description */
252 static int advance_delta_offset(git_indexer *idx, git_object_t type)
253 {
254 git_mwindow *w = NULL;
255
256 assert(type == GIT_OBJECT_REF_DELTA || type == GIT_OBJECT_OFS_DELTA);
257
258 if (type == GIT_OBJECT_REF_DELTA) {
259 idx->off += GIT_OID_RAWSZ;
260 } else {
261 git_off_t base_off = get_delta_base(idx->pack, &w, &idx->off, type, idx->entry_start);
262 git_mwindow_close(&w);
263 if (base_off < 0)
264 return (int)base_off;
265 }
266
267 return 0;
268 }
269
270 /* Read from the stream and discard any output */
271 static int read_object_stream(git_indexer *idx, git_packfile_stream *stream)
272 {
273 ssize_t read;
274
275 assert(stream);
276
277 do {
278 read = git_packfile_stream_read(stream, idx->objbuf, sizeof(idx->objbuf));
279 } while (read > 0);
280
281 if (read < 0)
282 return (int)read;
283
284 return 0;
285 }
286
287 static int crc_object(uint32_t *crc_out, git_mwindow_file *mwf, git_off_t start, git_off_t size)
288 {
289 void *ptr;
290 uint32_t crc;
291 unsigned int left, len;
292 git_mwindow *w = NULL;
293
294 crc = crc32(0L, Z_NULL, 0);
295 while (size) {
296 ptr = git_mwindow_open(mwf, &w, start, (size_t)size, &left);
297 if (ptr == NULL)
298 return -1;
299
300 len = min(left, (unsigned int)size);
301 crc = crc32(crc, ptr, len);
302 size -= len;
303 start += len;
304 git_mwindow_close(&w);
305 }
306
307 *crc_out = htonl(crc);
308 return 0;
309 }
310
311 static void add_expected_oid(git_indexer *idx, const git_oid *oid)
312 {
313 int ret;
314
315 /*
316 * If we know about that object because it is stored in our ODB or
317 * because we have already processed it as part of our pack file, we do
318 * not have to expect it.
319 */
320 if ((!idx->odb || !git_odb_exists(idx->odb, oid)) &&
321 !git_oidmap_exists(idx->pack->idx_cache, oid) &&
322 !git_oidmap_exists(idx->expected_oids, oid)) {
323 git_oid *dup = git__malloc(sizeof(*oid));
324 git_oid_cpy(dup, oid);
325 git_oidmap_put(idx->expected_oids, dup, &ret);
326 }
327 }
328
329 static int check_object_connectivity(git_indexer *idx, const git_rawobj *obj)
330 {
331 git_object *object;
332 size_t keyidx;
333 int error;
334
335 if (obj->type != GIT_OBJECT_BLOB &&
336 obj->type != GIT_OBJECT_TREE &&
337 obj->type != GIT_OBJECT_COMMIT &&
338 obj->type != GIT_OBJECT_TAG)
339 return 0;
340
341 if ((error = git_object__from_raw(&object, obj->data, obj->len, obj->type)) < 0)
342 goto out;
343
344 keyidx = git_oidmap_lookup_index(idx->expected_oids, &object->cached.oid);
345 if (git_oidmap_valid_index(idx->expected_oids, keyidx)) {
346 const git_oid *key = git_oidmap_key(idx->expected_oids, keyidx);
347 git__free((git_oid *) key);
348 git_oidmap_delete_at(idx->expected_oids, keyidx);
349 }
350
351 /*
352 * Check whether this is a known object. If so, we can just continue as
353 * we assume that the ODB has a complete graph.
354 */
355 if (idx->odb && git_odb_exists(idx->odb, &object->cached.oid))
356 return 0;
357
358 switch (obj->type) {
359 case GIT_OBJECT_TREE:
360 {
361 git_tree *tree = (git_tree *) object;
362 git_tree_entry *entry;
363 size_t i;
364
365 git_array_foreach(tree->entries, i, entry)
366 add_expected_oid(idx, entry->oid);
367
368 break;
369 }
370 case GIT_OBJECT_COMMIT:
371 {
372 git_commit *commit = (git_commit *) object;
373 git_oid *parent_oid;
374 size_t i;
375
376 git_array_foreach(commit->parent_ids, i, parent_oid)
377 add_expected_oid(idx, parent_oid);
378
379 add_expected_oid(idx, &commit->tree_id);
380
381 break;
382 }
383 case GIT_OBJECT_TAG:
384 {
385 git_tag *tag = (git_tag *) object;
386
387 add_expected_oid(idx, &tag->target);
388
389 break;
390 }
391 case GIT_OBJECT_BLOB:
392 default:
393 break;
394 }
395
396 out:
397 git_object_free(object);
398
399 return error;
400 }
401
402 static int store_object(git_indexer *idx)
403 {
404 int i, error;
405 size_t k;
406 git_oid oid;
407 struct entry *entry;
408 git_off_t entry_size;
409 struct git_pack_entry *pentry;
410 git_off_t entry_start = idx->entry_start;
411
412 entry = git__calloc(1, sizeof(*entry));
413 GIT_ERROR_CHECK_ALLOC(entry);
414
415 pentry = git__calloc(1, sizeof(struct git_pack_entry));
416 GIT_ERROR_CHECK_ALLOC(pentry);
417
418 git_hash_final(&oid, &idx->hash_ctx);
419 entry_size = idx->off - entry_start;
420 if (entry_start > UINT31_MAX) {
421 entry->offset = UINT32_MAX;
422 entry->offset_long = entry_start;
423 } else {
424 entry->offset = (uint32_t)entry_start;
425 }
426
427 if (idx->do_verify) {
428 git_rawobj rawobj = {
429 idx->entry_data.ptr,
430 idx->entry_data.size,
431 idx->entry_type
432 };
433
434 if ((error = check_object_connectivity(idx, &rawobj)) < 0)
435 goto on_error;
436 }
437
438 git_oid_cpy(&pentry->sha1, &oid);
439 pentry->offset = entry_start;
440
441 k = git_oidmap_put(idx->pack->idx_cache, &pentry->sha1, &error);
442 if (error == -1) {
443 git__free(pentry);
444 git_error_set_oom();
445 goto on_error;
446 }
447
448 if (error == 0) {
449 git_error_set(GIT_ERROR_INDEXER, "duplicate object %s found in pack", git_oid_tostr_s(&pentry->sha1));
450 git__free(pentry);
451 goto on_error;
452 }
453
454
455 git_oidmap_set_value_at(idx->pack->idx_cache, k, pentry);
456
457 git_oid_cpy(&entry->oid, &oid);
458
459 if (crc_object(&entry->crc, &idx->pack->mwf, entry_start, entry_size) < 0)
460 goto on_error;
461
462 /* Add the object to the list */
463 if (git_vector_insert(&idx->objects, entry) < 0)
464 goto on_error;
465
466 for (i = oid.id[0]; i < 256; ++i) {
467 idx->fanout[i]++;
468 }
469
470 return 0;
471
472 on_error:
473 git__free(entry);
474
475 return -1;
476 }
477
478 GIT_INLINE(bool) has_entry(git_indexer *idx, git_oid *id)
479 {
480 return git_oidmap_exists(idx->pack->idx_cache, id);
481 }
482
483 static int save_entry(git_indexer *idx, struct entry *entry, struct git_pack_entry *pentry, git_off_t entry_start)
484 {
485 int i, error;
486 size_t k;
487
488 if (entry_start > UINT31_MAX) {
489 entry->offset = UINT32_MAX;
490 entry->offset_long = entry_start;
491 } else {
492 entry->offset = (uint32_t)entry_start;
493 }
494
495 pentry->offset = entry_start;
496 k = git_oidmap_put(idx->pack->idx_cache, &pentry->sha1, &error);
497
498 if (error <= 0) {
499 git_error_set(GIT_ERROR_INDEXER, "cannot insert object into pack");
500 return -1;
501 }
502
503 git_oidmap_set_value_at(idx->pack->idx_cache, k, pentry);
504
505 /* Add the object to the list */
506 if (git_vector_insert(&idx->objects, entry) < 0)
507 return -1;
508
509 for (i = entry->oid.id[0]; i < 256; ++i) {
510 idx->fanout[i]++;
511 }
512
513 return 0;
514 }
515
516 static int hash_and_save(git_indexer *idx, git_rawobj *obj, git_off_t entry_start)
517 {
518 git_oid oid;
519 size_t entry_size;
520 struct entry *entry;
521 struct git_pack_entry *pentry = NULL;
522
523 entry = git__calloc(1, sizeof(*entry));
524 GIT_ERROR_CHECK_ALLOC(entry);
525
526 if (git_odb__hashobj(&oid, obj) < 0) {
527 git_error_set(GIT_ERROR_INDEXER, "failed to hash object");
528 goto on_error;
529 }
530
531 pentry = git__calloc(1, sizeof(struct git_pack_entry));
532 GIT_ERROR_CHECK_ALLOC(pentry);
533
534 git_oid_cpy(&pentry->sha1, &oid);
535 git_oid_cpy(&entry->oid, &oid);
536 entry->crc = crc32(0L, Z_NULL, 0);
537
538 entry_size = (size_t)(idx->off - entry_start);
539 if (crc_object(&entry->crc, &idx->pack->mwf, entry_start, entry_size) < 0)
540 goto on_error;
541
542 return save_entry(idx, entry, pentry, entry_start);
543
544 on_error:
545 git__free(pentry);
546 git__free(entry);
547 git__free(obj->data);
548 return -1;
549 }
550
551 static int do_progress_callback(git_indexer *idx, git_transfer_progress *stats)
552 {
553 if (idx->progress_cb)
554 return git_error_set_after_callback_function(
555 idx->progress_cb(stats, idx->progress_payload),
556 "indexer progress");
557 return 0;
558 }
559
560 /* Hash everything but the last 20B of input */
561 static void hash_partially(git_indexer *idx, const uint8_t *data, size_t size)
562 {
563 size_t to_expell, to_keep;
564
565 if (size == 0)
566 return;
567
568 /* Easy case, dump the buffer and the data minus the last 20 bytes */
569 if (size >= GIT_OID_RAWSZ) {
570 git_hash_update(&idx->trailer, idx->inbuf, idx->inbuf_len);
571 git_hash_update(&idx->trailer, data, size - GIT_OID_RAWSZ);
572
573 data += size - GIT_OID_RAWSZ;
574 memcpy(idx->inbuf, data, GIT_OID_RAWSZ);
575 idx->inbuf_len = GIT_OID_RAWSZ;
576 return;
577 }
578
579 /* We can just append */
580 if (idx->inbuf_len + size <= GIT_OID_RAWSZ) {
581 memcpy(idx->inbuf + idx->inbuf_len, data, size);
582 idx->inbuf_len += size;
583 return;
584 }
585
586 /* We need to partially drain the buffer and then append */
587 to_keep = GIT_OID_RAWSZ - size;
588 to_expell = idx->inbuf_len - to_keep;
589
590 git_hash_update(&idx->trailer, idx->inbuf, to_expell);
591
592 memmove(idx->inbuf, idx->inbuf + to_expell, to_keep);
593 memcpy(idx->inbuf + to_keep, data, size);
594 idx->inbuf_len += size - to_expell;
595 }
596
597 static int write_at(git_indexer *idx, const void *data, git_off_t offset, size_t size)
598 {
599 git_file fd = idx->pack->mwf.fd;
600 size_t mmap_alignment;
601 size_t page_offset;
602 git_off_t page_start;
603 unsigned char *map_data;
604 git_map map;
605 int error;
606
607 assert(data && size);
608
609 if ((error = git__mmap_alignment(&mmap_alignment)) < 0)
610 return error;
611
612 /* the offset needs to be at the mmap boundary for the platform */
613 page_offset = offset % mmap_alignment;
614 page_start = offset - page_offset;
615
616 if ((error = p_mmap(&map, page_offset + size, GIT_PROT_WRITE, GIT_MAP_SHARED, fd, page_start)) < 0)
617 return error;
618
619 map_data = (unsigned char *)map.data;
620 memcpy(map_data + page_offset, data, size);
621 p_munmap(&map);
622
623 return 0;
624 }
625
626 static int append_to_pack(git_indexer *idx, const void *data, size_t size)
627 {
628 git_off_t new_size;
629 size_t mmap_alignment;
630 size_t page_offset;
631 git_off_t page_start;
632 git_off_t current_size = idx->pack->mwf.size;
633 int fd = idx->pack->mwf.fd;
634 int error;
635
636 if (!size)
637 return 0;
638
639 if ((error = git__mmap_alignment(&mmap_alignment)) < 0)
640 return error;
641
642 /* Write a single byte to force the file system to allocate space now or
643 * report an error, since we can't report errors when writing using mmap.
644 * Round the size up to the nearest page so that we only need to perform file
645 * I/O when we add a page, instead of whenever we write even a single byte. */
646 new_size = current_size + size;
647 page_offset = new_size % mmap_alignment;
648 page_start = new_size - page_offset;
649
650 if (p_lseek(fd, page_start + mmap_alignment - 1, SEEK_SET) < 0 ||
651 p_write(idx->pack->mwf.fd, data, 1) < 0) {
652 git_error_set(GIT_ERROR_OS, "cannot extend packfile '%s'", idx->pack->pack_name);
653 return -1;
654 }
655
656 return write_at(idx, data, idx->pack->mwf.size, size);
657 }
658
659 static int read_stream_object(git_indexer *idx, git_transfer_progress *stats)
660 {
661 git_packfile_stream *stream = &idx->stream;
662 git_off_t entry_start = idx->off;
663 size_t entry_size;
664 git_object_t type;
665 git_mwindow *w = NULL;
666 int error;
667
668 if (idx->pack->mwf.size <= idx->off + 20)
669 return GIT_EBUFS;
670
671 if (!idx->have_stream) {
672 error = git_packfile_unpack_header(&entry_size, &type, &idx->pack->mwf, &w, &idx->off);
673 if (error == GIT_EBUFS) {
674 idx->off = entry_start;
675 return error;
676 }
677 if (error < 0)
678 return error;
679
680 git_mwindow_close(&w);
681 idx->entry_start = entry_start;
682 git_hash_init(&idx->hash_ctx);
683 git_buf_clear(&idx->entry_data);
684
685 if (type == GIT_OBJECT_REF_DELTA || type == GIT_OBJECT_OFS_DELTA) {
686 error = advance_delta_offset(idx, type);
687 if (error == GIT_EBUFS) {
688 idx->off = entry_start;
689 return error;
690 }
691 if (error < 0)
692 return error;
693
694 idx->have_delta = 1;
695 } else {
696 idx->have_delta = 0;
697
698 error = hash_header(&idx->hash_ctx, entry_size, type);
699 if (error < 0)
700 return error;
701 }
702
703 idx->have_stream = 1;
704 idx->entry_type = type;
705
706 error = git_packfile_stream_open(stream, idx->pack, idx->off);
707 if (error < 0)
708 return error;
709 }
710
711 if (idx->have_delta) {
712 error = read_object_stream(idx, stream);
713 } else {
714 error = hash_object_stream(idx, stream);
715 }
716
717 idx->off = stream->curpos;
718 if (error == GIT_EBUFS)
719 return error;
720
721 /* We want to free the stream reasorces no matter what here */
722 idx->have_stream = 0;
723 git_packfile_stream_dispose(stream);
724
725 if (error < 0)
726 return error;
727
728 if (idx->have_delta) {
729 error = store_delta(idx);
730 } else {
731 error = store_object(idx);
732 }
733
734 if (error < 0)
735 return error;
736
737 if (!idx->have_delta) {
738 stats->indexed_objects++;
739 }
740 stats->received_objects++;
741
742 if ((error = do_progress_callback(idx, stats)) != 0)
743 return error;
744
745 return 0;
746 }
747
748 int git_indexer_append(git_indexer *idx, const void *data, size_t size, git_transfer_progress *stats)
749 {
750 int error = -1;
751 struct git_pack_header *hdr = &idx->hdr;
752 git_mwindow_file *mwf = &idx->pack->mwf;
753
754 assert(idx && data && stats);
755
756 if ((error = append_to_pack(idx, data, size)) < 0)
757 return error;
758
759 hash_partially(idx, data, (int)size);
760
761 /* Make sure we set the new size of the pack */
762 idx->pack->mwf.size += size;
763
764 if (!idx->parsed_header) {
765 unsigned int total_objects;
766
767 if ((unsigned)idx->pack->mwf.size < sizeof(struct git_pack_header))
768 return 0;
769
770 if ((error = parse_header(&idx->hdr, idx->pack)) < 0)
771 return error;
772
773 idx->parsed_header = 1;
774 idx->nr_objects = ntohl(hdr->hdr_entries);
775 idx->off = sizeof(struct git_pack_header);
776
777 if (idx->nr_objects <= git_indexer__max_objects) {
778 total_objects = (unsigned int)idx->nr_objects;
779 } else {
780 git_error_set(GIT_ERROR_INDEXER, "too many objects");
781 return -1;
782 }
783
784 idx->pack->idx_cache = git_oidmap_alloc();
785 GIT_ERROR_CHECK_ALLOC(idx->pack->idx_cache);
786
787 idx->pack->has_cache = 1;
788 if (git_vector_init(&idx->objects, total_objects, objects_cmp) < 0)
789 return -1;
790
791 if (git_vector_init(&idx->deltas, total_objects / 2, NULL) < 0)
792 return -1;
793
794 stats->received_objects = 0;
795 stats->local_objects = 0;
796 stats->total_deltas = 0;
797 stats->indexed_deltas = 0;
798 stats->indexed_objects = 0;
799 stats->total_objects = total_objects;
800
801 if ((error = do_progress_callback(idx, stats)) != 0)
802 return error;
803 }
804
805 /* Now that we have data in the pack, let's try to parse it */
806
807 /* As the file grows any windows we try to use will be out of date */
808 git_mwindow_free_all(mwf);
809
810 while (stats->indexed_objects < idx->nr_objects) {
811 if ((error = read_stream_object(idx, stats)) != 0) {
812 if (error == GIT_EBUFS)
813 break;
814 else
815 goto on_error;
816 }
817 }
818
819 return 0;
820
821 on_error:
822 git_mwindow_free_all(mwf);
823 return error;
824 }
825
826 static int index_path(git_buf *path, git_indexer *idx, const char *suffix)
827 {
828 const char prefix[] = "pack-";
829 size_t slash = (size_t)path->size;
830
831 /* search backwards for '/' */
832 while (slash > 0 && path->ptr[slash - 1] != '/')
833 slash--;
834
835 if (git_buf_grow(path, slash + 1 + strlen(prefix) +
836 GIT_OID_HEXSZ + strlen(suffix) + 1) < 0)
837 return -1;
838
839 git_buf_truncate(path, slash);
840 git_buf_puts(path, prefix);
841 git_oid_fmt(path->ptr + git_buf_len(path), &idx->hash);
842 path->size += GIT_OID_HEXSZ;
843 git_buf_puts(path, suffix);
844
845 return git_buf_oom(path) ? -1 : 0;
846 }
847
848 /**
849 * Rewind the packfile by the trailer, as we might need to fix the
850 * packfile by injecting objects at the tail and must overwrite it.
851 */
852 static void seek_back_trailer(git_indexer *idx)
853 {
854 idx->pack->mwf.size -= GIT_OID_RAWSZ;
855 git_mwindow_free_all(&idx->pack->mwf);
856 }
857
858 static int inject_object(git_indexer *idx, git_oid *id)
859 {
860 git_odb_object *obj;
861 struct entry *entry;
862 struct git_pack_entry *pentry = NULL;
863 git_oid foo = {{0}};
864 unsigned char hdr[64];
865 git_buf buf = GIT_BUF_INIT;
866 git_off_t entry_start;
867 const void *data;
868 size_t len, hdr_len;
869 int error;
870
871 seek_back_trailer(idx);
872 entry_start = idx->pack->mwf.size;
873
874 if (git_odb_read(&obj, idx->odb, id) < 0) {
875 git_error_set(GIT_ERROR_INDEXER, "missing delta bases");
876 return -1;
877 }
878
879 data = git_odb_object_data(obj);
880 len = git_odb_object_size(obj);
881
882 entry = git__calloc(1, sizeof(*entry));
883 GIT_ERROR_CHECK_ALLOC(entry);
884
885 entry->crc = crc32(0L, Z_NULL, 0);
886
887 /* Write out the object header */
888 hdr_len = git_packfile__object_header(hdr, len, git_odb_object_type(obj));
889 if ((error = append_to_pack(idx, hdr, hdr_len)) < 0)
890 goto cleanup;
891
892 idx->pack->mwf.size += hdr_len;
893 entry->crc = crc32(entry->crc, hdr, (uInt)hdr_len);
894
895 if ((error = git_zstream_deflatebuf(&buf, data, len)) < 0)
896 goto cleanup;
897
898 /* And then the compressed object */
899 if ((error = append_to_pack(idx, buf.ptr, buf.size)) < 0)
900 goto cleanup;
901
902 idx->pack->mwf.size += buf.size;
903 entry->crc = htonl(crc32(entry->crc, (unsigned char *)buf.ptr, (uInt)buf.size));
904 git_buf_dispose(&buf);
905
906 /* Write a fake trailer so the pack functions play ball */
907
908 if ((error = append_to_pack(idx, &foo, GIT_OID_RAWSZ)) < 0)
909 goto cleanup;
910
911 idx->pack->mwf.size += GIT_OID_RAWSZ;
912
913 pentry = git__calloc(1, sizeof(struct git_pack_entry));
914 GIT_ERROR_CHECK_ALLOC(pentry);
915
916 git_oid_cpy(&pentry->sha1, id);
917 git_oid_cpy(&entry->oid, id);
918 idx->off = entry_start + hdr_len + len;
919
920 error = save_entry(idx, entry, pentry, entry_start);
921
922 cleanup:
923 if (error) {
924 git__free(entry);
925 git__free(pentry);
926 }
927
928 git_odb_object_free(obj);
929 return error;
930 }
931
932 static int fix_thin_pack(git_indexer *idx, git_transfer_progress *stats)
933 {
934 int error, found_ref_delta = 0;
935 unsigned int i;
936 struct delta_info *delta;
937 size_t size;
938 git_object_t type;
939 git_mwindow *w = NULL;
940 git_off_t curpos = 0;
941 unsigned char *base_info;
942 unsigned int left = 0;
943 git_oid base;
944
945 assert(git_vector_length(&idx->deltas) > 0);
946
947 if (idx->odb == NULL) {
948 git_error_set(GIT_ERROR_INDEXER, "cannot fix a thin pack without an ODB");
949 return -1;
950 }
951
952 /* Loop until we find the first REF delta */
953 git_vector_foreach(&idx->deltas, i, delta) {
954 if (!delta)
955 continue;
956
957 curpos = delta->delta_off;
958 error = git_packfile_unpack_header(&size, &type, &idx->pack->mwf, &w, &curpos);
959 if (error < 0)
960 return error;
961
962 if (type == GIT_OBJECT_REF_DELTA) {
963 found_ref_delta = 1;
964 break;
965 }
966 }
967
968 if (!found_ref_delta) {
969 git_error_set(GIT_ERROR_INDEXER, "no REF_DELTA found, cannot inject object");
970 return -1;
971 }
972
973 /* curpos now points to the base information, which is an OID */
974 base_info = git_mwindow_open(&idx->pack->mwf, &w, curpos, GIT_OID_RAWSZ, &left);
975 if (base_info == NULL) {
976 git_error_set(GIT_ERROR_INDEXER, "failed to map delta information");
977 return -1;
978 }
979
980 git_oid_fromraw(&base, base_info);
981 git_mwindow_close(&w);
982
983 if (has_entry(idx, &base))
984 return 0;
985
986 if (inject_object(idx, &base) < 0)
987 return -1;
988
989 stats->local_objects++;
990
991 return 0;
992 }
993
994 static int resolve_deltas(git_indexer *idx, git_transfer_progress *stats)
995 {
996 unsigned int i;
997 int error;
998 struct delta_info *delta;
999 int progressed = 0, non_null = 0, progress_cb_result;
1000
1001 while (idx->deltas.length > 0) {
1002 progressed = 0;
1003 non_null = 0;
1004 git_vector_foreach(&idx->deltas, i, delta) {
1005 git_rawobj obj = {0};
1006
1007 if (!delta)
1008 continue;
1009
1010 non_null = 1;
1011 idx->off = delta->delta_off;
1012 if ((error = git_packfile_unpack(&obj, idx->pack, &idx->off)) < 0) {
1013 if (error == GIT_PASSTHROUGH) {
1014 /* We have not seen the base object, we'll try again later. */
1015 continue;
1016 }
1017 return -1;
1018 }
1019
1020 if (idx->do_verify && check_object_connectivity(idx, &obj) < 0)
1021 /* TODO: error? continue? */
1022 continue;
1023
1024 if (hash_and_save(idx, &obj, delta->delta_off) < 0)
1025 continue;
1026
1027 git__free(obj.data);
1028 stats->indexed_objects++;
1029 stats->indexed_deltas++;
1030 progressed = 1;
1031 if ((progress_cb_result = do_progress_callback(idx, stats)) < 0)
1032 return progress_cb_result;
1033
1034 /* remove from the list */
1035 git_vector_set(NULL, &idx->deltas, i, NULL);
1036 git__free(delta);
1037 }
1038
1039 /* if none were actually set, we're done */
1040 if (!non_null)
1041 break;
1042
1043 if (!progressed && (fix_thin_pack(idx, stats) < 0)) {
1044 return -1;
1045 }
1046 }
1047
1048 return 0;
1049 }
1050
1051 static int update_header_and_rehash(git_indexer *idx, git_transfer_progress *stats)
1052 {
1053 void *ptr;
1054 size_t chunk = 1024*1024;
1055 git_off_t hashed = 0;
1056 git_mwindow *w = NULL;
1057 git_mwindow_file *mwf;
1058 unsigned int left;
1059
1060 mwf = &idx->pack->mwf;
1061
1062 git_hash_init(&idx->trailer);
1063
1064
1065 /* Update the header to include the numer of local objects we injected */
1066 idx->hdr.hdr_entries = htonl(stats->total_objects + stats->local_objects);
1067 if (write_at(idx, &idx->hdr, 0, sizeof(struct git_pack_header)) < 0)
1068 return -1;
1069
1070 /*
1071 * We now use the same technique as before to determine the
1072 * hash. We keep reading up to the end and let
1073 * hash_partially() keep the existing trailer out of the
1074 * calculation.
1075 */
1076 git_mwindow_free_all(mwf);
1077 idx->inbuf_len = 0;
1078 while (hashed < mwf->size) {
1079 ptr = git_mwindow_open(mwf, &w, hashed, chunk, &left);
1080 if (ptr == NULL)
1081 return -1;
1082
1083 hash_partially(idx, ptr, left);
1084 hashed += left;
1085
1086 git_mwindow_close(&w);
1087 }
1088
1089 return 0;
1090 }
1091
1092 int git_indexer_commit(git_indexer *idx, git_transfer_progress *stats)
1093 {
1094 git_mwindow *w = NULL;
1095 unsigned int i, long_offsets = 0, left;
1096 int error;
1097 struct git_pack_idx_header hdr;
1098 git_buf filename = GIT_BUF_INIT;
1099 struct entry *entry;
1100 git_oid trailer_hash, file_hash;
1101 git_filebuf index_file = {0};
1102 void *packfile_trailer;
1103
1104 if (!idx->parsed_header) {
1105 git_error_set(GIT_ERROR_INDEXER, "incomplete pack header");
1106 return -1;
1107 }
1108
1109 /* Test for this before resolve_deltas(), as it plays with idx->off */
1110 if (idx->off + 20 < idx->pack->mwf.size) {
1111 git_error_set(GIT_ERROR_INDEXER, "unexpected data at the end of the pack");
1112 return -1;
1113 }
1114 if (idx->off + 20 > idx->pack->mwf.size) {
1115 git_error_set(GIT_ERROR_INDEXER, "missing trailer at the end of the pack");
1116 return -1;
1117 }
1118
1119 packfile_trailer = git_mwindow_open(&idx->pack->mwf, &w, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ, &left);
1120 if (packfile_trailer == NULL) {
1121 git_mwindow_close(&w);
1122 goto on_error;
1123 }
1124
1125 /* Compare the packfile trailer as it was sent to us and what we calculated */
1126 git_oid_fromraw(&file_hash, packfile_trailer);
1127 git_mwindow_close(&w);
1128
1129 git_hash_final(&trailer_hash, &idx->trailer);
1130 if (git_oid_cmp(&file_hash, &trailer_hash)) {
1131 git_error_set(GIT_ERROR_INDEXER, "packfile trailer mismatch");
1132 return -1;
1133 }
1134
1135 /* Freeze the number of deltas */
1136 stats->total_deltas = stats->total_objects - stats->indexed_objects;
1137
1138 if ((error = resolve_deltas(idx, stats)) < 0)
1139 return error;
1140
1141 if (stats->indexed_objects != stats->total_objects) {
1142 git_error_set(GIT_ERROR_INDEXER, "early EOF");
1143 return -1;
1144 }
1145
1146 if (stats->local_objects > 0) {
1147 if (update_header_and_rehash(idx, stats) < 0)
1148 return -1;
1149
1150 git_hash_final(&trailer_hash, &idx->trailer);
1151 write_at(idx, &trailer_hash, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ);
1152 }
1153
1154 /*
1155 * Is the resulting graph fully connected or are we still
1156 * missing some objects? In the second case, we can
1157 * bail out due to an incomplete and thus corrupt
1158 * packfile.
1159 */
1160 if (git_oidmap_size(idx->expected_oids) > 0) {
1161 git_error_set(GIT_ERROR_INDEXER, "packfile is missing %"PRIuZ" objects",
1162 git_oidmap_size(idx->expected_oids));
1163 return -1;
1164 }
1165
1166 git_vector_sort(&idx->objects);
1167
1168 /* Use the trailer hash as the pack file name to ensure
1169 * files with different contents have different names */
1170 git_oid_cpy(&idx->hash, &trailer_hash);
1171
1172 git_buf_sets(&filename, idx->pack->pack_name);
1173 git_buf_shorten(&filename, strlen("pack"));
1174 git_buf_puts(&filename, "idx");
1175 if (git_buf_oom(&filename))
1176 return -1;
1177
1178 if (git_filebuf_open(&index_file, filename.ptr,
1179 GIT_FILEBUF_HASH_CONTENTS |
1180 (idx->do_fsync ? GIT_FILEBUF_FSYNC : 0),
1181 idx->mode) < 0)
1182 goto on_error;
1183
1184 /* Write out the header */
1185 hdr.idx_signature = htonl(PACK_IDX_SIGNATURE);
1186 hdr.idx_version = htonl(2);
1187 git_filebuf_write(&index_file, &hdr, sizeof(hdr));
1188
1189 /* Write out the fanout table */
1190 for (i = 0; i < 256; ++i) {
1191 uint32_t n = htonl(idx->fanout[i]);
1192 git_filebuf_write(&index_file, &n, sizeof(n));
1193 }
1194
1195 /* Write out the object names (SHA-1 hashes) */
1196 git_vector_foreach(&idx->objects, i, entry) {
1197 git_filebuf_write(&index_file, &entry->oid, sizeof(git_oid));
1198 }
1199
1200 /* Write out the CRC32 values */
1201 git_vector_foreach(&idx->objects, i, entry) {
1202 git_filebuf_write(&index_file, &entry->crc, sizeof(uint32_t));
1203 }
1204
1205 /* Write out the offsets */
1206 git_vector_foreach(&idx->objects, i, entry) {
1207 uint32_t n;
1208
1209 if (entry->offset == UINT32_MAX)
1210 n = htonl(0x80000000 | long_offsets++);
1211 else
1212 n = htonl(entry->offset);
1213
1214 git_filebuf_write(&index_file, &n, sizeof(uint32_t));
1215 }
1216
1217 /* Write out the long offsets */
1218 git_vector_foreach(&idx->objects, i, entry) {
1219 uint32_t split[2];
1220
1221 if (entry->offset != UINT32_MAX)
1222 continue;
1223
1224 split[0] = htonl(entry->offset_long >> 32);
1225 split[1] = htonl(entry->offset_long & 0xffffffff);
1226
1227 git_filebuf_write(&index_file, &split, sizeof(uint32_t) * 2);
1228 }
1229
1230 /* Write out the packfile trailer to the index */
1231 if (git_filebuf_write(&index_file, &trailer_hash, GIT_OID_RAWSZ) < 0)
1232 goto on_error;
1233
1234 /* Write out the hash of the idx */
1235 if (git_filebuf_hash(&trailer_hash, &index_file) < 0)
1236 goto on_error;
1237
1238 git_filebuf_write(&index_file, &trailer_hash, sizeof(git_oid));
1239
1240 /* Figure out what the final name should be */
1241 if (index_path(&filename, idx, ".idx") < 0)
1242 goto on_error;
1243
1244 /* Commit file */
1245 if (git_filebuf_commit_at(&index_file, filename.ptr) < 0)
1246 goto on_error;
1247
1248 git_mwindow_free_all(&idx->pack->mwf);
1249
1250 /* Truncate file to undo rounding up to next page_size in append_to_pack */
1251 if (p_ftruncate(idx->pack->mwf.fd, idx->pack->mwf.size) < 0) {
1252 git_error_set(GIT_ERROR_OS, "failed to truncate pack file '%s'", idx->pack->pack_name);
1253 return -1;
1254 }
1255
1256 if (idx->do_fsync && p_fsync(idx->pack->mwf.fd) < 0) {
1257 git_error_set(GIT_ERROR_OS, "failed to fsync packfile");
1258 goto on_error;
1259 }
1260
1261 /* We need to close the descriptor here so Windows doesn't choke on commit_at */
1262 if (p_close(idx->pack->mwf.fd) < 0) {
1263 git_error_set(GIT_ERROR_OS, "failed to close packfile");
1264 goto on_error;
1265 }
1266
1267 idx->pack->mwf.fd = -1;
1268
1269 if (index_path(&filename, idx, ".pack") < 0)
1270 goto on_error;
1271
1272 /* And don't forget to rename the packfile to its new place. */
1273 if (p_rename(idx->pack->pack_name, git_buf_cstr(&filename)) < 0)
1274 goto on_error;
1275
1276 /* And fsync the parent directory if we're asked to. */
1277 if (idx->do_fsync &&
1278 git_futils_fsync_parent(git_buf_cstr(&filename)) < 0)
1279 goto on_error;
1280
1281 idx->pack_committed = 1;
1282
1283 git_buf_dispose(&filename);
1284 return 0;
1285
1286 on_error:
1287 git_mwindow_free_all(&idx->pack->mwf);
1288 git_filebuf_cleanup(&index_file);
1289 git_buf_dispose(&filename);
1290 return -1;
1291 }
1292
1293 void git_indexer_free(git_indexer *idx)
1294 {
1295 size_t pos;
1296
1297 if (idx == NULL)
1298 return;
1299
1300 if (idx->have_stream)
1301 git_packfile_stream_dispose(&idx->stream);
1302
1303 git_vector_free_deep(&idx->objects);
1304
1305 if (idx->pack->idx_cache) {
1306 struct git_pack_entry *pentry;
1307 git_oidmap_foreach_value(idx->pack->idx_cache, pentry, {
1308 git__free(pentry);
1309 });
1310
1311 git_oidmap_free(idx->pack->idx_cache);
1312 }
1313
1314 git_vector_free_deep(&idx->deltas);
1315
1316 if (!git_mutex_lock(&git__mwindow_mutex)) {
1317 if (!idx->pack_committed)
1318 git_packfile_close(idx->pack, true);
1319
1320 git_packfile_free(idx->pack);
1321 git_mutex_unlock(&git__mwindow_mutex);
1322 }
1323
1324 for (pos = git_oidmap_begin(idx->expected_oids);
1325 pos != git_oidmap_end(idx->expected_oids); pos++)
1326 {
1327 if (git_oidmap_has_data(idx->expected_oids, pos)) {
1328 git__free((git_oid *) git_oidmap_key(idx->expected_oids, pos));
1329 git_oidmap_delete_at(idx->expected_oids, pos);
1330 }
1331 }
1332
1333 git_hash_ctx_cleanup(&idx->trailer);
1334 git_hash_ctx_cleanup(&idx->hash_ctx);
1335 git_buf_dispose(&idx->entry_data);
1336 git_oidmap_free(idx->expected_oids);
1337 git__free(idx);
1338 }