]> git.proxmox.com Git - libgit2.git/blobdiff - src/libgit2/indexer.c
Merge https://salsa.debian.org/debian/libgit2 into proxmox/bullseye
[libgit2.git] / src / libgit2 / indexer.c
diff --git a/src/libgit2/indexer.c b/src/libgit2/indexer.c
new file mode 100644 (file)
index 0000000..afb9ce8
--- /dev/null
@@ -0,0 +1,1413 @@
+/*
+ * Copyright (C) the libgit2 contributors. All rights reserved.
+ *
+ * This file is part of libgit2, distributed under the GNU GPL v2 with
+ * a Linking Exception. For full terms see the included COPYING file.
+ */
+
+#include "indexer.h"
+
+#include "git2/indexer.h"
+#include "git2/object.h"
+
+#include "commit.h"
+#include "tree.h"
+#include "tag.h"
+#include "pack.h"
+#include "mwindow.h"
+#include "posix.h"
+#include "pack.h"
+#include "filebuf.h"
+#include "oid.h"
+#include "oidarray.h"
+#include "oidmap.h"
+#include "zstream.h"
+#include "object.h"
+
+size_t git_indexer__max_objects = UINT32_MAX;
+
+#define UINT31_MAX (0x7FFFFFFF)
+
/*
 * One object recorded for the index being built: its id, the CRC32 of
 * its representation in the pack, and its pack offset.  Offsets larger
 * than 31 bits are stored in `offset_long`, with `offset` set to
 * UINT32_MAX as the "large offset" marker (see save_entry/store_object).
 */
struct entry {
	git_oid oid;
	uint32_t crc;
	uint32_t offset;
	uint64_t offset_long;
};
+
/* State for building a pack index from a streamed packfile. */
struct git_indexer {
	unsigned int parsed_header :1, /* the pack header has been parsed */
		pack_committed :1,     /* presumably set once the pack is finalized — set outside this chunk */
		have_stream :1,        /* an object stream is currently open (read is resumable) */
		have_delta :1,         /* the entry currently being read is a delta */
		do_fsync :1,           /* fsync file data when writing */
		do_verify :1;          /* run object connectivity checks while indexing */
	struct git_pack_header hdr;    /* header copied from the incoming pack */
	struct git_pack_file *pack;    /* the pack file being written and indexed */
	unsigned int mode;             /* file mode for the pack file */
	off64_t off;                   /* current parse offset into the pack */
	off64_t entry_start;           /* offset at which the current entry began */
	git_object_t entry_type;       /* type of the entry currently being read */
	git_str entry_data;            /* raw object bytes (collected when do_verify) */
	git_packfile_stream stream;    /* inflate stream for the current entry */
	size_t nr_objects;             /* object count taken from the pack header */
	git_vector objects;            /* struct entry *, compared by oid (objects_cmp) */
	git_vector deltas;             /* struct delta_info *, deltas pending resolution */
	unsigned int fanout[256];      /* cumulative counts by first oid byte */
	git_hash_ctx hash_ctx;         /* hash of the object currently being read */
	unsigned char checksum[GIT_HASH_SHA1_SIZE]; /* pack trailer checksum */
	char name[(GIT_HASH_SHA1_SIZE * 2) + 1];    /* hex name of the pack */
	git_indexer_progress_cb progress_cb;        /* optional progress callback */
	void *progress_payload;                     /* payload for progress_cb */
	char objbuf[8*1024];           /* scratch buffer for inflated object data */

	/* OIDs referenced from pack objects. Used for verification. */
	git_oidmap *expected_oids;

	/* Needed to look up objects which we want to inject to fix a thin pack */
	git_odb *odb;

	/* Fields for calculating the packfile trailer (hash of everything before it) */
	char inbuf[GIT_OID_RAWSZ];
	size_t inbuf_len;
	git_hash_ctx trailer;
};
+
/* Pack offset of a delta object whose base was not yet available. */
struct delta_info {
	off64_t delta_off;
};
+
#ifndef GIT_DEPRECATE_HARD
/*
 * Deprecated accessor: expose the raw pack checksum as a git_oid.
 * Relies on the checksum being GIT_HASH_SHA1_SIZE (oid-sized) bytes.
 */
const git_oid *git_indexer_hash(const git_indexer *idx)
{
	return (git_oid *)idx->checksum;
}
#endif
+
/* Return the hex name of the pack (valid once the trailer has been computed). */
const char *git_indexer_name(const git_indexer *idx)
{
	return idx->name;
}
+
+static int parse_header(struct git_pack_header *hdr, struct git_pack_file *pack)
+{
+       int error;
+       git_map map;
+
+       if ((error = p_mmap(&map, sizeof(*hdr), GIT_PROT_READ, GIT_MAP_SHARED, pack->mwf.fd, 0)) < 0)
+               return error;
+
+       memcpy(hdr, map.data, sizeof(*hdr));
+       p_munmap(&map);
+
+       /* Verify we recognize this pack file format. */
+       if (hdr->hdr_signature != ntohl(PACK_SIGNATURE)) {
+               git_error_set(GIT_ERROR_INDEXER, "wrong pack signature");
+               return -1;
+       }
+
+       if (!pack_version_ok(hdr->hdr_version)) {
+               git_error_set(GIT_ERROR_INDEXER, "wrong pack version");
+               return -1;
+       }
+
+       return 0;
+}
+
+static int objects_cmp(const void *a, const void *b)
+{
+       const struct entry *entrya = a;
+       const struct entry *entryb = b;
+
+       return git_oid__cmp(&entrya->oid, &entryb->oid);
+}
+
/* Initialize `opts` with the defaults for the requested options version. */
int git_indexer_options_init(git_indexer_options *opts, unsigned int version)
{
	GIT_INIT_STRUCTURE_FROM_TEMPLATE(
		opts, version, git_indexer_options, GIT_INDEXER_OPTIONS_INIT);
	return 0;
}
+
#ifndef GIT_DEPRECATE_HARD
/* Deprecated alias for git_indexer_options_init(). */
int git_indexer_init_options(git_indexer_options *opts, unsigned int version)
{
	return git_indexer_options_init(opts, version);
}
#endif
+
/*
 * Create a new streaming indexer.
 *
 * The pack is written to a temp file under `prefix`/pack and indexed as
 * data is appended.
 *
 * @param out     receives the new indexer
 * @param prefix  directory in which the pack (and later index) live
 * @param mode    file mode for the pack file, or 0 for GIT_PACK_FILE_MODE
 * @param odb     optional ODB used for thin-pack fixing and verification
 * @param in_opts optional options; copied, may be NULL
 */
int git_indexer_new(
		git_indexer **out,
		const char *prefix,
		unsigned int mode,
		git_odb *odb,
		git_indexer_options *in_opts)
{
	git_indexer_options opts = GIT_INDEXER_OPTIONS_INIT;
	git_indexer *idx;
	git_str path = GIT_STR_INIT, tmp_path = GIT_STR_INIT;
	static const char suff[] = "/pack";
	int error, fd = -1;

	if (in_opts)
		memcpy(&opts, in_opts, sizeof(opts));

	idx = git__calloc(1, sizeof(git_indexer));
	GIT_ERROR_CHECK_ALLOC(idx);
	idx->odb = odb;
	idx->progress_cb = opts.progress_cb;
	idx->progress_payload = opts.progress_cb_payload;
	idx->mode = mode ? mode : GIT_PACK_FILE_MODE;
	git_str_init(&idx->entry_data, 0);

	if ((error = git_hash_ctx_init(&idx->hash_ctx, GIT_HASH_ALGORITHM_SHA1)) < 0 ||
	    (error = git_hash_ctx_init(&idx->trailer, GIT_HASH_ALGORITHM_SHA1)) < 0 ||
	    (error = git_oidmap_new(&idx->expected_oids)) < 0)
		goto cleanup;

	idx->do_verify = opts.verify;

	/* Honor the global core.fsyncObjectFiles-style setting */
	if (git_repository__fsync_gitdir)
		idx->do_fsync = 1;

	error = git_str_joinpath(&path, prefix, suff);
	if (error < 0)
		goto cleanup;

	/* Create the pack as a tempfile; it is renamed into place on commit */
	fd = git_futils_mktmp(&tmp_path, git_str_cstr(&path), idx->mode);
	git_str_dispose(&path);
	if (fd < 0)
		goto cleanup;

	error = git_packfile_alloc(&idx->pack, git_str_cstr(&tmp_path));
	git_str_dispose(&tmp_path);

	if (error < 0)
		goto cleanup;

	idx->pack->mwf.fd = fd;
	if ((error = git_mwindow_file_register(&idx->pack->mwf)) < 0)
		goto cleanup;

	*out = idx;
	return 0;

cleanup:
	/* NOTE(review): this path unlinks the pack file but frees neither
	 * `idx->pack` nor the hash contexts / `expected_oids` before freeing
	 * `idx` — looks like a leak on the error path; confirm against
	 * upstream before changing. */
	if (fd != -1)
		p_close(fd);

	if (git_str_len(&tmp_path) > 0)
		p_unlink(git_str_cstr(&tmp_path));

	if (idx->pack != NULL)
		p_unlink(idx->pack->pack_name);

	git_str_dispose(&path);
	git_str_dispose(&tmp_path);
	git__free(idx);
	return -1;
}
+
+void git_indexer__set_fsync(git_indexer *idx, int do_fsync)
+{
+       idx->do_fsync = !!do_fsync;
+}
+
+/* Try to store the delta so we can try to resolve it later */
+static int store_delta(git_indexer *idx)
+{
+       struct delta_info *delta;
+
+       delta = git__calloc(1, sizeof(struct delta_info));
+       GIT_ERROR_CHECK_ALLOC(delta);
+       delta->delta_off = idx->entry_start;
+
+       if (git_vector_insert(&idx->deltas, delta) < 0)
+               return -1;
+
+       return 0;
+}
+
+static int hash_header(git_hash_ctx *ctx, off64_t len, git_object_t type)
+{
+       char buffer[64];
+       size_t hdrlen;
+       int error;
+
+       if ((error = git_odb__format_object_header(&hdrlen,
+               buffer, sizeof(buffer), (size_t)len, type)) < 0)
+               return error;
+
+       return git_hash_update(ctx, buffer, hdrlen);
+}
+
+static int hash_object_stream(git_indexer*idx, git_packfile_stream *stream)
+{
+       ssize_t read;
+
+       GIT_ASSERT_ARG(idx);
+       GIT_ASSERT_ARG(stream);
+
+       do {
+               if ((read = git_packfile_stream_read(stream, idx->objbuf, sizeof(idx->objbuf))) < 0)
+                       break;
+
+               if (idx->do_verify)
+                       git_str_put(&idx->entry_data, idx->objbuf, read);
+
+               git_hash_update(&idx->hash_ctx, idx->objbuf, read);
+       } while (read > 0);
+
+       if (read < 0)
+               return (int)read;
+
+       return 0;
+}
+
/* In order to create the packfile stream, we need to skip over the delta base description */
static int advance_delta_offset(git_indexer *idx, git_object_t type)
{
	git_mwindow *w = NULL;

	GIT_ASSERT_ARG(type == GIT_OBJECT_REF_DELTA || type == GIT_OBJECT_OFS_DELTA);

	if (type == GIT_OBJECT_REF_DELTA) {
		/* REF deltas name their base with a raw oid right after the header */
		idx->off += GIT_OID_RAWSZ;
	} else {
		/* OFS deltas encode a relative offset; parsing it advances idx->off */
		off64_t base_off;
		int error = get_delta_base(&base_off, idx->pack, &w, &idx->off, type, idx->entry_start);
		git_mwindow_close(&w);
		if (error < 0)
			return error;
	}

	return 0;
}
+
+/* Read from the stream and discard any output */
+static int read_object_stream(git_indexer *idx, git_packfile_stream *stream)
+{
+       ssize_t read;
+
+       GIT_ASSERT_ARG(stream);
+
+       do {
+               read = git_packfile_stream_read(stream, idx->objbuf, sizeof(idx->objbuf));
+       } while (read > 0);
+
+       if (read < 0)
+               return (int)read;
+
+       return 0;
+}
+
+static int crc_object(uint32_t *crc_out, git_mwindow_file *mwf, off64_t start, off64_t size)
+{
+       void *ptr;
+       uint32_t crc;
+       unsigned int left, len;
+       git_mwindow *w = NULL;
+
+       crc = crc32(0L, Z_NULL, 0);
+       while (size) {
+               ptr = git_mwindow_open(mwf, &w, start, (size_t)size, &left);
+               if (ptr == NULL)
+                       return -1;
+
+               len = min(left, (unsigned int)size);
+               crc = crc32(crc, ptr, len);
+               size -= len;
+               start += len;
+               git_mwindow_close(&w);
+       }
+
+       *crc_out = htonl(crc);
+       return 0;
+}
+
+static int add_expected_oid(git_indexer *idx, const git_oid *oid)
+{
+       /*
+        * If we know about that object because it is stored in our ODB or
+        * because we have already processed it as part of our pack file, we do
+        * not have to expect it.
+        */
+       if ((!idx->odb || !git_odb_exists(idx->odb, oid)) &&
+           !git_oidmap_exists(idx->pack->idx_cache, oid) &&
+           !git_oidmap_exists(idx->expected_oids, oid)) {
+                   git_oid *dup = git__malloc(sizeof(*oid));
+                   GIT_ERROR_CHECK_ALLOC(dup);
+                   git_oid_cpy(dup, oid);
+                   return git_oidmap_set(idx->expected_oids, dup, dup);
+       }
+
+       return 0;
+}
+
+static int check_object_connectivity(git_indexer *idx, const git_rawobj *obj)
+{
+       git_object *object;
+       git_oid *expected;
+       int error = 0;
+
+       if (obj->type != GIT_OBJECT_BLOB &&
+           obj->type != GIT_OBJECT_TREE &&
+           obj->type != GIT_OBJECT_COMMIT &&
+           obj->type != GIT_OBJECT_TAG)
+               return 0;
+
+       if (git_object__from_raw(&object, obj->data, obj->len, obj->type) < 0) {
+               /*
+                * parse_raw returns EINVALID on invalid data; downgrade
+                * that to a normal -1 error code.
+                */
+               error = -1;
+               goto out;
+       }
+
+       if ((expected = git_oidmap_get(idx->expected_oids, &object->cached.oid)) != NULL) {
+               git_oidmap_delete(idx->expected_oids, &object->cached.oid);
+               git__free(expected);
+       }
+
+       /*
+        * Check whether this is a known object. If so, we can just continue as
+        * we assume that the ODB has a complete graph.
+        */
+       if (idx->odb && git_odb_exists(idx->odb, &object->cached.oid))
+               return 0;
+
+       switch (obj->type) {
+               case GIT_OBJECT_TREE:
+               {
+                       git_tree *tree = (git_tree *) object;
+                       git_tree_entry *entry;
+                       size_t i;
+
+                       git_array_foreach(tree->entries, i, entry)
+                               if (add_expected_oid(idx, &entry->oid) < 0)
+                                       goto out;
+
+                       break;
+               }
+               case GIT_OBJECT_COMMIT:
+               {
+                       git_commit *commit = (git_commit *) object;
+                       git_oid *parent_oid;
+                       size_t i;
+
+                       git_array_foreach(commit->parent_ids, i, parent_oid)
+                               if (add_expected_oid(idx, parent_oid) < 0)
+                                       goto out;
+
+                       if (add_expected_oid(idx, &commit->tree_id) < 0)
+                               goto out;
+
+                       break;
+               }
+               case GIT_OBJECT_TAG:
+               {
+                       git_tag *tag = (git_tag *) object;
+
+                       if (add_expected_oid(idx, &tag->target) < 0)
+                               goto out;
+
+                       break;
+               }
+               case GIT_OBJECT_BLOB:
+               default:
+                       break;
+       }
+
+out:
+       git_object_free(object);
+
+       return error;
+}
+
/*
 * Finalize a non-delta object that has just been streamed: derive its id
 * from the running hash, optionally run connectivity checks, register it
 * in the pack's id cache, record its CRC and pack offset, and bump the
 * cumulative fanout counters.
 */
static int store_object(git_indexer *idx)
{
	int i, error;
	git_oid oid;
	struct entry *entry;
	off64_t entry_size;
	struct git_pack_entry *pentry;
	off64_t entry_start = idx->entry_start;

	entry = git__calloc(1, sizeof(*entry));
	GIT_ERROR_CHECK_ALLOC(entry);

	/* NOTE(review): if this allocation fails, the early return from
	 * GIT_ERROR_CHECK_ALLOC leaks `entry` — confirm. */
	pentry = git__calloc(1, sizeof(struct git_pack_entry));
	GIT_ERROR_CHECK_ALLOC(pentry);

	if (git_hash_final(oid.id, &idx->hash_ctx)) {
		git__free(pentry);
		goto on_error;
	}
	entry_size = idx->off - entry_start;
	/* Offsets beyond 31 bits use the index's "large offset" encoding */
	if (entry_start > UINT31_MAX) {
		entry->offset = UINT32_MAX;
		entry->offset_long = entry_start;
	} else {
		entry->offset = (uint32_t)entry_start;
	}

	if (idx->do_verify) {
		git_rawobj rawobj = {
		    idx->entry_data.ptr,
		    idx->entry_data.size,
		    idx->entry_type
		};

		if ((error = check_object_connectivity(idx, &rawobj)) < 0)
			goto on_error;
	}

	git_oid_cpy(&pentry->sha1, &oid);
	pentry->offset = entry_start;

	if (git_oidmap_exists(idx->pack->idx_cache, &pentry->sha1)) {
		git_error_set(GIT_ERROR_INDEXER, "duplicate object %s found in pack", git_oid_tostr_s(&pentry->sha1));
		git__free(pentry);
		goto on_error;
	}

	/* On success the id cache takes ownership of `pentry` */
	if ((error = git_oidmap_set(idx->pack->idx_cache, &pentry->sha1, pentry)) < 0) {
		git__free(pentry);
		git_error_set_oom();
		goto on_error;
	}

	git_oid_cpy(&entry->oid, &oid);

	if (crc_object(&entry->crc, &idx->pack->mwf, entry_start, entry_size) < 0)
		goto on_error;

	/* Add the object to the list */
	if (git_vector_insert(&idx->objects, entry) < 0)
		goto on_error;

	/* fanout[i] counts objects whose first oid byte is <= i (cumulative) */
	for (i = oid.id[0]; i < 256; ++i) {
		idx->fanout[i]++;
	}

	return 0;

on_error:
	git__free(entry);

	return -1;
}
+
/* Check whether an object with this id was already indexed from the pack. */
GIT_INLINE(bool) has_entry(git_indexer *idx, git_oid *id)
{
	return git_oidmap_exists(idx->pack->idx_cache, id);
}
+
/*
 * Record a finished object in the index: fill in its (possibly large)
 * pack offset, register it in the pack's id cache and the object list,
 * and bump the cumulative fanout counters.  On success, ownership of
 * `entry` and `pentry` passes to the index; on failure the caller is
 * expected to free them.
 */
static int save_entry(git_indexer *idx, struct entry *entry, struct git_pack_entry *pentry, off64_t entry_start)
{
	int i;

	/* Offsets beyond 31 bits use the index's "large offset" encoding */
	if (entry_start > UINT31_MAX) {
		entry->offset = UINT32_MAX;
		entry->offset_long = entry_start;
	} else {
		entry->offset = (uint32_t)entry_start;
	}

	pentry->offset = entry_start;

	if (git_oidmap_exists(idx->pack->idx_cache, &pentry->sha1) ||
	    git_oidmap_set(idx->pack->idx_cache, &pentry->sha1, pentry) < 0) {
		git_error_set(GIT_ERROR_INDEXER, "cannot insert object into pack");
		return -1;
	}

	/* Add the object to the list */
	/* NOTE(review): if this insert fails after the oidmap_set above
	 * succeeded, `pentry` stays referenced by the cache while callers
	 * free it — confirm this cannot double free. */
	if (git_vector_insert(&idx->objects, entry) < 0)
		return -1;

	/* fanout[i] counts objects whose first oid byte is <= i (cumulative) */
	for (i = entry->oid.id[0]; i < 256; ++i) {
		idx->fanout[i]++;
	}

	return 0;
}
+
/*
 * Hash a fully resolved (un-deltified) object and record it in the index
 * at `entry_start`.  On error, frees the entry bookkeeping AND the raw
 * object data (`obj->data`); on success the data is left for the caller.
 */
static int hash_and_save(git_indexer *idx, git_rawobj *obj, off64_t entry_start)
{
	git_oid oid;
	size_t entry_size;
	struct entry *entry;
	struct git_pack_entry *pentry = NULL;

	entry = git__calloc(1, sizeof(*entry));
	GIT_ERROR_CHECK_ALLOC(entry);

	if (git_odb__hashobj(&oid, obj) < 0) {
		git_error_set(GIT_ERROR_INDEXER, "failed to hash object");
		goto on_error;
	}

	pentry = git__calloc(1, sizeof(struct git_pack_entry));
	GIT_ERROR_CHECK_ALLOC(pentry);

	git_oid_cpy(&pentry->sha1, &oid);
	git_oid_cpy(&entry->oid, &oid);
	entry->crc = crc32(0L, Z_NULL, 0);

	entry_size = (size_t)(idx->off - entry_start);
	if (crc_object(&entry->crc, &idx->pack->mwf, entry_start, entry_size) < 0)
		goto on_error;

	/* NOTE(review): if save_entry fails, `entry`/`pentry` are returned
	 * unfreed to the caller — confirm who owns them on that path. */
	return save_entry(idx, entry, pentry, entry_start);

on_error:
	git__free(pentry);
	git__free(entry);
	git__free(obj->data);
	return -1;
}
+
+static int do_progress_callback(git_indexer *idx, git_indexer_progress *stats)
+{
+       if (idx->progress_cb)
+               return git_error_set_after_callback_function(
+                       idx->progress_cb(stats, idx->progress_payload),
+                       "indexer progress");
+       return 0;
+}
+
/*
 * Hash everything but the last 20B of input.
 *
 * The final GIT_OID_RAWSZ bytes of the pack are its trailer checksum and
 * must not be fed into the trailer hash; since we stream and don't know
 * where the end is, the most recent GIT_OID_RAWSZ bytes are always held
 * back in `inbuf` and only hashed once more data arrives.
 */
static void hash_partially(git_indexer *idx, const uint8_t *data, size_t size)
{
	size_t to_expell, to_keep;

	if (size == 0)
		return;

	/* Easy case, dump the buffer and the data minus the last 20 bytes */
	if (size >= GIT_OID_RAWSZ) {
		git_hash_update(&idx->trailer, idx->inbuf, idx->inbuf_len);
		git_hash_update(&idx->trailer, data, size - GIT_OID_RAWSZ);

		data += size - GIT_OID_RAWSZ;
		memcpy(idx->inbuf, data, GIT_OID_RAWSZ);
		idx->inbuf_len = GIT_OID_RAWSZ;
		return;
	}

	/* We can just append */
	if (idx->inbuf_len + size <= GIT_OID_RAWSZ) {
		memcpy(idx->inbuf + idx->inbuf_len, data, size);
		idx->inbuf_len += size;
		return;
	}

	/* We need to partially drain the buffer and then append */
	/* Here size < RAWSZ and inbuf_len + size > RAWSZ, so
	 * 0 <= to_keep < inbuf_len and to_expell > 0 */
	to_keep   = GIT_OID_RAWSZ - size;
	to_expell = idx->inbuf_len - to_keep;

	git_hash_update(&idx->trailer, idx->inbuf, to_expell);

	memmove(idx->inbuf, idx->inbuf + to_expell, to_keep);
	memcpy(idx->inbuf + to_keep, data, size);
	idx->inbuf_len += size - to_expell;
}
+
+#if defined(NO_MMAP) || !defined(GIT_WIN32)
+
/*
 * Write `size` bytes at absolute `offset` in the pack file, retrying on
 * EINTR and continuing across short writes.
 */
static int write_at(git_indexer *idx, const void *data, off64_t offset, size_t size)
{
	size_t remaining_size = size;
	const char *ptr = (const char *)data;

	/* Handle data size larger that ssize_t */
	while (remaining_size > 0) {
		ssize_t nb;
		HANDLE_EINTR(nb, p_pwrite(idx->pack->mwf.fd, (void *)ptr,
					  remaining_size, offset));
		if (nb <= 0)
			return -1;

		ptr += nb;
		offset += nb;
		remaining_size -= nb;
	}

	return 0;
}
+
/* Append `size` bytes to the end of the pack file (POSIX pwrite path). */
static int append_to_pack(git_indexer *idx, const void *data, size_t size)
{
	if (write_at(idx, data, idx->pack->mwf.size, size) < 0) {
		git_error_set(GIT_ERROR_OS, "cannot extend packfile '%s'", idx->pack->pack_name);
		return -1;
	}

	return 0;
}
+
+#else
+
+/*
+ * Windows may keep different views to a networked file for the mmap- and
+ * open-accessed versions of a file, so any writes done through
+ * `write(2)`/`pwrite(2)` may not be reflected on the data that `mmap(2)` is
+ * able to read.
+ */
+
/*
 * Write `size` bytes at absolute `offset` via a shared, writable mmap
 * window (Windows path — see the comment above on mmap vs write views).
 * The mapping must start on an mmap-alignment boundary, so the window is
 * opened at the enclosing page and written at the intra-page offset.
 */
static int write_at(git_indexer *idx, const void *data, off64_t offset, size_t size)
{
	git_file fd = idx->pack->mwf.fd;
	size_t mmap_alignment;
	size_t page_offset;
	off64_t page_start;
	unsigned char *map_data;
	git_map map;
	int error;

	GIT_ASSERT_ARG(data);
	GIT_ASSERT_ARG(size);

	if ((error = git__mmap_alignment(&mmap_alignment)) < 0)
		return error;

	/* the offset needs to be at the mmap boundary for the platform */
	page_offset = offset % mmap_alignment;
	page_start = offset - page_offset;

	if ((error = p_mmap(&map, page_offset + size, GIT_PROT_WRITE, GIT_MAP_SHARED, fd, page_start)) < 0)
		return error;

	map_data = (unsigned char *)map.data;
	memcpy(map_data + page_offset, data, size);
	p_munmap(&map);

	return 0;
}
+
/*
 * Append `size` bytes to the end of the pack file (mmap path).  The file
 * is first grown with an explicit one-byte pwrite so allocation failures
 * surface here, since writes through the mapping cannot report errors.
 */
static int append_to_pack(git_indexer *idx, const void *data, size_t size)
{
	off64_t new_size;
	size_t mmap_alignment;
	size_t page_offset;
	off64_t page_start;
	off64_t current_size = idx->pack->mwf.size;
	int error;

	if (!size)
		return 0;

	if ((error = git__mmap_alignment(&mmap_alignment)) < 0)
		return error;

	/* Write a single byte to force the file system to allocate space now or
	 * report an error, since we can't report errors when writing using mmap.
	 * Round the size up to the nearest page so that we only need to perform file
	 * I/O when we add a page, instead of whenever we write even a single byte. */
	new_size = current_size + size;
	page_offset = new_size % mmap_alignment;
	page_start = new_size - page_offset;

	/* extends the file through the last page touched by this append */
	if (p_pwrite(idx->pack->mwf.fd, data, 1, page_start + mmap_alignment - 1) < 0) {
		git_error_set(GIT_ERROR_OS, "cannot extend packfile '%s'", idx->pack->pack_name);
		return -1;
	}

	return write_at(idx, data, idx->pack->mwf.size, size);
}
+
+#endif
+
/*
 * Read (or resume reading) one object from the pack at idx->off.
 *
 * Returns GIT_EBUFS when more data is needed — in that case the parse
 * state (have_stream, entry_start) is kept so the next call resumes
 * where this one left off.  Completed non-deltas are stored right away;
 * deltas are queued for later resolution.
 */
static int read_stream_object(git_indexer *idx, git_indexer_progress *stats)
{
	git_packfile_stream *stream = &idx->stream;
	off64_t entry_start = idx->off;
	size_t entry_size;
	git_object_t type;
	git_mwindow *w = NULL;
	int error;

	/* 20 == GIT_OID_RAWSZ: never read into the trailing pack checksum */
	if (idx->pack->mwf.size <= idx->off + 20)
		return GIT_EBUFS;

	if (!idx->have_stream) {
		error = git_packfile_unpack_header(&entry_size, &type, idx->pack, &w, &idx->off);
		if (error == GIT_EBUFS) {
			/* rewind so the header is re-parsed once more data arrives */
			idx->off = entry_start;
			return error;
		}
		if (error < 0)
			return error;

		git_mwindow_close(&w);
		idx->entry_start = entry_start;
		git_hash_init(&idx->hash_ctx);
		git_str_clear(&idx->entry_data);

		if (type == GIT_OBJECT_REF_DELTA || type == GIT_OBJECT_OFS_DELTA) {
			error = advance_delta_offset(idx, type);
			if (error == GIT_EBUFS) {
				idx->off = entry_start;
				return error;
			}
			if (error < 0)
				return error;

			idx->have_delta = 1;
		} else {
			idx->have_delta = 0;

			/* whole objects are hashed with their loose-object header */
			error = hash_header(&idx->hash_ctx, entry_size, type);
			if (error < 0)
				return error;
		}

		idx->have_stream = 1;
		idx->entry_type = type;

		error = git_packfile_stream_open(stream, idx->pack, idx->off);
		if (error < 0)
			return error;
	}

	if (idx->have_delta) {
		error = read_object_stream(idx, stream);
	} else {
		error = hash_object_stream(idx, stream);
	}

	idx->off = stream->curpos;
	if (error == GIT_EBUFS)
		return error;

	/* We want to free the stream resources no matter what here */
	idx->have_stream = 0;
	git_packfile_stream_dispose(stream);

	if (error < 0)
		return error;

	if (idx->have_delta) {
		error = store_delta(idx);
	} else {
		error = store_object(idx);
	}

	if (error < 0)
		return error;

	if (!idx->have_delta) {
		stats->indexed_objects++;
	}
	stats->received_objects++;

	if ((error = do_progress_callback(idx, stats)) != 0)
		return error;

	return 0;
}
+
+int git_indexer_append(git_indexer *idx, const void *data, size_t size, git_indexer_progress *stats)
+{
+       int error = -1;
+       struct git_pack_header *hdr = &idx->hdr;
+       git_mwindow_file *mwf = &idx->pack->mwf;
+
+       GIT_ASSERT_ARG(idx);
+       GIT_ASSERT_ARG(data);
+       GIT_ASSERT_ARG(stats);
+
+       if ((error = append_to_pack(idx, data, size)) < 0)
+               return error;
+
+       hash_partially(idx, data, (int)size);
+
+       /* Make sure we set the new size of the pack */
+       idx->pack->mwf.size += size;
+
+       if (!idx->parsed_header) {
+               unsigned int total_objects;
+
+               if ((unsigned)idx->pack->mwf.size < sizeof(struct git_pack_header))
+                       return 0;
+
+               if ((error = parse_header(&idx->hdr, idx->pack)) < 0)
+                       return error;
+
+               idx->parsed_header = 1;
+               idx->nr_objects = ntohl(hdr->hdr_entries);
+               idx->off = sizeof(struct git_pack_header);
+
+               if (idx->nr_objects <= git_indexer__max_objects) {
+                       total_objects = (unsigned int)idx->nr_objects;
+               } else {
+                       git_error_set(GIT_ERROR_INDEXER, "too many objects");
+                       return -1;
+               }
+
+               if (git_oidmap_new(&idx->pack->idx_cache) < 0)
+                       return -1;
+
+               idx->pack->has_cache = 1;
+               if (git_vector_init(&idx->objects, total_objects, objects_cmp) < 0)
+                       return -1;
+
+               if (git_vector_init(&idx->deltas, total_objects / 2, NULL) < 0)
+                       return -1;
+
+               stats->received_objects = 0;
+               stats->local_objects = 0;
+               stats->total_deltas = 0;
+               stats->indexed_deltas = 0;
+               stats->indexed_objects = 0;
+               stats->total_objects = total_objects;
+
+               if ((error = do_progress_callback(idx, stats)) != 0)
+                       return error;
+       }
+
+       /* Now that we have data in the pack, let's try to parse it */
+
+       /* As the file grows any windows we try to use will be out of date */
+       if ((error = git_mwindow_free_all(mwf)) < 0)
+               goto on_error;
+
+       while (stats->indexed_objects < idx->nr_objects) {
+               if ((error = read_stream_object(idx, stats)) != 0) {
+                       if (error == GIT_EBUFS)
+                               break;
+                       else
+                               goto on_error;
+               }
+       }
+
+       return 0;
+
+on_error:
+       git_mwindow_free_all(mwf);
+       return error;
+}
+
+static int index_path(git_str *path, git_indexer *idx, const char *suffix)
+{
+       const char prefix[] = "pack-";
+       size_t slash = (size_t)path->size;
+
+       /* search backwards for '/' */
+       while (slash > 0 && path->ptr[slash - 1] != '/')
+               slash--;
+
+       if (git_str_grow(path, slash + 1 + strlen(prefix) +
+                                        GIT_OID_HEXSZ + strlen(suffix) + 1) < 0)
+               return -1;
+
+       git_str_truncate(path, slash);
+       git_str_puts(path, prefix);
+       git_str_puts(path, idx->name);
+       git_str_puts(path, suffix);
+
+       return git_str_oom(path) ? -1 : 0;
+}
+
/**
 * Rewind the packfile by the trailer, as we might need to fix the
 * packfile by injecting objects at the tail and must overwrite it.
 *
 * Shrinks the logical size by GIT_OID_RAWSZ and drops all mmap windows,
 * which would otherwise still see the old tail.
 */
static int seek_back_trailer(git_indexer *idx)
{
	idx->pack->mwf.size -= GIT_OID_RAWSZ;
	return git_mwindow_free_all(&idx->pack->mwf);
}
+
+static int inject_object(git_indexer *idx, git_oid *id)
+{
+       git_odb_object *obj = NULL;
+       struct entry *entry = NULL;
+       struct git_pack_entry *pentry = NULL;
+       unsigned char empty_checksum[GIT_HASH_SHA1_SIZE] = {0};
+       unsigned char hdr[64];
+       git_str buf = GIT_STR_INIT;
+       off64_t entry_start;
+       const void *data;
+       size_t len, hdr_len;
+       size_t checksum_size = GIT_HASH_SHA1_SIZE;
+       int error;
+
+       if ((error = seek_back_trailer(idx)) < 0)
+               goto cleanup;
+
+       entry_start = idx->pack->mwf.size;
+
+       if ((error = git_odb_read(&obj, idx->odb, id)) < 0) {
+               git_error_set(GIT_ERROR_INDEXER, "missing delta bases");
+               goto cleanup;
+       }
+
+       data = git_odb_object_data(obj);
+       len = git_odb_object_size(obj);
+
+       entry = git__calloc(1, sizeof(*entry));
+       GIT_ERROR_CHECK_ALLOC(entry);
+
+       entry->crc = crc32(0L, Z_NULL, 0);
+
+       /* Write out the object header */
+       if ((error = git_packfile__object_header(&hdr_len, hdr, len, git_odb_object_type(obj))) < 0 ||
+           (error = append_to_pack(idx, hdr, hdr_len)) < 0)
+               goto cleanup;
+
+       idx->pack->mwf.size += hdr_len;
+       entry->crc = crc32(entry->crc, hdr, (uInt)hdr_len);
+
+       if ((error = git_zstream_deflatebuf(&buf, data, len)) < 0)
+               goto cleanup;
+
+       /* And then the compressed object */
+       if ((error = append_to_pack(idx, buf.ptr, buf.size)) < 0)
+               goto cleanup;
+
+       idx->pack->mwf.size += buf.size;
+       entry->crc = htonl(crc32(entry->crc, (unsigned char *)buf.ptr, (uInt)buf.size));
+       git_str_dispose(&buf);
+
+       /* Write a fake trailer so the pack functions play ball */
+
+       if ((error = append_to_pack(idx, empty_checksum, checksum_size)) < 0)
+               goto cleanup;
+
+       idx->pack->mwf.size += GIT_OID_RAWSZ;
+
+       pentry = git__calloc(1, sizeof(struct git_pack_entry));
+       GIT_ERROR_CHECK_ALLOC(pentry);
+
+       git_oid_cpy(&pentry->sha1, id);
+       git_oid_cpy(&entry->oid, id);
+       idx->off = entry_start + hdr_len + len;
+
+       error = save_entry(idx, entry, pentry, entry_start);
+
+cleanup:
+       if (error) {
+               git__free(entry);
+               git__free(pentry);
+       }
+
+       git_odb_object_free(obj);
+       return error;
+}
+
/*
 * Repair a thin pack: locate the first REF_DELTA among the unresolved
 * deltas, read the base oid it references, and (if that base is not
 * already present in the pack) inject it from the local ODB.
 */
static int fix_thin_pack(git_indexer *idx, git_indexer_progress *stats)
{
	int error, found_ref_delta = 0;
	unsigned int i;
	struct delta_info *delta;
	size_t size;
	git_object_t type;
	git_mwindow *w = NULL;
	off64_t curpos = 0;
	unsigned char *base_info;
	unsigned int left = 0;
	git_oid base;

	GIT_ASSERT(git_vector_length(&idx->deltas) > 0);

	if (idx->odb == NULL) {
		git_error_set(GIT_ERROR_INDEXER, "cannot fix a thin pack without an ODB");
		return -1;
	}

	/* Loop until we find the first REF delta */
	git_vector_foreach(&idx->deltas, i, delta) {
		if (!delta)
			continue;

		curpos = delta->delta_off;
		/* NOTE(review): `w` opened by unpack_header is not closed on
		 * this early-return path or between iterations — confirm
		 * whether the window needs an explicit close here. */
		error = git_packfile_unpack_header(&size, &type, idx->pack, &w, &curpos);
		if (error < 0)
			return error;

		if (type == GIT_OBJECT_REF_DELTA) {
			found_ref_delta = 1;
			break;
		}
	}

	if (!found_ref_delta) {
		git_error_set(GIT_ERROR_INDEXER, "no REF_DELTA found, cannot inject object");
		return -1;
	}

	/* curpos now points to the base information, which is an OID */
	base_info = git_mwindow_open(&idx->pack->mwf, &w, curpos, GIT_OID_RAWSZ, &left);
	if (base_info == NULL) {
		git_error_set(GIT_ERROR_INDEXER, "failed to map delta information");
		return -1;
	}

	git_oid_fromraw(&base, base_info);
	git_mwindow_close(&w);

	/* Base already indexed from this pack: nothing to inject */
	if (has_entry(idx, &base))
		return 0;

	if (inject_object(idx, &base) < 0)
		return -1;

	stats->local_objects++;

	return 0;
}
+
+/*
+ * Resolve every pending delta object in the pack.
+ *
+ * Repeatedly walks the deltas vector, unpacking each delta whose base
+ * is now available, hashing and indexing the result, and removing it
+ * from the list. When a full pass makes no progress (every remaining
+ * delta's base is missing), the pack is thin and fix_thin_pack() is
+ * asked to inject one missing base from the ODB before retrying.
+ *
+ * Returns 0 when all deltas are resolved, a negative value on error or
+ * when the progress callback requests cancellation.
+ */
+static int resolve_deltas(git_indexer *idx, git_indexer_progress *stats)
+{
+	unsigned int i;
+	int error;
+	struct delta_info *delta;
+	int progressed = 0, non_null = 0, progress_cb_result;
+
+	while (idx->deltas.length > 0) {
+		progressed = 0;
+		non_null = 0;
+		git_vector_foreach(&idx->deltas, i, delta) {
+			git_rawobj obj = {0};
+
+			/* Entries already resolved were set to NULL below. */
+			if (!delta)
+				continue;
+
+			non_null = 1;
+			idx->off = delta->delta_off;
+			if ((error = git_packfile_unpack(&obj, idx->pack, &idx->off)) < 0) {
+				if (error == GIT_PASSTHROUGH) {
+					/* We have not seen the base object, we'll try again later. */
+					continue;
+				}
+				return -1;
+			}
+
+			if (idx->do_verify && check_object_connectivity(idx, &obj) < 0) {
+				/* TODO: error? continue? */
+				/* Don't leak the unpacked object while we skip it. */
+				git__free(obj.data);
+				continue;
+			}
+
+			if (hash_and_save(idx, &obj, delta->delta_off) < 0) {
+				/* Don't leak the unpacked object on failure either. */
+				git__free(obj.data);
+				continue;
+			}
+
+			git__free(obj.data);
+			stats->indexed_objects++;
+			stats->indexed_deltas++;
+			progressed = 1;
+			if ((progress_cb_result = do_progress_callback(idx, stats)) < 0)
+				return progress_cb_result;
+
+			/* remove from the list */
+			git_vector_set(NULL, &idx->deltas, i, NULL);
+			git__free(delta);
+		}
+
+		/* if none were actually set, we're done */
+		if (!non_null)
+			break;
+
+		/* No progress in a full pass: the pack must be thin, so inject
+		 * a missing base object and try again. */
+		if (!progressed && (fix_thin_pack(idx, stats) < 0)) {
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * After injecting local objects into the pack, rewrite the pack header
+ * with the corrected object count and recompute the trailer hash over
+ * the whole file. Returns 0 on success, -1 on failure.
+ */
+static int update_header_and_rehash(git_indexer *idx, git_indexer_progress *stats)
+{
+	const size_t window_chunk = 1024 * 1024;
+	git_mwindow_file *pack_file = &idx->pack->mwf;
+	git_mwindow *window = NULL;
+	off64_t consumed;
+	unsigned int mapped = 0;
+	void *data;
+
+	/* Restart the trailer hash from scratch. */
+	git_hash_init(&idx->trailer);
+
+	/* The header must now also account for the objects we injected
+	 * locally. */
+	idx->hdr.hdr_entries = htonl(stats->total_objects + stats->local_objects);
+	if (write_at(idx, &idx->hdr, 0, sizeof(struct git_pack_header)) < 0)
+		return -1;
+
+	/*
+	 * Re-read the file from the start with the same technique as the
+	 * streaming path: hash_partially() keeps the existing trailer out
+	 * of the calculation. Drop cached windows first so we see the
+	 * rewritten bytes.
+	 */
+	if (git_mwindow_free_all(pack_file) < 0)
+		return -1;
+
+	idx->inbuf_len = 0;
+	for (consumed = 0; consumed < pack_file->size; consumed += mapped) {
+		data = git_mwindow_open(pack_file, &window, consumed, window_chunk, &mapped);
+		if (data == NULL)
+			return -1;
+
+		hash_partially(idx, data, mapped);
+		git_mwindow_close(&window);
+	}
+
+	return 0;
+}
+
+/*
+ * Finalize the indexing operation: verify the pack trailer, resolve all
+ * deltas (injecting thin-pack bases if needed), write the .idx file and
+ * rename the temporary pack into its content-addressed final name.
+ *
+ * On success the pack is marked committed so git_indexer_free() keeps
+ * the files on disk; on error the temporaries are cleaned up by free.
+ */
+int git_indexer_commit(git_indexer *idx, git_indexer_progress *stats)
+{
+	git_mwindow *w = NULL;
+	unsigned int i, long_offsets = 0, left;
+	int error;
+	struct git_pack_idx_header hdr;
+	git_str filename = GIT_STR_INIT;
+	struct entry *entry;
+	unsigned char checksum[GIT_HASH_SHA1_SIZE];
+	git_filebuf index_file = {0};
+	void *packfile_trailer;
+	size_t checksum_size = GIT_HASH_SHA1_SIZE;
+	bool mismatch;
+
+	/* The stream must have delivered at least a complete pack header. */
+	if (!idx->parsed_header) {
+		git_error_set(GIT_ERROR_INDEXER, "incomplete pack header");
+		return -1;
+	}
+
+	/* Test for this before resolve_deltas(), as it plays with idx->off */
+	/* More bytes after the last object than a trailer accounts for. */
+	if (idx->off + (ssize_t)checksum_size < idx->pack->mwf.size) {
+		git_error_set(GIT_ERROR_INDEXER, "unexpected data at the end of the pack");
+		return -1;
+	}
+	/* Fewer bytes than a trailer: the stream was truncated. */
+	if (idx->off + (ssize_t)checksum_size > idx->pack->mwf.size) {
+		git_error_set(GIT_ERROR_INDEXER, "missing trailer at the end of the pack");
+		return -1;
+	}
+
+	packfile_trailer = git_mwindow_open(&idx->pack->mwf, &w, idx->pack->mwf.size - checksum_size, checksum_size, &left);
+	if (packfile_trailer == NULL) {
+		git_mwindow_close(&w);
+		goto on_error;
+	}
+
+	/* Compare the packfile trailer as it was sent to us and what we calculated */
+	git_hash_final(checksum, &idx->trailer);
+	mismatch = !!memcmp(checksum, packfile_trailer, checksum_size);
+	git_mwindow_close(&w);
+
+	if (mismatch) {
+		git_error_set(GIT_ERROR_INDEXER, "packfile trailer mismatch");
+		return -1;
+	}
+
+	/* Freeze the number of deltas */
+	stats->total_deltas = stats->total_objects - stats->indexed_objects;
+
+	if ((error = resolve_deltas(idx, stats)) < 0)
+		return error;
+
+	if (stats->indexed_objects != stats->total_objects) {
+		git_error_set(GIT_ERROR_INDEXER, "early EOF");
+		return -1;
+	}
+
+	/* Injected thin-pack bases changed the pack contents, so the header
+	 * count and the trailer hash must both be recomputed. */
+	if (stats->local_objects > 0) {
+		if (update_header_and_rehash(idx, stats) < 0)
+			return -1;
+
+		git_hash_final(checksum, &idx->trailer);
+		/* NOTE(review): write_at()'s return value is ignored here —
+		 * a failed trailer rewrite would go unnoticed; consider
+		 * checking it. */
+		write_at(idx, checksum, idx->pack->mwf.size - checksum_size, checksum_size);
+	}
+
+	/*
+	 * Is the resulting graph fully connected or are we still
+	 * missing some objects? In the second case, we can
+	 * bail out due to an incomplete and thus corrupt
+	 * packfile.
+	 */
+	if (git_oidmap_size(idx->expected_oids) > 0) {
+		git_error_set(GIT_ERROR_INDEXER, "packfile is missing %"PRIuZ" objects",
+			git_oidmap_size(idx->expected_oids));
+		return -1;
+	}
+
+	/* The index format requires entries sorted by OID. */
+	git_vector_sort(&idx->objects);
+
+	/* Use the trailer hash as the pack file name to ensure
+	 * files with different contents have different names */
+	memcpy(idx->checksum, checksum, checksum_size);
+	if (git_hash_fmt(idx->name, checksum, checksum_size) < 0)
+		return -1;
+
+	/* Derive the .idx path from the pack path: "...pack" -> "...idx". */
+	git_str_sets(&filename, idx->pack->pack_name);
+	git_str_shorten(&filename, strlen("pack"));
+	git_str_puts(&filename, "idx");
+	if (git_str_oom(&filename))
+		return -1;
+
+	if (git_filebuf_open(&index_file, filename.ptr,
+		GIT_FILEBUF_HASH_CONTENTS |
+		(idx->do_fsync ? GIT_FILEBUF_FSYNC : 0),
+		idx->mode) < 0)
+		goto on_error;
+
+	/* Write out the header */
+	hdr.idx_signature = htonl(PACK_IDX_SIGNATURE);
+	hdr.idx_version = htonl(2);
+	git_filebuf_write(&index_file, &hdr, sizeof(hdr));
+
+	/* Write out the fanout table */
+	for (i = 0; i < 256; ++i) {
+		uint32_t n = htonl(idx->fanout[i]);
+		git_filebuf_write(&index_file, &n, sizeof(n));
+	}
+
+	/* Write out the object names (SHA-1 hashes) */
+	git_vector_foreach(&idx->objects, i, entry) {
+		git_filebuf_write(&index_file, &entry->oid.id, GIT_OID_RAWSZ);
+	}
+
+	/* Write out the CRC32 values */
+	git_vector_foreach(&idx->objects, i, entry) {
+		git_filebuf_write(&index_file, &entry->crc, sizeof(uint32_t));
+	}
+
+	/* Write out the offsets */
+	git_vector_foreach(&idx->objects, i, entry) {
+		uint32_t n;
+
+		/* Offsets that don't fit in 31 bits are stored in a separate
+		 * table; the entry here is the MSB-flagged index into it. */
+		if (entry->offset == UINT32_MAX)
+			n = htonl(0x80000000 | long_offsets++);
+		else
+			n = htonl(entry->offset);
+
+		git_filebuf_write(&index_file, &n, sizeof(uint32_t));
+	}
+
+	/* Write out the long offsets */
+	git_vector_foreach(&idx->objects, i, entry) {
+		uint32_t split[2];
+
+		if (entry->offset != UINT32_MAX)
+			continue;
+
+		/* 64-bit offset stored as two big-endian 32-bit halves. */
+		split[0] = htonl(entry->offset_long >> 32);
+		split[1] = htonl(entry->offset_long & 0xffffffff);
+
+		git_filebuf_write(&index_file, &split, sizeof(uint32_t) * 2);
+	}
+
+	/* Write out the packfile trailer to the index */
+	if (git_filebuf_write(&index_file, checksum, checksum_size) < 0)
+		goto on_error;
+
+	/* Write out the hash of the idx */
+	if (git_filebuf_hash(checksum, &index_file) < 0)
+		goto on_error;
+
+	git_filebuf_write(&index_file, checksum, checksum_size);
+
+	/* Figure out what the final name should be */
+	if (index_path(&filename, idx, ".idx") < 0)
+		goto on_error;
+
+	/* Commit file */
+	if (git_filebuf_commit_at(&index_file, filename.ptr) < 0)
+		goto on_error;
+
+	if (git_mwindow_free_all(&idx->pack->mwf) < 0)
+		goto on_error;
+
+#if !defined(NO_MMAP) && defined(GIT_WIN32)
+	/*
+	 * Some non-Windows remote filesystems fail when truncating files if the
+	 * file permissions change after opening the file (done by p_mkstemp).
+	 *
+	 * Truncation is only needed when mmap is used to undo rounding up to next
+	 * page_size in append_to_pack.
+	 */
+	if (p_ftruncate(idx->pack->mwf.fd, idx->pack->mwf.size) < 0) {
+		git_error_set(GIT_ERROR_OS, "failed to truncate pack file '%s'", idx->pack->pack_name);
+		return -1;
+	}
+#endif
+
+	if (idx->do_fsync && p_fsync(idx->pack->mwf.fd) < 0) {
+		git_error_set(GIT_ERROR_OS, "failed to fsync packfile");
+		goto on_error;
+	}
+
+	/* We need to close the descriptor here so Windows doesn't choke on commit_at */
+	if (p_close(idx->pack->mwf.fd) < 0) {
+		git_error_set(GIT_ERROR_OS, "failed to close packfile");
+		goto on_error;
+	}
+
+	idx->pack->mwf.fd = -1;
+
+	if (index_path(&filename, idx, ".pack") < 0)
+		goto on_error;
+
+	/* And don't forget to rename the packfile to its new place. */
+	if (p_rename(idx->pack->pack_name, git_str_cstr(&filename)) < 0)
+		goto on_error;
+
+	/* And fsync the parent directory if we're asked to. */
+	if (idx->do_fsync &&
+		git_futils_fsync_parent(git_str_cstr(&filename)) < 0)
+		goto on_error;
+
+	/* Tell git_indexer_free() not to delete the files on disk. */
+	idx->pack_committed = 1;
+
+	git_str_dispose(&filename);
+	return 0;
+
+on_error:
+	git_mwindow_free_all(&idx->pack->mwf);
+	git_filebuf_cleanup(&index_file);
+	git_str_dispose(&filename);
+	return -1;
+}
+
+/*
+ * Free an indexer and everything it owns. If git_indexer_commit() has
+ * not succeeded (pack_committed is unset), the on-disk temporaries are
+ * removed as part of freeing the pack. Accepts NULL as a no-op.
+ */
+void git_indexer_free(git_indexer *idx)
+{
+	const git_oid *key;
+	git_oid *value;
+	size_t iter;
+
+	if (idx == NULL)
+		return;
+
+	/* Dispose of a packfile stream still open mid-object. */
+	if (idx->have_stream)
+		git_packfile_stream_dispose(&idx->stream);
+
+	git_vector_free_deep(&idx->objects);
+
+	/* The pack's idx_cache maps OIDs to heap-allocated pack entries;
+	 * free the values before freeing the map itself. */
+	if (idx->pack->idx_cache) {
+		struct git_pack_entry *pentry;
+		git_oidmap_foreach_value(idx->pack->idx_cache, pentry, {
+			git__free(pentry);
+		});
+
+		git_oidmap_free(idx->pack->idx_cache);
+	}
+
+	git_vector_free_deep(&idx->deltas);
+
+	/* Unlink the temporary pack file unless it was committed. */
+	git_packfile_free(idx->pack, !idx->pack_committed);
+
+	/* Free the heap-allocated OIDs still pending verification. */
+	iter = 0;
+	while (git_oidmap_iterate((void **) &value, idx->expected_oids, &iter, &key) == 0)
+		git__free(value);
+
+	git_hash_ctx_cleanup(&idx->trailer);
+	git_hash_ctx_cleanup(&idx->hash_ctx);
+	git_str_dispose(&idx->entry_data);
+	git_oidmap_free(idx->expected_oids);
+	git__free(idx);
+}