* a Linking Exception. For full terms see the included COPYING file.
*/
+#include "indexer.h"
+
#include "git2/indexer.h"
#include "git2/object.h"
-#include "common.h"
+#include "commit.h"
+#include "tree.h"
+#include "tag.h"
#include "pack.h"
#include "mwindow.h"
#include "posix.h"
#include "pack.h"
#include "filebuf.h"
#include "oid.h"
+#include "oidarray.h"
#include "oidmap.h"
#include "zstream.h"
+#include "object.h"
-GIT__USE_OIDMAP
-
-extern git_mutex git__mwindow_mutex;
+size_t git_indexer__max_objects = UINT32_MAX;
#define UINT31_MAX (0x7FFFFFFF)
unsigned int parsed_header :1,
pack_committed :1,
have_stream :1,
- have_delta :1;
+ have_delta :1,
+ do_fsync :1,
+ do_verify :1;
struct git_pack_header hdr;
struct git_pack_file *pack;
unsigned int mode;
- git_off_t off;
- git_off_t entry_start;
+ off64_t off;
+ off64_t entry_start;
+ git_object_t entry_type;
+ git_str entry_data;
git_packfile_stream stream;
size_t nr_objects;
git_vector objects;
git_vector deltas;
unsigned int fanout[256];
git_hash_ctx hash_ctx;
- git_oid hash;
- git_transfer_progress_cb progress_cb;
+ unsigned char checksum[GIT_HASH_SHA1_SIZE];
+ char name[(GIT_HASH_SHA1_SIZE * 2) + 1];
+ git_indexer_progress_cb progress_cb;
void *progress_payload;
char objbuf[8*1024];
+ /* OIDs referenced from pack objects. Used for verification. */
+ git_oidmap *expected_oids;
+
/* Needed to look up objects which we want to inject to fix a thin pack */
git_odb *odb;
};
struct delta_info {
- git_off_t delta_off;
+ off64_t delta_off;
};
+#ifndef GIT_DEPRECATE_HARD
const git_oid *git_indexer_hash(const git_indexer *idx)
{
- return &idx->hash;
+ return (git_oid *)idx->checksum;
+}
+#endif
+
+const char *git_indexer_name(const git_indexer *idx)
+{
+ return idx->name;
}
static int parse_header(struct git_pack_header *hdr, struct git_pack_file *pack)
/* Verify we recognize this pack file format. */
if (hdr->hdr_signature != ntohl(PACK_SIGNATURE)) {
- giterr_set(GITERR_INDEXER, "wrong pack signature");
+ git_error_set(GIT_ERROR_INDEXER, "wrong pack signature");
return -1;
}
if (!pack_version_ok(hdr->hdr_version)) {
- giterr_set(GITERR_INDEXER, "wrong pack version");
+ git_error_set(GIT_ERROR_INDEXER, "wrong pack version");
return -1;
}
return git_oid__cmp(&entrya->oid, &entryb->oid);
}
+/*
+ * Fill `opts` from the GIT_INDEXER_OPTIONS_INIT template for the requested
+ * structure `version`.
+ *
+ * Returns 0 once GIT_INIT_STRUCTURE_FROM_TEMPLATE has run to completion.
+ * NOTE(review): the macro presumably validates `version` and may return
+ * early on a mismatch -- confirm against its definition.
+ */
+int git_indexer_options_init(git_indexer_options *opts, unsigned int version)
+{
+	GIT_INIT_STRUCTURE_FROM_TEMPLATE(opts, version,
+		git_indexer_options, GIT_INDEXER_OPTIONS_INIT);
+
+	return 0;
+}
+
+#ifndef GIT_DEPRECATE_HARD
+/*
+ * Alternate spelling of git_indexer_options_init(); only compiled when
+ * GIT_DEPRECATE_HARD is not defined. Forwards both arguments unchanged and
+ * hands back whatever git_indexer_options_init() returned.
+ */
+int git_indexer_init_options(git_indexer_options *opts, unsigned int version)
+{
+	int result = git_indexer_options_init(opts, version);
+
+	return result;
+}
+#endif
+
int git_indexer_new(
git_indexer **out,
const char *prefix,
unsigned int mode,
git_odb *odb,
- git_transfer_progress_cb progress_cb,
- void *progress_payload)
+ git_indexer_options *in_opts)
{
+ git_indexer_options opts = GIT_INDEXER_OPTIONS_INIT;
git_indexer *idx;
- git_buf path = GIT_BUF_INIT, tmp_path = GIT_BUF_INIT;
+ git_str path = GIT_STR_INIT, tmp_path = GIT_STR_INIT;
static const char suff[] = "/pack";
int error, fd = -1;
+ if (in_opts)
+ memcpy(&opts, in_opts, sizeof(opts));
+
idx = git__calloc(1, sizeof(git_indexer));
- GITERR_CHECK_ALLOC(idx);
+ GIT_ERROR_CHECK_ALLOC(idx);
idx->odb = odb;
- idx->progress_cb = progress_cb;
- idx->progress_payload = progress_payload;
+ idx->progress_cb = opts.progress_cb;
+ idx->progress_payload = opts.progress_cb_payload;
idx->mode = mode ? mode : GIT_PACK_FILE_MODE;
- git_hash_ctx_init(&idx->hash_ctx);
- git_hash_ctx_init(&idx->trailer);
+ git_str_init(&idx->entry_data, 0);
- error = git_buf_joinpath(&path, prefix, suff);
+ if ((error = git_hash_ctx_init(&idx->hash_ctx, GIT_HASH_ALGORITHM_SHA1)) < 0 ||
+ (error = git_hash_ctx_init(&idx->trailer, GIT_HASH_ALGORITHM_SHA1)) < 0 ||
+ (error = git_oidmap_new(&idx->expected_oids)) < 0)
+ goto cleanup;
+
+ idx->do_verify = opts.verify;
+
+ if (git_repository__fsync_gitdir)
+ idx->do_fsync = 1;
+
+ error = git_str_joinpath(&path, prefix, suff);
if (error < 0)
goto cleanup;
- fd = git_futils_mktmp(&tmp_path, git_buf_cstr(&path), idx->mode);
- git_buf_free(&path);
+ fd = git_futils_mktmp(&tmp_path, git_str_cstr(&path), idx->mode);
+ git_str_dispose(&path);
if (fd < 0)
goto cleanup;
- error = git_packfile_alloc(&idx->pack, git_buf_cstr(&tmp_path));
- git_buf_free(&tmp_path);
+ error = git_packfile_alloc(&idx->pack, git_str_cstr(&tmp_path));
+ git_str_dispose(&tmp_path);
if (error < 0)
goto cleanup;
if (fd != -1)
p_close(fd);
- if (git_buf_len(&tmp_path) > 0)
- p_unlink(git_buf_cstr(&tmp_path));
+ if (git_str_len(&tmp_path) > 0)
+ p_unlink(git_str_cstr(&tmp_path));
if (idx->pack != NULL)
p_unlink(idx->pack->pack_name);
- git_buf_free(&path);
- git_buf_free(&tmp_path);
+ git_str_dispose(&path);
+ git_str_dispose(&tmp_path);
git__free(idx);
return -1;
}
+/*
+ * Switch the indexer's `do_fsync` flag on (any non-zero `do_fsync`) or off
+ * (zero). The flag is a one-bit field, so the argument is normalized to
+ * exactly 0 or 1 before it is stored.
+ */
+void git_indexer__set_fsync(git_indexer *idx, int do_fsync)
+{
+	idx->do_fsync = (do_fsync != 0) ? 1 : 0;
+}
+
/* Try to store the delta so we can try to resolve it later */
static int store_delta(git_indexer *idx)
{
struct delta_info *delta;
delta = git__calloc(1, sizeof(struct delta_info));
- GITERR_CHECK_ALLOC(delta);
+ GIT_ERROR_CHECK_ALLOC(delta);
delta->delta_off = idx->entry_start;
if (git_vector_insert(&idx->deltas, delta) < 0)
return 0;
}
-static void hash_header(git_hash_ctx *ctx, git_off_t len, git_otype type)
+static int hash_header(git_hash_ctx *ctx, off64_t len, git_object_t type)
{
char buffer[64];
size_t hdrlen;
+ int error;
- hdrlen = git_odb__format_object_header(buffer, sizeof(buffer), (size_t)len, type);
- git_hash_update(ctx, buffer, hdrlen);
+ if ((error = git_odb__format_object_header(&hdrlen,
+ buffer, sizeof(buffer), (size_t)len, type)) < 0)
+ return error;
+
+ return git_hash_update(ctx, buffer, hdrlen);
}
static int hash_object_stream(git_indexer*idx, git_packfile_stream *stream)
{
ssize_t read;
- assert(idx && stream);
+ GIT_ASSERT_ARG(idx);
+ GIT_ASSERT_ARG(stream);
do {
if ((read = git_packfile_stream_read(stream, idx->objbuf, sizeof(idx->objbuf))) < 0)
break;
+ if (idx->do_verify)
+ git_str_put(&idx->entry_data, idx->objbuf, read);
+
git_hash_update(&idx->hash_ctx, idx->objbuf, read);
} while (read > 0);
}
/* In order to create the packfile stream, we need to skip over the delta base description */
-static int advance_delta_offset(git_indexer *idx, git_otype type)
+static int advance_delta_offset(git_indexer *idx, git_object_t type)
{
git_mwindow *w = NULL;
- assert(type == GIT_OBJ_REF_DELTA || type == GIT_OBJ_OFS_DELTA);
+ GIT_ASSERT_ARG(type == GIT_OBJECT_REF_DELTA || type == GIT_OBJECT_OFS_DELTA);
- if (type == GIT_OBJ_REF_DELTA) {
+ if (type == GIT_OBJECT_REF_DELTA) {
idx->off += GIT_OID_RAWSZ;
} else {
- git_off_t base_off = get_delta_base(idx->pack, &w, &idx->off, type, idx->entry_start);
+ off64_t base_off;
+ int error = get_delta_base(&base_off, idx->pack, &w, &idx->off, type, idx->entry_start);
git_mwindow_close(&w);
- if (base_off < 0)
- return (int)base_off;
+ if (error < 0)
+ return error;
}
return 0;
{
ssize_t read;
- assert(stream);
+ GIT_ASSERT_ARG(stream);
do {
read = git_packfile_stream_read(stream, idx->objbuf, sizeof(idx->objbuf));
return 0;
}
-static int crc_object(uint32_t *crc_out, git_mwindow_file *mwf, git_off_t start, git_off_t size)
+static int crc_object(uint32_t *crc_out, git_mwindow_file *mwf, off64_t start, off64_t size)
{
void *ptr;
uint32_t crc;
return 0;
}
+/*
+ * Remember `oid` as an object that still has to show up before the pack can
+ * be considered complete.
+ *
+ * Nothing is recorded when the object is already known: it exists in the
+ * indexer's ODB (if one was supplied), it has already been processed as part
+ * of this pack (present in the pack's idx_cache), or it is already in the
+ * set of expected OIDs.
+ *
+ * The map entry uses a heap copy of the OID as both key and value;
+ * git_indexer_free() releases the values that remain in the map.
+ *
+ * Returns 0 on success (including the "already known" case) and a negative
+ * value if the copy cannot be allocated or cannot be inserted.
+ */
+static int add_expected_oid(git_indexer *idx, const git_oid *oid)
+{
+	git_oid *dup;
+	int error;
+
+	if ((idx->odb && git_odb_exists(idx->odb, oid)) ||
+	    git_oidmap_exists(idx->pack->idx_cache, oid) ||
+	    git_oidmap_exists(idx->expected_oids, oid))
+		return 0;
+
+	dup = git__malloc(sizeof(*dup));
+	GIT_ERROR_CHECK_ALLOC(dup);
+	git_oid_cpy(dup, oid);
+
+	/*
+	 * Only entries stored in the map are released by git_indexer_free(),
+	 * so a copy that failed to get inserted has to be freed right here.
+	 */
+	if ((error = git_oidmap_set(idx->expected_oids, dup, dup)) < 0)
+		git__free(dup);
+
+	return error;
+}
+
+
+/*
+ * Update the set of expected OIDs for one object of the pack.
+ *
+ * `obj` is the raw (type, data, len) form of the object. Types other than
+ * blob, tree, commit and tag are ignored. The object is parsed, removed from
+ * the expected set (it has now been seen), and then every object it refers
+ * to is registered through add_expected_oid():
+ *   - tree:   the OID of each entry
+ *   - commit: each parent and the root tree
+ *   - tag:    the target
+ *   - blob:   nothing
+ * If the object already exists in the indexer's ODB its references are not
+ * followed.
+ *
+ * Returns 0 on success, -1 if the object cannot be parsed, or the negative
+ * error reported by add_expected_oid(). The parsed object is released on
+ * every path.
+ */
+static int check_object_connectivity(git_indexer *idx, const git_rawobj *obj)
+{
+	/* NULL so the shared cleanup is safe when parsing fails */
+	git_object *object = NULL;
+	git_oid *expected;
+	int error = 0;
+
+	if (obj->type != GIT_OBJECT_BLOB &&
+	    obj->type != GIT_OBJECT_TREE &&
+	    obj->type != GIT_OBJECT_COMMIT &&
+	    obj->type != GIT_OBJECT_TAG)
+		return 0;
+
+	if (git_object__from_raw(&object, obj->data, obj->len, obj->type) < 0) {
+		/*
+		 * parse_raw returns EINVALID on invalid data; downgrade
+		 * that to a normal -1 error code.
+		 */
+		error = -1;
+		goto out;
+	}
+
+	/* This object has arrived, so it is no longer outstanding. */
+	if ((expected = git_oidmap_get(idx->expected_oids, &object->cached.oid)) != NULL) {
+		git_oidmap_delete(idx->expected_oids, &object->cached.oid);
+		git__free(expected);
+	}
+
+	/*
+	 * Check whether this is a known object. If so, we can just continue as
+	 * we assume that the ODB has a complete graph. Leave through `out` so
+	 * the parsed object is still released.
+	 */
+	if (idx->odb && git_odb_exists(idx->odb, &object->cached.oid))
+		goto out;
+
+	switch (obj->type) {
+	case GIT_OBJECT_TREE:
+	{
+		git_tree *tree = (git_tree *) object;
+		git_tree_entry *entry;
+		size_t i;
+
+		git_array_foreach(tree->entries, i, entry) {
+			if ((error = add_expected_oid(idx, entry->oid)) < 0)
+				goto out;
+		}
+
+		break;
+	}
+	case GIT_OBJECT_COMMIT:
+	{
+		git_commit *commit = (git_commit *) object;
+		git_oid *parent_oid;
+		size_t i;
+
+		git_array_foreach(commit->parent_ids, i, parent_oid) {
+			if ((error = add_expected_oid(idx, parent_oid)) < 0)
+				goto out;
+		}
+
+		if ((error = add_expected_oid(idx, &commit->tree_id)) < 0)
+			goto out;
+
+		break;
+	}
+	case GIT_OBJECT_TAG:
+	{
+		git_tag *tag = (git_tag *) object;
+
+		if ((error = add_expected_oid(idx, &tag->target)) < 0)
+			goto out;
+
+		break;
+	}
+	case GIT_OBJECT_BLOB:
+	default:
+		break;
+	}
+
+out:
+	git_object_free(object);
+
+	return error;
+}
+
static int store_object(git_indexer *idx)
{
int i, error;
- khiter_t k;
git_oid oid;
struct entry *entry;
- git_off_t entry_size;
+ off64_t entry_size;
struct git_pack_entry *pentry;
- git_off_t entry_start = idx->entry_start;
+ off64_t entry_start = idx->entry_start;
entry = git__calloc(1, sizeof(*entry));
- GITERR_CHECK_ALLOC(entry);
+ GIT_ERROR_CHECK_ALLOC(entry);
pentry = git__calloc(1, sizeof(struct git_pack_entry));
- GITERR_CHECK_ALLOC(pentry);
+ GIT_ERROR_CHECK_ALLOC(pentry);
- git_hash_final(&oid, &idx->hash_ctx);
+ if (git_hash_final(oid.id, &idx->hash_ctx)) {
+ git__free(pentry);
+ goto on_error;
+ }
entry_size = idx->off - entry_start;
if (entry_start > UINT31_MAX) {
entry->offset = UINT32_MAX;
entry->offset = (uint32_t)entry_start;
}
+ if (idx->do_verify) {
+ git_rawobj rawobj = {
+ idx->entry_data.ptr,
+ idx->entry_data.size,
+ idx->entry_type
+ };
+
+ if ((error = check_object_connectivity(idx, &rawobj)) < 0)
+ goto on_error;
+ }
+
git_oid_cpy(&pentry->sha1, &oid);
pentry->offset = entry_start;
- k = kh_put(oid, idx->pack->idx_cache, &pentry->sha1, &error);
- if (error == -1) {
+ if (git_oidmap_exists(idx->pack->idx_cache, &pentry->sha1)) {
+ git_error_set(GIT_ERROR_INDEXER, "duplicate object %s found in pack", git_oid_tostr_s(&pentry->sha1));
git__free(pentry);
- giterr_set_oom();
goto on_error;
}
- if (error == 0) {
- giterr_set(GITERR_INDEXER, "duplicate object %s found in pack", git_oid_tostr_s(&pentry->sha1));
+ if ((error = git_oidmap_set(idx->pack->idx_cache, &pentry->sha1, pentry)) < 0) {
git__free(pentry);
+ git_error_set_oom();
goto on_error;
}
-
- kh_value(idx->pack->idx_cache, k) = pentry;
-
git_oid_cpy(&entry->oid, &oid);
if (crc_object(&entry->crc, &idx->pack->mwf, entry_start, entry_size) < 0)
GIT_INLINE(bool) has_entry(git_indexer *idx, git_oid *id)
{
- khiter_t k;
- k = kh_get(oid, idx->pack->idx_cache, id);
- return (k != kh_end(idx->pack->idx_cache));
+ return git_oidmap_exists(idx->pack->idx_cache, id);
}
-static int save_entry(git_indexer *idx, struct entry *entry, struct git_pack_entry *pentry, git_off_t entry_start)
+static int save_entry(git_indexer *idx, struct entry *entry, struct git_pack_entry *pentry, off64_t entry_start)
{
- int i, error;
- khiter_t k;
+ int i;
if (entry_start > UINT31_MAX) {
entry->offset = UINT32_MAX;
}
pentry->offset = entry_start;
- k = kh_put(oid, idx->pack->idx_cache, &pentry->sha1, &error);
- if (error <= 0) {
- giterr_set(GITERR_INDEXER, "cannot insert object into pack");
+ if (git_oidmap_exists(idx->pack->idx_cache, &pentry->sha1) ||
+ git_oidmap_set(idx->pack->idx_cache, &pentry->sha1, pentry) < 0) {
+ git_error_set(GIT_ERROR_INDEXER, "cannot insert object into pack");
return -1;
}
- kh_value(idx->pack->idx_cache, k) = pentry;
-
/* Add the object to the list */
if (git_vector_insert(&idx->objects, entry) < 0)
return -1;
return 0;
}
-static int hash_and_save(git_indexer *idx, git_rawobj *obj, git_off_t entry_start)
+static int hash_and_save(git_indexer *idx, git_rawobj *obj, off64_t entry_start)
{
git_oid oid;
size_t entry_size;
struct git_pack_entry *pentry = NULL;
entry = git__calloc(1, sizeof(*entry));
- GITERR_CHECK_ALLOC(entry);
+ GIT_ERROR_CHECK_ALLOC(entry);
if (git_odb__hashobj(&oid, obj) < 0) {
- giterr_set(GITERR_INDEXER, "failed to hash object");
+ git_error_set(GIT_ERROR_INDEXER, "failed to hash object");
goto on_error;
}
pentry = git__calloc(1, sizeof(struct git_pack_entry));
- GITERR_CHECK_ALLOC(pentry);
+ GIT_ERROR_CHECK_ALLOC(pentry);
git_oid_cpy(&pentry->sha1, &oid);
git_oid_cpy(&entry->oid, &oid);
return -1;
}
-static int do_progress_callback(git_indexer *idx, git_transfer_progress *stats)
+static int do_progress_callback(git_indexer *idx, git_indexer_progress *stats)
{
if (idx->progress_cb)
- return giterr_set_after_callback_function(
+ return git_error_set_after_callback_function(
idx->progress_cb(stats, idx->progress_payload),
"indexer progress");
return 0;
idx->inbuf_len += size - to_expell;
}
-static int write_at(git_indexer *idx, const void *data, git_off_t offset, size_t size)
+#if defined(NO_MMAP) || !defined(GIT_WIN32)
+
+/*
+ * Write `size` bytes from `data` into the pack's file descriptor, starting
+ * at byte `offset`.
+ *
+ * One p_pwrite() call may transfer fewer bytes than requested, so the write
+ * is repeated from where the previous one stopped until nothing is left.
+ * Returns 0 when everything was written and -1 as soon as a write reports
+ * zero or fewer bytes.
+ */
+static int write_at(git_indexer *idx, const void *data, off64_t offset, size_t size)
+{
+	const char *cursor = (const char *)data;
+	size_t left = size;
+	ssize_t written;
+
+	for (; left > 0; cursor += written, offset += written, left -= written) {
+		HANDLE_EINTR(written, p_pwrite(idx->pack->mwf.fd, (void *)cursor,
+			left, offset));
+
+		if (written <= 0)
+			return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Write `size` bytes from `data` at the pack's current recorded size
+ * (idx->pack->mwf.size). The recorded size itself is not changed here.
+ *
+ * Returns 0 on success; on failure sets an OS error naming the pack file and
+ * returns -1.
+ */
+static int append_to_pack(git_indexer *idx, const void *data, size_t size)
+{
+	if (write_at(idx, data, idx->pack->mwf.size, size) >= 0)
+		return 0;
+
+	git_error_set(GIT_ERROR_OS, "cannot extend packfile '%s'", idx->pack->pack_name);
+	return -1;
+}
+
+#else
+
+/*
+ * Windows may keep different views to a networked file for the mmap- and
+ * open-accessed versions of a file, so any writes done through
+ * `write(2)`/`pwrite(2)` may not be reflected on the data that `mmap(2)` is
+ * able to read.
+ */
+
+static int write_at(git_indexer *idx, const void *data, off64_t offset, size_t size)
{
git_file fd = idx->pack->mwf.fd;
size_t mmap_alignment;
size_t page_offset;
- git_off_t page_start;
+ off64_t page_start;
unsigned char *map_data;
git_map map;
int error;
- assert(data && size);
+ GIT_ASSERT_ARG(data);
+ GIT_ASSERT_ARG(size);
if ((error = git__mmap_alignment(&mmap_alignment)) < 0)
return error;
static int append_to_pack(git_indexer *idx, const void *data, size_t size)
{
- git_off_t new_size;
+ off64_t new_size;
size_t mmap_alignment;
size_t page_offset;
- git_off_t page_start;
- git_off_t current_size = idx->pack->mwf.size;
- int fd = idx->pack->mwf.fd;
+ off64_t page_start;
+ off64_t current_size = idx->pack->mwf.size;
int error;
if (!size)
page_offset = new_size % mmap_alignment;
page_start = new_size - page_offset;
- if (p_lseek(fd, page_start + mmap_alignment - 1, SEEK_SET) < 0 ||
- p_write(idx->pack->mwf.fd, data, 1) < 0) {
- giterr_set(GITERR_OS, "cannot extend packfile '%s'", idx->pack->pack_name);
+ if (p_pwrite(idx->pack->mwf.fd, data, 1, page_start + mmap_alignment - 1) < 0) {
+ git_error_set(GIT_ERROR_OS, "cannot extend packfile '%s'", idx->pack->pack_name);
return -1;
}
return write_at(idx, data, idx->pack->mwf.size, size);
}
-int git_indexer_append(git_indexer *idx, const void *data, size_t size, git_transfer_progress *stats)
+#endif
+
+/*
+ * Try to consume one object from the pack data received so far, starting at
+ * idx->off.
+ *
+ * When no stream is open (idx->have_stream is clear) the entry header is
+ * unpacked first, the per-entry hash context and entry_data buffer are
+ * reset, and a packfile stream is opened on the entry's data. Delta entries
+ * skip over their base description; other entries start by hashing the
+ * object header. The data is then read through the stream, and once it is
+ * complete the entry is stored (as a delta to resolve later, or as an
+ * object), the progress counters in `stats` are bumped and the progress
+ * callback is invoked.
+ *
+ * Returns 0 when a whole entry was consumed, GIT_EBUFS when more data is
+ * needed (the open stream is kept for the next call), another negative value
+ * on error, or the non-zero result of the progress callback.
+ */
+static int read_stream_object(git_indexer *idx, git_indexer_progress *stats)
+{
+	git_packfile_stream *strm = &idx->stream;
+	off64_t start = idx->off;
+	git_mwindow *window = NULL;
+	git_object_t otype;
+	size_t length;
+	int rc;
+
+	/*
+	 * Do not start on the last 20 bytes of what we have.
+	 * NOTE(review): presumably this keeps the pack trailer out of
+	 * the object parser -- confirm.
+	 */
+	if (idx->off + 20 >= idx->pack->mwf.size)
+		return GIT_EBUFS;
+
+	if (!idx->have_stream) {
+		rc = git_packfile_unpack_header(&length, &otype, idx->pack, &window, &idx->off);
+
+		/* Header not fully received yet: rewind so the next call retries it */
+		if (rc == GIT_EBUFS)
+			idx->off = start;
+		if (rc == GIT_EBUFS || rc < 0)
+			return rc;
+
+		git_mwindow_close(&window);
+		idx->entry_start = start;
+		git_hash_init(&idx->hash_ctx);
+		git_str_clear(&idx->entry_data);
+
+		switch (otype) {
+		case GIT_OBJECT_REF_DELTA:
+		case GIT_OBJECT_OFS_DELTA:
+			rc = advance_delta_offset(idx, otype);
+
+			if (rc == GIT_EBUFS)
+				idx->off = start;
+			if (rc == GIT_EBUFS || rc < 0)
+				return rc;
+
+			idx->have_delta = 1;
+			break;
+		default:
+			idx->have_delta = 0;
+
+			if ((rc = hash_header(&idx->hash_ctx, length, otype)) < 0)
+				return rc;
+			break;
+		}
+
+		idx->have_stream = 1;
+		idx->entry_type = otype;
+
+		if ((rc = git_packfile_stream_open(strm, idx->pack, idx->off)) < 0)
+			return rc;
+	}
+
+	rc = idx->have_delta ?
+		read_object_stream(idx, strm) :
+		hash_object_stream(idx, strm);
+
+	idx->off = strm->curpos;
+
+	/* Out of data mid-entry: keep the stream open and resume later */
+	if (rc == GIT_EBUFS)
+		return rc;
+
+	/* We want to free the stream resources no matter what here */
+	idx->have_stream = 0;
+	git_packfile_stream_dispose(strm);
+
+	if (rc < 0)
+		return rc;
+
+	rc = idx->have_delta ? store_delta(idx) : store_object(idx);
+	if (rc < 0)
+		return rc;
+
+	/* Deltas only count as received here, not as indexed */
+	if (!idx->have_delta)
+		stats->indexed_objects++;
+	stats->received_objects++;
+
+	return do_progress_callback(idx, stats);
+}
+
+
+int git_indexer_append(git_indexer *idx, const void *data, size_t size, git_indexer_progress *stats)
{
int error = -1;
- size_t processed;
struct git_pack_header *hdr = &idx->hdr;
git_mwindow_file *mwf = &idx->pack->mwf;
- assert(idx && data && stats);
-
- processed = stats->indexed_objects;
+ GIT_ASSERT_ARG(idx);
+ GIT_ASSERT_ARG(data);
+ GIT_ASSERT_ARG(stats);
if ((error = append_to_pack(idx, data, size)) < 0)
return error;
idx->nr_objects = ntohl(hdr->hdr_entries);
idx->off = sizeof(struct git_pack_header);
- /* for now, limit to 2^32 objects */
- assert(idx->nr_objects == (size_t)((unsigned int)idx->nr_objects));
- if (idx->nr_objects == (size_t)((unsigned int)idx->nr_objects))
+ if (idx->nr_objects <= git_indexer__max_objects) {
total_objects = (unsigned int)idx->nr_objects;
- else
- total_objects = UINT_MAX;
+ } else {
+ git_error_set(GIT_ERROR_INDEXER, "too many objects");
+ return -1;
+ }
- idx->pack->idx_cache = git_oidmap_alloc();
- GITERR_CHECK_ALLOC(idx->pack->idx_cache);
+ if (git_oidmap_new(&idx->pack->idx_cache) < 0)
+ return -1;
idx->pack->has_cache = 1;
if (git_vector_init(&idx->objects, total_objects, objects_cmp) < 0)
stats->local_objects = 0;
stats->total_deltas = 0;
stats->indexed_deltas = 0;
- processed = stats->indexed_objects = 0;
+ stats->indexed_objects = 0;
stats->total_objects = total_objects;
if ((error = do_progress_callback(idx, stats)) != 0)
/* Now that we have data in the pack, let's try to parse it */
/* As the file grows any windows we try to use will be out of date */
- git_mwindow_free_all(mwf);
-
- while (processed < idx->nr_objects) {
- git_packfile_stream *stream = &idx->stream;
- git_off_t entry_start = idx->off;
- size_t entry_size;
- git_otype type;
- git_mwindow *w = NULL;
-
- if (idx->pack->mwf.size <= idx->off + 20)
- return 0;
-
- if (!idx->have_stream) {
- error = git_packfile_unpack_header(&entry_size, &type, mwf, &w, &idx->off);
- if (error == GIT_EBUFS) {
- idx->off = entry_start;
- return 0;
- }
- if (error < 0)
- goto on_error;
-
- git_mwindow_close(&w);
- idx->entry_start = entry_start;
- git_hash_init(&idx->hash_ctx);
-
- if (type == GIT_OBJ_REF_DELTA || type == GIT_OBJ_OFS_DELTA) {
- error = advance_delta_offset(idx, type);
- if (error == GIT_EBUFS) {
- idx->off = entry_start;
- return 0;
- }
- if (error < 0)
- goto on_error;
-
- idx->have_delta = 1;
- } else {
- idx->have_delta = 0;
- hash_header(&idx->hash_ctx, entry_size, type);
- }
-
- idx->have_stream = 1;
+ if ((error = git_mwindow_free_all(mwf)) < 0)
+ goto on_error;
- error = git_packfile_stream_open(stream, idx->pack, idx->off);
- if (error < 0)
+ while (stats->indexed_objects < idx->nr_objects) {
+ if ((error = read_stream_object(idx, stats)) != 0) {
+ if (error == GIT_EBUFS)
+ break;
+ else
goto on_error;
}
-
- if (idx->have_delta) {
- error = read_object_stream(idx, stream);
- } else {
- error = hash_object_stream(idx, stream);
- }
-
- idx->off = stream->curpos;
- if (error == GIT_EBUFS)
- return 0;
-
- /* We want to free the stream reasorces no matter what here */
- idx->have_stream = 0;
- git_packfile_stream_free(stream);
-
- if (error < 0)
- goto on_error;
-
- if (idx->have_delta) {
- error = store_delta(idx);
- } else {
- error = store_object(idx);
- }
-
- if (error < 0)
- goto on_error;
-
- if (!idx->have_delta) {
- stats->indexed_objects = (unsigned int)++processed;
- }
- stats->received_objects++;
-
- if ((error = do_progress_callback(idx, stats)) != 0)
- goto on_error;
}
return 0;
return error;
}
-static int index_path(git_buf *path, git_indexer *idx, const char *suffix)
+static int index_path(git_str *path, git_indexer *idx, const char *suffix)
{
const char prefix[] = "pack-";
size_t slash = (size_t)path->size;
while (slash > 0 && path->ptr[slash - 1] != '/')
slash--;
- if (git_buf_grow(path, slash + 1 + strlen(prefix) +
+ if (git_str_grow(path, slash + 1 + strlen(prefix) +
GIT_OID_HEXSZ + strlen(suffix) + 1) < 0)
return -1;
- git_buf_truncate(path, slash);
- git_buf_puts(path, prefix);
- git_oid_fmt(path->ptr + git_buf_len(path), &idx->hash);
- path->size += GIT_OID_HEXSZ;
- git_buf_puts(path, suffix);
+ git_str_truncate(path, slash);
+ git_str_puts(path, prefix);
+ git_str_puts(path, idx->name);
+ git_str_puts(path, suffix);
- return git_buf_oom(path) ? -1 : 0;
+ return git_str_oom(path) ? -1 : 0;
}
/**
* Rewind the packfile by the trailer, as we might need to fix the
* packfile by injecting objects at the tail and must overwrite it.
*/
-static void seek_back_trailer(git_indexer *idx)
+static int seek_back_trailer(git_indexer *idx)
{
idx->pack->mwf.size -= GIT_OID_RAWSZ;
- git_mwindow_free_all(&idx->pack->mwf);
+ return git_mwindow_free_all(&idx->pack->mwf);
}
static int inject_object(git_indexer *idx, git_oid *id)
{
- git_odb_object *obj;
- struct entry *entry;
+ git_odb_object *obj = NULL;
+ struct entry *entry = NULL;
struct git_pack_entry *pentry = NULL;
- git_oid foo = {{0}};
+ unsigned char empty_checksum[GIT_HASH_SHA1_SIZE] = {0};
unsigned char hdr[64];
- git_buf buf = GIT_BUF_INIT;
- git_off_t entry_start;
+ git_str buf = GIT_STR_INIT;
+ off64_t entry_start;
const void *data;
size_t len, hdr_len;
+ size_t checksum_size = GIT_HASH_SHA1_SIZE;
int error;
- seek_back_trailer(idx);
+ if ((error = seek_back_trailer(idx)) < 0)
+ goto cleanup;
+
entry_start = idx->pack->mwf.size;
- if (git_odb_read(&obj, idx->odb, id) < 0) {
- giterr_set(GITERR_INDEXER, "missing delta bases");
- return -1;
+ if ((error = git_odb_read(&obj, idx->odb, id)) < 0) {
+ git_error_set(GIT_ERROR_INDEXER, "missing delta bases");
+ goto cleanup;
}
data = git_odb_object_data(obj);
len = git_odb_object_size(obj);
entry = git__calloc(1, sizeof(*entry));
- GITERR_CHECK_ALLOC(entry);
+ GIT_ERROR_CHECK_ALLOC(entry);
entry->crc = crc32(0L, Z_NULL, 0);
/* Write out the object header */
- hdr_len = git_packfile__object_header(hdr, len, git_odb_object_type(obj));
- if ((error = append_to_pack(idx, hdr, hdr_len)) < 0)
+ if ((error = git_packfile__object_header(&hdr_len, hdr, len, git_odb_object_type(obj))) < 0 ||
+ (error = append_to_pack(idx, hdr, hdr_len)) < 0)
goto cleanup;
idx->pack->mwf.size += hdr_len;
idx->pack->mwf.size += buf.size;
entry->crc = htonl(crc32(entry->crc, (unsigned char *)buf.ptr, (uInt)buf.size));
- git_buf_free(&buf);
+ git_str_dispose(&buf);
/* Write a fake trailer so the pack functions play ball */
- if ((error = append_to_pack(idx, &foo, GIT_OID_RAWSZ)) < 0)
+ if ((error = append_to_pack(idx, empty_checksum, checksum_size)) < 0)
goto cleanup;
idx->pack->mwf.size += GIT_OID_RAWSZ;
pentry = git__calloc(1, sizeof(struct git_pack_entry));
- GITERR_CHECK_ALLOC(pentry);
+ GIT_ERROR_CHECK_ALLOC(pentry);
git_oid_cpy(&pentry->sha1, id);
git_oid_cpy(&entry->oid, id);
return error;
}
-static int fix_thin_pack(git_indexer *idx, git_transfer_progress *stats)
+static int fix_thin_pack(git_indexer *idx, git_indexer_progress *stats)
{
int error, found_ref_delta = 0;
unsigned int i;
struct delta_info *delta;
size_t size;
- git_otype type;
+ git_object_t type;
git_mwindow *w = NULL;
- git_off_t curpos = 0;
+ off64_t curpos = 0;
unsigned char *base_info;
unsigned int left = 0;
git_oid base;
- assert(git_vector_length(&idx->deltas) > 0);
+ GIT_ASSERT(git_vector_length(&idx->deltas) > 0);
if (idx->odb == NULL) {
- giterr_set(GITERR_INDEXER, "cannot fix a thin pack without an ODB");
+ git_error_set(GIT_ERROR_INDEXER, "cannot fix a thin pack without an ODB");
return -1;
}
continue;
curpos = delta->delta_off;
- error = git_packfile_unpack_header(&size, &type, &idx->pack->mwf, &w, &curpos);
+ error = git_packfile_unpack_header(&size, &type, idx->pack, &w, &curpos);
if (error < 0)
return error;
- if (type == GIT_OBJ_REF_DELTA) {
+ if (type == GIT_OBJECT_REF_DELTA) {
found_ref_delta = 1;
break;
}
}
if (!found_ref_delta) {
- giterr_set(GITERR_INDEXER, "no REF_DELTA found, cannot inject object");
+ git_error_set(GIT_ERROR_INDEXER, "no REF_DELTA found, cannot inject object");
return -1;
}
/* curpos now points to the base information, which is an OID */
base_info = git_mwindow_open(&idx->pack->mwf, &w, curpos, GIT_OID_RAWSZ, &left);
if (base_info == NULL) {
- giterr_set(GITERR_INDEXER, "failed to map delta information");
+ git_error_set(GIT_ERROR_INDEXER, "failed to map delta information");
return -1;
}
return 0;
}
-static int resolve_deltas(git_indexer *idx, git_transfer_progress *stats)
+static int resolve_deltas(git_indexer *idx, git_indexer_progress *stats)
{
unsigned int i;
+ int error;
struct delta_info *delta;
int progressed = 0, non_null = 0, progress_cb_result;
progressed = 0;
non_null = 0;
git_vector_foreach(&idx->deltas, i, delta) {
- git_rawobj obj = {NULL};
+ git_rawobj obj = {0};
if (!delta)
continue;
non_null = 1;
idx->off = delta->delta_off;
- if (git_packfile_unpack(&obj, idx->pack, &idx->off) < 0)
+ if ((error = git_packfile_unpack(&obj, idx->pack, &idx->off)) < 0) {
+ if (error == GIT_PASSTHROUGH) {
+ /* We have not seen the base object, we'll try again later. */
+ continue;
+ }
+ return -1;
+ }
+
+ if (idx->do_verify && check_object_connectivity(idx, &obj) < 0)
+ /* TODO: error? continue? */
continue;
if (hash_and_save(idx, &obj, delta->delta_off) < 0)
return 0;
}
-static int update_header_and_rehash(git_indexer *idx, git_transfer_progress *stats)
+static int update_header_and_rehash(git_indexer *idx, git_indexer_progress *stats)
{
void *ptr;
size_t chunk = 1024*1024;
- git_off_t hashed = 0;
+ off64_t hashed = 0;
git_mwindow *w = NULL;
git_mwindow_file *mwf;
unsigned int left;
git_hash_init(&idx->trailer);
- /* Update the header to include the numer of local objects we injected */
+ /* Update the header to include the number of local objects we injected */
idx->hdr.hdr_entries = htonl(stats->total_objects + stats->local_objects);
if (write_at(idx, &idx->hdr, 0, sizeof(struct git_pack_header)) < 0)
return -1;
* hash_partially() keep the existing trailer out of the
* calculation.
*/
- git_mwindow_free_all(mwf);
+ if (git_mwindow_free_all(mwf) < 0)
+ return -1;
+
idx->inbuf_len = 0;
while (hashed < mwf->size) {
ptr = git_mwindow_open(mwf, &w, hashed, chunk, &left);
return 0;
}
-int git_indexer_commit(git_indexer *idx, git_transfer_progress *stats)
+int git_indexer_commit(git_indexer *idx, git_indexer_progress *stats)
{
git_mwindow *w = NULL;
unsigned int i, long_offsets = 0, left;
int error;
struct git_pack_idx_header hdr;
- git_buf filename = GIT_BUF_INIT;
+ git_str filename = GIT_STR_INIT;
struct entry *entry;
- git_oid trailer_hash, file_hash;
- git_hash_ctx ctx;
+ unsigned char checksum[GIT_HASH_SHA1_SIZE];
git_filebuf index_file = {0};
void *packfile_trailer;
+ size_t checksum_size = GIT_HASH_SHA1_SIZE;
+ bool mismatch;
if (!idx->parsed_header) {
- giterr_set(GITERR_INDEXER, "incomplete pack header");
+ git_error_set(GIT_ERROR_INDEXER, "incomplete pack header");
return -1;
}
- if (git_hash_ctx_init(&ctx) < 0)
- return -1;
-
/* Test for this before resolve_deltas(), as it plays with idx->off */
- if (idx->off + 20 < idx->pack->mwf.size) {
- giterr_set(GITERR_INDEXER, "unexpected data at the end of the pack");
+ if (idx->off + (ssize_t)checksum_size < idx->pack->mwf.size) {
+ git_error_set(GIT_ERROR_INDEXER, "unexpected data at the end of the pack");
+ return -1;
+ }
+ if (idx->off + (ssize_t)checksum_size > idx->pack->mwf.size) {
+ git_error_set(GIT_ERROR_INDEXER, "missing trailer at the end of the pack");
return -1;
}
- packfile_trailer = git_mwindow_open(&idx->pack->mwf, &w, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ, &left);
+ packfile_trailer = git_mwindow_open(&idx->pack->mwf, &w, idx->pack->mwf.size - checksum_size, checksum_size, &left);
if (packfile_trailer == NULL) {
git_mwindow_close(&w);
goto on_error;
}
/* Compare the packfile trailer as it was sent to us and what we calculated */
- git_oid_fromraw(&file_hash, packfile_trailer);
+ git_hash_final(checksum, &idx->trailer);
+ mismatch = !!memcmp(checksum, packfile_trailer, checksum_size);
git_mwindow_close(&w);
- git_hash_final(&trailer_hash, &idx->trailer);
- if (git_oid_cmp(&file_hash, &trailer_hash)) {
- giterr_set(GITERR_INDEXER, "packfile trailer mismatch");
+ if (mismatch) {
+ git_error_set(GIT_ERROR_INDEXER, "packfile trailer mismatch");
return -1;
}
return error;
if (stats->indexed_objects != stats->total_objects) {
- giterr_set(GITERR_INDEXER, "early EOF");
+ git_error_set(GIT_ERROR_INDEXER, "early EOF");
return -1;
}
if (update_header_and_rehash(idx, stats) < 0)
return -1;
- git_hash_final(&trailer_hash, &idx->trailer);
- write_at(idx, &trailer_hash, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ);
+ git_hash_final(checksum, &idx->trailer);
+ write_at(idx, checksum, idx->pack->mwf.size - checksum_size, checksum_size);
+ }
+
+ /*
+ * Is the resulting graph fully connected or are we still
+ * missing some objects? In the second case, we can
+ * bail out due to an incomplete and thus corrupt
+ * packfile.
+ */
+ if (git_oidmap_size(idx->expected_oids) > 0) {
+ git_error_set(GIT_ERROR_INDEXER, "packfile is missing %"PRIuZ" objects",
+ git_oidmap_size(idx->expected_oids));
+ return -1;
}
git_vector_sort(&idx->objects);
- git_buf_sets(&filename, idx->pack->pack_name);
- git_buf_shorten(&filename, strlen("pack"));
- git_buf_puts(&filename, "idx");
- if (git_buf_oom(&filename))
+ /* Use the trailer hash as the pack file name to ensure
+ * files with different contents have different names */
+ memcpy(idx->checksum, checksum, checksum_size);
+ if (git_hash_fmt(idx->name, checksum, checksum_size) < 0)
+ return -1;
+
+ git_str_sets(&filename, idx->pack->pack_name);
+ git_str_shorten(&filename, strlen("pack"));
+ git_str_puts(&filename, "idx");
+ if (git_str_oom(&filename))
return -1;
if (git_filebuf_open(&index_file, filename.ptr,
- GIT_FILEBUF_HASH_CONTENTS, idx->mode) < 0)
+ GIT_FILEBUF_HASH_CONTENTS |
+ (idx->do_fsync ? GIT_FILEBUF_FSYNC : 0),
+ idx->mode) < 0)
goto on_error;
/* Write out the header */
/* Write out the object names (SHA-1 hashes) */
git_vector_foreach(&idx->objects, i, entry) {
git_filebuf_write(&index_file, &entry->oid, sizeof(git_oid));
- git_hash_update(&ctx, &entry->oid, GIT_OID_RAWSZ);
}
- git_hash_final(&idx->hash, &ctx);
/* Write out the CRC32 values */
git_vector_foreach(&idx->objects, i, entry) {
}
/* Write out the packfile trailer to the index */
- if (git_filebuf_write(&index_file, &trailer_hash, GIT_OID_RAWSZ) < 0)
+ if (git_filebuf_write(&index_file, checksum, checksum_size) < 0)
goto on_error;
/* Write out the hash of the idx */
- if (git_filebuf_hash(&trailer_hash, &index_file) < 0)
+ if (git_filebuf_hash(checksum, &index_file) < 0)
goto on_error;
- git_filebuf_write(&index_file, &trailer_hash, sizeof(git_oid));
+ git_filebuf_write(&index_file, checksum, checksum_size);
/* Figure out what the final name should be */
if (index_path(&filename, idx, ".idx") < 0)
if (git_filebuf_commit_at(&index_file, filename.ptr) < 0)
goto on_error;
- git_mwindow_free_all(&idx->pack->mwf);
+ if (git_mwindow_free_all(&idx->pack->mwf) < 0)
+ goto on_error;
- /* Truncate file to undo rounding up to next page_size in append_to_pack */
+#if !defined(NO_MMAP) && defined(GIT_WIN32)
+ /*
+ * Some non-Windows remote filesystems fail when truncating files if the
+ * file permissions change after opening the file (done by p_mkstemp).
+ *
+ * Truncation is only needed when mmap is used to undo rounding up to next
+ * page_size in append_to_pack.
+ */
if (p_ftruncate(idx->pack->mwf.fd, idx->pack->mwf.size) < 0) {
- giterr_set(GITERR_OS, "failed to truncate pack file '%s'", idx->pack->pack_name);
+ git_error_set(GIT_ERROR_OS, "failed to truncate pack file '%s'", idx->pack->pack_name);
return -1;
}
+#endif
+
+ if (idx->do_fsync && p_fsync(idx->pack->mwf.fd) < 0) {
+ git_error_set(GIT_ERROR_OS, "failed to fsync packfile");
+ goto on_error;
+ }
/* We need to close the descriptor here so Windows doesn't choke on commit_at */
if (p_close(idx->pack->mwf.fd) < 0) {
- giterr_set(GITERR_OS, "failed to close packfile");
+ git_error_set(GIT_ERROR_OS, "failed to close packfile");
goto on_error;
}
goto on_error;
/* And don't forget to rename the packfile to its new place. */
- p_rename(idx->pack->pack_name, git_buf_cstr(&filename));
+ if (p_rename(idx->pack->pack_name, git_str_cstr(&filename)) < 0)
+ goto on_error;
+
+ /* And fsync the parent directory if we're asked to. */
+ if (idx->do_fsync &&
+ git_futils_fsync_parent(git_str_cstr(&filename)) < 0)
+ goto on_error;
+
idx->pack_committed = 1;
- git_buf_free(&filename);
- git_hash_ctx_cleanup(&ctx);
+ git_str_dispose(&filename);
return 0;
on_error:
git_mwindow_free_all(&idx->pack->mwf);
git_filebuf_cleanup(&index_file);
- git_buf_free(&filename);
- git_hash_ctx_cleanup(&ctx);
+ git_str_dispose(&filename);
return -1;
}
void git_indexer_free(git_indexer *idx)
{
+ const git_oid *key;
+ git_oid *value;
+ size_t iter;
+
if (idx == NULL)
return;
+ if (idx->have_stream)
+ git_packfile_stream_dispose(&idx->stream);
+
git_vector_free_deep(&idx->objects);
if (idx->pack->idx_cache) {
struct git_pack_entry *pentry;
- kh_foreach_value(
- idx->pack->idx_cache, pentry, { git__free(pentry); });
+ git_oidmap_foreach_value(idx->pack->idx_cache, pentry, {
+ git__free(pentry);
+ });
git_oidmap_free(idx->pack->idx_cache);
}
git_vector_free_deep(&idx->deltas);
- if (!git_mutex_lock(&git__mwindow_mutex)) {
- if (!idx->pack_committed)
- git_packfile_close(idx->pack, true);
+ git_packfile_free(idx->pack, !idx->pack_committed);
- git_packfile_free(idx->pack);
- git_mutex_unlock(&git__mwindow_mutex);
- }
+ iter = 0;
+ while (git_oidmap_iterate((void **) &value, idx->expected_oids, &iter, &key) == 0)
+ git__free(value);
git_hash_ctx_cleanup(&idx->trailer);
git_hash_ctx_cleanup(&idx->hash_ctx);
+ git_str_dispose(&idx->entry_data);
+ git_oidmap_free(idx->expected_oids);
git__free(idx);
}