]> git.proxmox.com Git - libgit2.git/blobdiff - src/midx.c
New upstream version 1.3.0+dfsg.1
[libgit2.git] / src / midx.c
index 21cfff497cfab7e670f48d85ab5dc638a9503278..6a885eddca57a17984f88f6670f949b242bcfc32 100644 (file)
@@ -7,13 +7,15 @@
 
 #include "midx.h"
 
+#include "array.h"
 #include "buffer.h"
+#include "filebuf.h"
 #include "futils.h"
 #include "hash.h"
 #include "odb.h"
 #include "pack.h"
-
-#define GIT_MIDX_FILE_MODE 0444
+#include "path.h"
+#include "repository.h"
 
 #define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */
 #define MIDX_VERSION 1
@@ -38,6 +40,8 @@ struct git_midx_chunk {
        size_t length;
 };
 
+typedef int (*midx_write_cb)(const char *buf, size_t size, void *cb_data);
+
 static int midx_error(const char *message)
 {
        git_error_set(GIT_ERROR_ODB, "invalid multi-pack-index file - %s", message);
@@ -116,7 +120,7 @@ static int midx_parse_oid_lookup(
                return midx_error("missing OID Lookup chunk");
        if (chunk_oid_lookup->length == 0)
                return midx_error("empty OID Lookup chunk");
-       if (chunk_oid_lookup->length != idx->num_objects * 20)
+       if (chunk_oid_lookup->length != idx->num_objects * GIT_OID_RAWSZ)
                return midx_error("OID Lookup chunk has wrong length");
 
        idx->oid_lookup = oid = (git_oid *)(data + chunk_oid_lookup->offset);
@@ -181,9 +185,9 @@ int git_midx_parse(
                                         chunk_object_offsets = {0},
                                         chunk_object_large_offsets = {0};
 
-       assert(idx);
+       GIT_ASSERT_ARG(idx);
 
-       if (size < sizeof(struct git_midx_header) + 20)
+       if (size < sizeof(struct git_midx_header) + GIT_OID_RAWSZ)
                return midx_error("multi-pack index is too short");
 
        hdr = ((struct git_midx_header *)data);
@@ -203,7 +207,7 @@ int git_midx_parse(
        last_chunk_offset =
                        sizeof(struct git_midx_header) +
                        (1 + hdr->chunks) * 12;
-       trailer_offset = size - 20;
+       trailer_offset = size - GIT_OID_RAWSZ;
        if (trailer_offset < last_chunk_offset)
                return midx_error("wrong index size");
        git_oid_cpy(&idx->checksum, (git_oid *)(data + trailer_offset));
@@ -309,6 +313,10 @@ int git_midx_open(
        idx = git__calloc(1, sizeof(git_midx_file));
        GIT_ERROR_CHECK_ALLOC(idx);
 
+       error = git_buf_sets(&idx->filename, path);
+       if (error < 0)
+               return error;
+
        error = git_futils_mmap_ro(&idx->index_map, fd, 0, idx_size);
        p_close(fd);
        if (error < 0) {
@@ -325,6 +333,41 @@ int git_midx_open(
        return 0;
 }
 
+bool git_midx_needs_refresh(
+               const git_midx_file *idx,
+               const char *path)
+{
+       git_file fd = -1;
+       struct stat st;
+       ssize_t bytes_read;
+       git_oid idx_checksum = {{0}};
+
+       /* TODO: properly open the file without access time using O_NOATIME */
+       fd = git_futils_open_ro(path);
+       if (fd < 0)
+               return true;
+
+       if (p_fstat(fd, &st) < 0) {
+               p_close(fd);
+               return true;
+       }
+
+       if (!S_ISREG(st.st_mode) ||
+           !git__is_sizet(st.st_size) ||
+           (size_t)st.st_size != idx->index_map.len) {
+               p_close(fd);
+               return true;
+       }
+
+       bytes_read = p_pread(fd, &idx_checksum, GIT_OID_RAWSZ, st.st_size - GIT_OID_RAWSZ);
+       p_close(fd);
+
+       if (bytes_read != GIT_OID_RAWSZ)
+               return true;
+
+       return !git_oid_equal(&idx_checksum, &idx->checksum);
+}
+
 int git_midx_entry_find(
                git_midx_entry *e,
                git_midx_file *idx,
@@ -338,12 +381,12 @@ int git_midx_entry_find(
        const unsigned char *object_offset;
        off64_t offset;
 
-       assert(idx);
+       GIT_ASSERT_ARG(idx);
 
        hi = ntohl(idx->oid_fanout[(int)short_oid->id[0]]);
        lo = ((short_oid->id[0] == 0x0) ? 0 : ntohl(idx->oid_fanout[(int)short_oid->id[0] - 1]));
 
-       pos = git_pack__lookup_sha1(idx->oid_lookup, 20, lo, hi, short_oid->id);
+       pos = git_pack__lookup_sha1(idx->oid_lookup, GIT_OID_RAWSZ, lo, hi, short_oid->id);
 
        if (pos >= 0) {
                /* An object matching exactly the oid was found */
@@ -399,13 +442,34 @@ int git_midx_entry_find(
        return 0;
 }
 
-void git_midx_close(git_midx_file *idx)
+int git_midx_foreach_entry(
+               git_midx_file *idx,
+               git_odb_foreach_cb cb,
+               void *data)
 {
-       assert(idx);
+       size_t i;
+       int error;
+
+       GIT_ASSERT_ARG(idx);
+
+       for (i = 0; i < idx->num_objects; ++i) {
+               if ((error = cb(&idx->oid_lookup[i], data)) != 0)
+                       return git_error_set_after_callback(error);
+       }
+
+       return error;
+}
+
+int git_midx_close(git_midx_file *idx)
+{
+       GIT_ASSERT_ARG(idx);
 
        if (idx->index_map.data)
                git_futils_mmap_free(&idx->index_map);
+
        git_vector_free(&idx->packfile_names);
+
+       return 0;
 }
 
 void git_midx_free(git_midx_file *idx)
@@ -413,6 +477,403 @@ void git_midx_free(git_midx_file *idx)
        if (!idx)
                return;
 
+       git_buf_dispose(&idx->filename);
        git_midx_close(idx);
        git__free(idx);
 }
+
+static int packfile__cmp(const void *a_, const void *b_)
+{
+       const struct git_pack_file *a = a_;
+       const struct git_pack_file *b = b_;
+
+       return strcmp(a->pack_name, b->pack_name);
+}
+
+int git_midx_writer_new(
+               git_midx_writer **out,
+               const char *pack_dir)
+{
+       git_midx_writer *w = git__calloc(1, sizeof(git_midx_writer));
+       GIT_ERROR_CHECK_ALLOC(w);
+
+       if (git_buf_sets(&w->pack_dir, pack_dir) < 0) {
+               git__free(w);
+               return -1;
+       }
+       git_path_squash_slashes(&w->pack_dir);
+
+       if (git_vector_init(&w->packs, 0, packfile__cmp) < 0) {
+               git_buf_dispose(&w->pack_dir);
+               git__free(w);
+               return -1;
+       }
+
+       *out = w;
+       return 0;
+}
+
+void git_midx_writer_free(git_midx_writer *w)
+{
+       struct git_pack_file *p;
+       size_t i;
+
+       if (!w)
+               return;
+
+       git_vector_foreach (&w->packs, i, p)
+               git_mwindow_put_pack(p);
+       git_vector_free(&w->packs);
+       git_buf_dispose(&w->pack_dir);
+       git__free(w);
+}
+
+int git_midx_writer_add(
+               git_midx_writer *w,
+               const char *idx_path)
+{
+       git_buf idx_path_buf = GIT_BUF_INIT;
+       int error;
+       struct git_pack_file *p;
+
+       error = git_path_prettify(&idx_path_buf, idx_path, git_buf_cstr(&w->pack_dir));
+       if (error < 0)
+               return error;
+
+       error = git_mwindow_get_pack(&p, git_buf_cstr(&idx_path_buf));
+       git_buf_dispose(&idx_path_buf);
+       if (error < 0)
+               return error;
+
+       error = git_vector_insert(&w->packs, p);
+       if (error < 0) {
+               git_mwindow_put_pack(p);
+               return error;
+       }
+
+       return 0;
+}
+
+typedef git_array_t(git_midx_entry) object_entry_array_t;
+
+struct object_entry_cb_state {
+       uint32_t pack_index;
+       object_entry_array_t *object_entries_array;
+};
+
+static int object_entry__cb(const git_oid *oid, off64_t offset, void *data)
+{
+       struct object_entry_cb_state *state = (struct object_entry_cb_state *)data;
+
+       git_midx_entry *entry = git_array_alloc(*state->object_entries_array);
+       GIT_ERROR_CHECK_ALLOC(entry);
+
+       git_oid_cpy(&entry->sha1, oid);
+       entry->offset = offset;
+       entry->pack_index = state->pack_index;
+
+       return 0;
+}
+
+static int object_entry__cmp(const void *a_, const void *b_)
+{
+       const git_midx_entry *a = (const git_midx_entry *)a_;
+       const git_midx_entry *b = (const git_midx_entry *)b_;
+
+       return git_oid_cmp(&a->sha1, &b->sha1);
+}
+
+static int write_offset(off64_t offset, midx_write_cb write_cb, void *cb_data)
+{
+       int error;
+       uint32_t word;
+
+       word = htonl((uint32_t)((offset >> 32) & 0xffffffffu));
+       error = write_cb((const char *)&word, sizeof(word), cb_data);
+       if (error < 0)
+               return error;
+       word = htonl((uint32_t)((offset >> 0) & 0xffffffffu));
+       error = write_cb((const char *)&word, sizeof(word), cb_data);
+       if (error < 0)
+               return error;
+
+       return 0;
+}
+
+static int write_chunk_header(int chunk_id, off64_t offset, midx_write_cb write_cb, void *cb_data)
+{
+       uint32_t word = htonl(chunk_id);
+       int error = write_cb((const char *)&word, sizeof(word), cb_data);
+       if (error < 0)
+               return error;
+       return write_offset(offset, write_cb, cb_data);
+
+       return 0;
+}
+
+static int midx_write_buf(const char *buf, size_t size, void *data)
+{
+       git_buf *b = (git_buf *)data;
+       return git_buf_put(b, buf, size);
+}
+
+struct midx_write_hash_context {
+       midx_write_cb write_cb;
+       void *cb_data;
+       git_hash_ctx *ctx;
+};
+
+static int midx_write_hash(const char *buf, size_t size, void *data)
+{
+       struct midx_write_hash_context *ctx = (struct midx_write_hash_context *)data;
+       int error;
+
+       error = git_hash_update(ctx->ctx, buf, size);
+       if (error < 0)
+               return error;
+
+       return ctx->write_cb(buf, size, ctx->cb_data);
+}
+
+static int midx_write(
+               git_midx_writer *w,
+               midx_write_cb write_cb,
+               void *cb_data)
+{
+       int error = 0;
+       size_t i;
+       struct git_pack_file *p;
+       struct git_midx_header hdr = {0};
+       uint32_t oid_fanout_count;
+       uint32_t object_large_offsets_count;
+       uint32_t oid_fanout[256];
+       off64_t offset;
+       git_buf packfile_names = GIT_BUF_INIT,
+               oid_lookup = GIT_BUF_INIT,
+               object_offsets = GIT_BUF_INIT,
+               object_large_offsets = GIT_BUF_INIT;
+       git_oid idx_checksum = {{0}};
+       git_midx_entry *entry;
+       object_entry_array_t object_entries_array = GIT_ARRAY_INIT;
+       git_vector object_entries = GIT_VECTOR_INIT;
+       git_hash_ctx ctx;
+       struct midx_write_hash_context hash_cb_data = {0};
+
+       hdr.signature = htonl(MIDX_SIGNATURE);
+       hdr.version = MIDX_VERSION;
+       hdr.object_id_version = MIDX_OBJECT_ID_VERSION;
+       hdr.base_midx_files = 0;
+
+       hash_cb_data.write_cb = write_cb;
+       hash_cb_data.cb_data = cb_data;
+       hash_cb_data.ctx = &ctx;
+
+       error = git_hash_ctx_init(&ctx);
+       if (error < 0)
+               return error;
+       cb_data = &hash_cb_data;
+       write_cb = midx_write_hash;
+
+       git_vector_sort(&w->packs);
+       git_vector_foreach (&w->packs, i, p) {
+               git_buf relative_index = GIT_BUF_INIT;
+               struct object_entry_cb_state state = {0};
+               size_t path_len;
+
+               state.pack_index = (uint32_t)i;
+               state.object_entries_array = &object_entries_array;
+
+               error = git_buf_sets(&relative_index, p->pack_name);
+               if (error < 0)
+                       goto cleanup;
+               error = git_path_make_relative(&relative_index, git_buf_cstr(&w->pack_dir));
+               if (error < 0) {
+                       git_buf_dispose(&relative_index);
+                       goto cleanup;
+               }
+               path_len = git_buf_len(&relative_index);
+               if (path_len <= strlen(".pack") || git__suffixcmp(git_buf_cstr(&relative_index), ".pack") != 0) {
+                       git_buf_dispose(&relative_index);
+                       git_error_set(GIT_ERROR_INVALID, "invalid packfile name: '%s'", p->pack_name);
+                       error = -1;
+                       goto cleanup;
+               }
+               path_len -= strlen(".pack");
+
+               git_buf_put(&packfile_names, git_buf_cstr(&relative_index), path_len);
+               git_buf_puts(&packfile_names, ".idx");
+               git_buf_putc(&packfile_names, '\0');
+               git_buf_dispose(&relative_index);
+
+               error = git_pack_foreach_entry_offset(p, object_entry__cb, &state);
+               if (error < 0)
+                       goto cleanup;
+       }
+
+       /* Sort the object entries. */
+       error = git_vector_init(&object_entries, git_array_size(object_entries_array), object_entry__cmp);
+       if (error < 0)
+               goto cleanup;
+       git_array_foreach (object_entries_array, i, entry) {
+               if ((error = git_vector_set(NULL, &object_entries, i, entry)) < 0)
+                       goto cleanup;
+       }
+       git_vector_set_sorted(&object_entries, 0);
+       git_vector_sort(&object_entries);
+       git_vector_uniq(&object_entries, NULL);
+
+       /* Pad the packfile names so it is a multiple of four. */
+       while (git_buf_len(&packfile_names) & 3)
+               git_buf_putc(&packfile_names, '\0');
+
+       /* Fill the OID Fanout table. */
+       oid_fanout_count = 0;
+       for (i = 0; i < 256; i++) {
+               while (oid_fanout_count < git_vector_length(&object_entries) &&
+                      ((const git_midx_entry *)git_vector_get(&object_entries, oid_fanout_count))->sha1.id[0] <= i)
+                       ++oid_fanout_count;
+               oid_fanout[i] = htonl(oid_fanout_count);
+       }
+
+       /* Fill the OID Lookup table. */
+       git_vector_foreach (&object_entries, i, entry) {
+               error = git_buf_put(&oid_lookup, (const char *)&entry->sha1, sizeof(entry->sha1));
+               if (error < 0)
+                       goto cleanup;
+       }
+
+       /* Fill the Object Offsets and Object Large Offsets tables. */
+       object_large_offsets_count = 0;
+       git_vector_foreach (&object_entries, i, entry) {
+               uint32_t word;
+
+               word = htonl((uint32_t)entry->pack_index);
+               error = git_buf_put(&object_offsets, (const char *)&word, sizeof(word));
+               if (error < 0)
+                       goto cleanup;
+               if (entry->offset >= 0x80000000l) {
+                       word = htonl(0x80000000u | object_large_offsets_count++);
+                       if ((error = write_offset(entry->offset, midx_write_buf, &object_large_offsets)) < 0)
+                               goto cleanup;
+               } else {
+                       word = htonl((uint32_t)entry->offset & 0x7fffffffu);
+               }
+
+               error = git_buf_put(&object_offsets, (const char *)&word, sizeof(word));
+               if (error < 0)
+                       goto cleanup;
+       }
+
+       /* Write the header. */
+       hdr.packfiles = htonl((uint32_t)git_vector_length(&w->packs));
+       hdr.chunks = 4;
+       if (git_buf_len(&object_large_offsets) > 0)
+               hdr.chunks++;
+       error = write_cb((const char *)&hdr, sizeof(hdr), cb_data);
+       if (error < 0)
+               goto cleanup;
+
+       /* Write the chunk headers. */
+       offset = sizeof(hdr) + (hdr.chunks + 1) * 12;
+       error = write_chunk_header(MIDX_PACKFILE_NAMES_ID, offset, write_cb, cb_data);
+       if (error < 0)
+               goto cleanup;
+       offset += git_buf_len(&packfile_names);
+       error = write_chunk_header(MIDX_OID_FANOUT_ID, offset, write_cb, cb_data);
+       if (error < 0)
+               goto cleanup;
+       offset += sizeof(oid_fanout);
+       error = write_chunk_header(MIDX_OID_LOOKUP_ID, offset, write_cb, cb_data);
+       if (error < 0)
+               goto cleanup;
+       offset += git_buf_len(&oid_lookup);
+       error = write_chunk_header(MIDX_OBJECT_OFFSETS_ID, offset, write_cb, cb_data);
+       if (error < 0)
+               goto cleanup;
+       offset += git_buf_len(&object_offsets);
+       if (git_buf_len(&object_large_offsets) > 0) {
+               error = write_chunk_header(MIDX_OBJECT_LARGE_OFFSETS_ID, offset, write_cb, cb_data);
+               if (error < 0)
+                       goto cleanup;
+               offset += git_buf_len(&object_large_offsets);
+       }
+       error = write_chunk_header(0, offset, write_cb, cb_data);
+       if (error < 0)
+               goto cleanup;
+
+       /* Write all the chunks. */
+       error = write_cb(git_buf_cstr(&packfile_names), git_buf_len(&packfile_names), cb_data);
+       if (error < 0)
+               goto cleanup;
+       error = write_cb((const char *)oid_fanout, sizeof(oid_fanout), cb_data);
+       if (error < 0)
+               goto cleanup;
+       error = write_cb(git_buf_cstr(&oid_lookup), git_buf_len(&oid_lookup), cb_data);
+       if (error < 0)
+               goto cleanup;
+       error = write_cb(git_buf_cstr(&object_offsets), git_buf_len(&object_offsets), cb_data);
+       if (error < 0)
+               goto cleanup;
+       error = write_cb(git_buf_cstr(&object_large_offsets), git_buf_len(&object_large_offsets), cb_data);
+       if (error < 0)
+               goto cleanup;
+
+       /* Finalize the checksum and write the trailer. */
+       error = git_hash_final(&idx_checksum, &ctx);
+       if (error < 0)
+               goto cleanup;
+       error = write_cb((const char *)&idx_checksum, sizeof(idx_checksum), cb_data);
+       if (error < 0)
+               goto cleanup;
+
+cleanup:
+       git_array_clear(object_entries_array);
+       git_vector_free(&object_entries);
+       git_buf_dispose(&packfile_names);
+       git_buf_dispose(&oid_lookup);
+       git_buf_dispose(&object_offsets);
+       git_buf_dispose(&object_large_offsets);
+       git_hash_ctx_cleanup(&ctx);
+       return error;
+}
+
+static int midx_write_filebuf(const char *buf, size_t size, void *data)
+{
+       git_filebuf *f = (git_filebuf *)data;
+       return git_filebuf_write(f, buf, size);
+}
+
+int git_midx_writer_commit(
+               git_midx_writer *w)
+{
+       int error;
+       int filebuf_flags = GIT_FILEBUF_DO_NOT_BUFFER;
+       git_buf midx_path = GIT_BUF_INIT;
+       git_filebuf output = GIT_FILEBUF_INIT;
+
+       error = git_buf_joinpath(&midx_path, git_buf_cstr(&w->pack_dir), "multi-pack-index");
+       if (error < 0)
+               return error;
+
+       if (git_repository__fsync_gitdir)
+               filebuf_flags |= GIT_FILEBUF_FSYNC;
+       error = git_filebuf_open(&output, git_buf_cstr(&midx_path), filebuf_flags, 0644);
+       git_buf_dispose(&midx_path);
+       if (error < 0)
+               return error;
+
+       error = midx_write(w, midx_write_filebuf, &output);
+       if (error < 0) {
+               git_filebuf_cleanup(&output);
+               return error;
+       }
+
+       return git_filebuf_commit(&output);
+}
+
+int git_midx_writer_dump(
+               git_buf *midx,
+               git_midx_writer *w)
+{
+       return midx_write(w, midx_write_buf, midx);
+}