From adc0327ab308696650b62b2a8ded75f788f7b297 Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Fri, 29 Oct 2010 00:44:15 +0300 Subject: [PATCH] Add git_odb_read_header method to the ODB As requested, git_odb_read_header looks up an object on the ODB, but loads only the header information (type & size) without loading any of the actual file contents in memory. It is significantly faster than doing a git_odb_read if you only need an object's information and not its contents. Signed-off-by: Vicent Marti --- src/git/odb.h | 19 ++ src/odb.c | 581 ++++++++++++++++++++++++++++++++++---------------- 2 files changed, 419 insertions(+), 181 deletions(-) diff --git a/src/git/odb.h b/src/git/odb.h index a132346c6..b4812424c 100644 --- a/src/git/odb.h +++ b/src/git/odb.h @@ -68,6 +68,25 @@ typedef struct { */ GIT_EXTERN(int) git_odb_read(git_rawobj *out, git_odb *db, const git_oid *id); +/** + * Read the header of an object from the database, without + * reading its full contents. + * + * Only the 'type' and 'len' fields of the git_rawobj structure + * are filled. The 'data' pointer will always be NULL. + * + * The raw object pointed to by 'out' doesn't need to be manually + * closed with git_obj_close(). + * + * @param out object descriptor to populate upon reading. + * @param db database to search for the object in. + * @param id identity of the object to read. + * @return + * - GIT_SUCCESS if the object was read; + * - GIT_ENOTFOUND if the object is not in the database. + */ +GIT_EXTERN(int) git_odb_read_header(git_rawobj *out, git_odb *db, const git_oid *id); + /** * Read an object from the database using only pack files. 
* diff --git a/src/odb.c b/src/odb.c index 8fbcebd03..ff258b5db 100644 --- a/src/odb.c +++ b/src/odb.c @@ -33,6 +33,10 @@ #define GIT_PACK_NAME_MAX (5 + 40 + 1) +#define OBJ_LOCATION_NOTFOUND GIT_ENOTFOUND +#define OBJ_LOCATION_INPACK 1 +#define OBJ_LOCATION_LOOSE 2 + typedef struct { uint32_t n; unsigned char *oid; @@ -40,6 +44,7 @@ typedef struct { off_t size; } index_entry; + struct git_pack { git_odb *db; git_lck lock; @@ -148,6 +153,21 @@ static struct { { "REF_DELTA", 0 } /* 7 = GIT_OBJ_REF_DELTA */ }; +typedef union obj_location { + char loose_path[GIT_PATH_MAX]; + struct { + git_pack *ptr; + uint32_t n; + } pack; +} obj_location; + + +/*********************************************************** + * + * MISCELLANEOUS HELPER FUNCTIONS + * + ***********************************************************/ + GIT_INLINE(uint32_t) decode32(void *b) { return ntohl(*((uint32_t *)b)); @@ -340,6 +360,17 @@ static size_t get_object_header(obj_hdr *hdr, unsigned char *data) return used; } + + + + + +/*********************************************************** + * + * ZLIB RELATED FUNCTIONS + * + ***********************************************************/ + static void init_stream(z_stream *s, void *out, size_t len) { memset(s, 0, sizeof(*s)); @@ -624,58 +655,19 @@ static int deflate_obj(gitfo_buf *buf, char *hdr, int hdrlen, git_rawobj *obj, i return GIT_SUCCESS; } -static int write_obj(gitfo_buf *buf, git_oid *id, git_odb *db) -{ - char file[GIT_PATH_MAX]; - char temp[GIT_PATH_MAX]; - git_file fd; - if (object_file_name(file, sizeof(file), db->objects_dir, id)) - return GIT_ERROR; - if (make_temp_file(&fd, temp, sizeof(temp), file) < 0) - return GIT_ERROR; - if (gitfo_write(fd, buf->data, buf->len) < 0) { - gitfo_close(fd); - gitfo_unlink(temp); - return GIT_ERROR; - } - if (db->fsync_object_files) - gitfo_fsync(fd); - gitfo_close(fd); - gitfo_chmod(temp, 0444); - if (gitfo_move_file(temp, file) < 0) { - gitfo_unlink(temp); - return GIT_ERROR; - } - - return GIT_SUCCESS; 
-} -static int open_alternates(git_odb *db) -{ - unsigned n = 0; - - gitlck_lock(&db->lock); - if (db->alternates) { - gitlck_unlock(&db->lock); - return 1; - } - - db->alternates = git__malloc(sizeof(*db->alternates) * (n + 1)); - if (!db->alternates) { - gitlck_unlock(&db->lock); - return -1; - } - - db->alternates[n] = NULL; - db->n_alternates = n; - gitlck_unlock(&db->lock); - return 0; -} +/*********************************************************** + * + * PACKFILE INDEX FUNCTIONS + * + * Get index information for packfile indexes v1 and v2 + * + ***********************************************************/ static int pack_openidx_map(git_pack *p) { @@ -1019,6 +1011,19 @@ static int pack_openidx_v2(git_pack *p) return GIT_SUCCESS; } + + + + + +/*********************************************************** + * + * PACKFILE FUNCTIONS + * + * Locate, open and access the contents of a packfile + * + ***********************************************************/ + static int pack_stat(git_pack *p) { char pb[GIT_PATH_MAX]; @@ -1365,127 +1370,20 @@ static int search_packs(git_pack **p, uint32_t *n, git_odb *db, const git_oid *i return GIT_ENOTFOUND; } -static int exists_packed(git_odb *db, const git_oid *id) -{ - return !search_packs(NULL, NULL, db, id); -} - -static int exists_loose(git_odb *db, const git_oid *id) -{ - char file[GIT_PATH_MAX]; - - if (object_file_name(file, sizeof(file), db->objects_dir, id)) - return 0; - - if (gitfo_exists(file) < 0) - return 0; - return 1; -} -int git_odb_exists(git_odb *db, const git_oid *id) -{ - /* TODO: extend to search alternate db's */ - if (exists_packed(db, id)) - return 1; - return exists_loose(db, id); -} -int git_odb_open(git_odb **out, const char *objects_dir) -{ - git_odb *db = git__calloc(1, sizeof(*db)); - if (!db) - return GIT_ERROR; - db->objects_dir = git__strdup(objects_dir); - if (!db->objects_dir) { - free(db); - return GIT_ERROR; - } - - gitlck_init(&db->lock); - db->object_zlib_level = Z_BEST_SPEED; - 
db->fsync_object_files = 0; - *out = db; - return GIT_SUCCESS; -} -void git_odb_close(git_odb *db) -{ - git_packlist *pl; - - if (!db) - return; - - gitlck_lock(&db->lock); - - pl = db->packlist; - db->packlist = NULL; - - if (db->alternates) { - git_odb **alt; - for (alt = db->alternates; *alt; alt++) - git_odb_close(*alt); - free(db->alternates); - } - - free(db->objects_dir); - - gitlck_unlock(&db->lock); - if (pl) - packlist_dec(db, pl); - gitlck_free(&db->lock); - free(db); -} - -int git_odb_read( - git_rawobj *out, - git_odb *db, - const git_oid *id) -{ -attempt: - - assert(out && db); - - if (!git_odb__read_packed(out, db, id)) - return GIT_SUCCESS; - if (!git_odb__read_loose(out, db, id)) - return GIT_SUCCESS; - if (!open_alternates(db)) - goto attempt; - - out->data = NULL; - return GIT_ENOTFOUND; -} - -int git_odb__read_loose(git_rawobj *out, git_odb *db, const git_oid *id) -{ - char file[GIT_PATH_MAX]; - gitfo_buf obj = GITFO_BUF_INIT; - - assert(out && db && id); - - out->data = NULL; - out->len = 0; - out->type = GIT_OBJ_BAD; - - if (object_file_name(file, sizeof(file), db->objects_dir, id)) - return GIT_ENOTFOUND; /* TODO: error handling */ - - if (gitfo_read_file(&obj, file)) - return GIT_ENOTFOUND; /* TODO: error handling */ - - if (inflate_disk_obj(out, &obj)) { - gitfo_free_buf(&obj); - return GIT_ENOTFOUND; /* TODO: error handling */ - } - - gitfo_free_buf(&obj); - - return GIT_SUCCESS; -} +/*********************************************************** + * + * PACKFILE READING FUNCTIONS + * + * Read the contents of a packfile + * + ***********************************************************/ static int unpack_object(git_rawobj *out, git_pack *p, index_entry *e); @@ -1611,7 +1509,6 @@ static int unpack_object(git_rawobj *out, git_pack *p, index_entry *e) if (!p->idx_search(&n, p, &base_id) && !p->idx_get(&entry, p, n)) { - /* FIXME: deflated_size - 20 ? 
*/ res = unpack_object_delta(out, p, &entry, buffer + GIT_OID_RAWSZ, deflated_size, inflated_size); } @@ -1626,52 +1523,374 @@ static int unpack_object(git_rawobj *out, git_pack *p, index_entry *e) return GIT_SUCCESS; } -static int read_packed(git_rawobj *out, git_pack *p, const git_oid *id) + + + + +/*********************************************************** + * + * ODB OBJECT READING & WRITING + * + * Backend for the public API; read headers and full objects + * from the ODB. Write raw data to the ODB. + * + ***********************************************************/ + +static int open_alternates(git_odb *db) +{ + unsigned n = 0; + + gitlck_lock(&db->lock); + if (db->alternates) { + gitlck_unlock(&db->lock); + return 1; + } + + /* + * FIXME: broken, makes no sense. + * n is always 0, the alternates array is always + * empty! + */ + + db->alternates = git__malloc(sizeof(*db->alternates) * (n + 1)); + if (!db->alternates) { + gitlck_unlock(&db->lock); + return -1; + } + + db->alternates[n] = NULL; + db->n_alternates = n; + gitlck_unlock(&db->lock); + return 0; +} + + +static int locate_object(obj_location *location, git_odb *db, const git_oid *id) +{ + memset(location, 0x0, sizeof(obj_location)); + + do { + if (object_file_name(location->loose_path, GIT_PATH_MAX, db->objects_dir, id) == GIT_SUCCESS && + gitfo_exists(location->loose_path) == 0) + return OBJ_LOCATION_LOOSE; + + if (search_packs(&location->pack.ptr, &location->pack.n, db, id) == GIT_SUCCESS) + return OBJ_LOCATION_INPACK; + + } while (open_alternates(db) == GIT_SUCCESS); + + return GIT_ENOTFOUND; +} + +static int read_packed(git_rawobj *out, const obj_location *loc) { - uint32_t n; index_entry e; int res; - assert(out && p && id); + assert(out && loc); - if (pack_openidx(p)) + if (pack_openidx(loc->pack.ptr)) return GIT_ERROR; - res = p->idx_search(&n, p, id); - if (!res) - res = p->idx_get(&e, p, n); + res = loc->pack.ptr->idx_get(&e, loc->pack.ptr, loc->pack.n); if (!res) - res = unpack_object(out, 
p, &e); + res = unpack_object(out, loc->pack.ptr, &e); - pack_decidx(p); + pack_decidx(loc->pack.ptr); return res; } -int git_odb__read_packed(git_rawobj *out, git_odb *db, const git_oid *id) +static int read_header_packed(git_rawobj *out, const obj_location *loc) { - git_packlist *pl = packlist_get(db); - size_t j; + git_pack *pack; + index_entry e; + int error = GIT_SUCCESS, shift; + uint8_t *buffer, byte; + + assert(out && loc); + + pack = loc->pack.ptr; + + if (pack_openidx(pack)) + return GIT_ERROR; + + if (pack->idx_get(&e, pack, loc->pack.n) < 0 || + open_pack(pack) < 0) { + error = GIT_ENOTFOUND; + goto cleanup; + } + + buffer = (uint8_t *)pack->pack_map.data + e.offset; + + byte = *buffer++ & 0xFF; + out->type = (byte >> 4) & 0x7; + out->len = byte & 0xF; + shift = 4; - assert(out && db && id); + while (byte & 0x80) { + byte = *buffer++ & 0xFF; + out->len += (byte & 0x7F) << shift; + shift += 7; + } + + /* + * FIXME: if the object is not packed as a whole, + * we need to do a full load and apply the deltas before + * being able to read the header. + * + * I don't think there are any workarounds for this. 
+ */ + + if (out->type == GIT_OBJ_OFS_DELTA || out->type == GIT_OBJ_REF_DELTA) { + error = unpack_object(out, pack, &e); + git_obj_close(out); + } + +cleanup: + pack_decidx(loc->pack.ptr); + return error; +} + +static int read_loose(git_rawobj *out, git_odb *db, const obj_location *loc) +{ + int error; + gitfo_buf obj = GITFO_BUF_INIT; + + assert(out && db && loc); out->data = NULL; out->len = 0; out->type = GIT_OBJ_BAD; - if (!pl) + if (gitfo_read_file(&obj, loc->loose_path) < 0) return GIT_ENOTFOUND; - for (j = 0; j < pl->n_packs; j++) { - if (!read_packed(out, pl->packs[j], id)) { - packlist_dec(db, pl); - return GIT_SUCCESS; + error = inflate_disk_obj(out, &obj); + gitfo_free_buf(&obj); + + return error; +} + +static int read_header_loose(git_rawobj *out, git_odb *db, const obj_location *loc) +{ + int error = GIT_SUCCESS, z_return = Z_ERRNO, read_bytes; + git_file fd; + z_stream zs; + obj_hdr header_obj; + unsigned char raw_buffer[16], inflated_buffer[64]; + + assert(out && db && loc); + + out->data = NULL; + + if ((fd = gitfo_open(loc->loose_path, O_RDONLY)) < 0) + return GIT_ENOTFOUND; + + init_stream(&zs, inflated_buffer, sizeof(inflated_buffer)); + + if (inflateInit(&zs) < Z_OK) { + error = GIT_ERROR; + goto cleanup; + } + + do { + if ((read_bytes = read(fd, raw_buffer, sizeof(raw_buffer))) > 0) { + set_stream_input(&zs, raw_buffer, read_bytes); + z_return = inflate(&zs, 0); } + } while (z_return == Z_OK); + + if ((z_return != Z_STREAM_END && z_return != Z_BUF_ERROR) + || get_object_header(&header_obj, inflated_buffer) == 0 + || git_obj__loose_object_type(header_obj.type) == 0) { + error = GIT_EOBJCORRUPTED; + goto cleanup; } - packlist_dec(db, pl); - return GIT_ENOTFOUND; + out->len = header_obj.size; + out->type = header_obj.type; + +cleanup: + finish_inflate(&zs); + gitfo_close(fd); + return error; +} + +static int write_obj(gitfo_buf *buf, git_oid *id, git_odb *db) +{ + char file[GIT_PATH_MAX]; + char temp[GIT_PATH_MAX]; + git_file fd; + + if 
(object_file_name(file, sizeof(file), db->objects_dir, id)) + return GIT_ERROR; + + if (make_temp_file(&fd, temp, sizeof(temp), file) < 0) + return GIT_ERROR; + + if (gitfo_write(fd, buf->data, buf->len) < 0) { + gitfo_close(fd); + gitfo_unlink(temp); + return GIT_ERROR; + } + + if (db->fsync_object_files) + gitfo_fsync(fd); + gitfo_close(fd); + gitfo_chmod(temp, 0444); + + if (gitfo_move_file(temp, file) < 0) { + gitfo_unlink(temp); + return GIT_ERROR; + } + + return GIT_SUCCESS; +} + + + + + + +/*********************************************************** + * + * OBJECT DATABASE PUBLIC API + * + * Public calls for the ODB functionality + * + ***********************************************************/ + +int git_odb_open(git_odb **out, const char *objects_dir) +{ + git_odb *db = git__calloc(1, sizeof(*db)); + if (!db) + return GIT_ERROR; + + db->objects_dir = git__strdup(objects_dir); + if (!db->objects_dir) { + free(db); + return GIT_ERROR; + } + + gitlck_init(&db->lock); + + db->object_zlib_level = Z_BEST_SPEED; + db->fsync_object_files = 0; + + *out = db; + return GIT_SUCCESS; +} + +void git_odb_close(git_odb *db) +{ + git_packlist *pl; + + if (!db) + return; + + gitlck_lock(&db->lock); + + pl = db->packlist; + db->packlist = NULL; + + if (db->alternates) { + git_odb **alt; + for (alt = db->alternates; *alt; alt++) + git_odb_close(*alt); + free(db->alternates); + } + + free(db->objects_dir); + + gitlck_unlock(&db->lock); + if (pl) + packlist_dec(db, pl); + gitlck_free(&db->lock); + free(db); +} + +int git_odb__read_packed(git_rawobj *out, git_odb *db, const git_oid *id) +{ + obj_location loc; + + if (locate_object(&loc, db, id) != OBJ_LOCATION_INPACK) + return GIT_ENOTFOUND; + + return read_packed(out, &loc); +} + +int git_odb__read_loose(git_rawobj *out, git_odb *db, const git_oid *id) +{ + obj_location loc; + + if (locate_object(&loc, db, id) != OBJ_LOCATION_LOOSE) + return GIT_ENOTFOUND; + + return read_loose(out, db, &loc); +} + +int git_odb_exists(git_odb 
*db, const git_oid *id) +{ + obj_location loc; + assert(db && id); + return locate_object(&loc, db, id) == OBJ_LOCATION_NOTFOUND ? 0 : 1; +} + +int git_odb_read_header(git_rawobj *out, git_odb *db, const git_oid *id) +{ + obj_location loc; + int found, error = 0; + + assert(out && db); + + found = locate_object(&loc, db, id); + + switch (found) { + case OBJ_LOCATION_LOOSE: + error = read_header_loose(out, db, &loc); + break; + + case OBJ_LOCATION_INPACK: + error = read_header_packed(out, &loc); + break; + + case OBJ_LOCATION_NOTFOUND: + error = GIT_ENOTFOUND; + break; + } + + return error; +} + +int git_odb_read( + git_rawobj *out, + git_odb *db, + const git_oid *id) +{ + obj_location loc; + int found, error = 0; + + assert(out && db); + + found = locate_object(&loc, db, id); + + switch (found) { + case OBJ_LOCATION_LOOSE: + error = read_loose(out, db, &loc); + break; + + case OBJ_LOCATION_INPACK: + error = read_packed(out, &loc); + break; + + case OBJ_LOCATION_NOTFOUND: + error = GIT_ENOTFOUND; + break; + } + + return error; } int git_odb_write(git_oid *id, git_odb *db, git_rawobj *obj) -- 2.39.5