2 * Copyright (C) 2009-2012 the libgit2 contributors
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
10 #include "git2/object.h"
15 #include "delta-apply.h"
18 #include "git2/odb_backend.h"
19 #include "git2/types.h"
/*
 * Header fields of an object: its type and its size. The parsers later in
 * this file fill these in through an `obj_hdr *` (presumably this struct;
 * its closing line is not part of this listing).
 */
21 typedef struct { /* object header data */
22 git_otype type
; /* object type */
23 size_t size
; /* object size */
/*
 * Embedded generic stream. The stream callbacks below cast a
 * `git_odb_stream *` to `loose_writestream *` and reach this member as
 * `stream->stream` (presumably it is the first member of that struct; the
 * rest of the declaration is not part of this listing).
 */
27 git_odb_stream stream
;
/*
 * State of the loose-object backend. `parent` is the generic
 * git_odb_backend whose callbacks git_odb_backend_loose() fills in; the
 * API functions below cast `git_odb_backend *` back to `loose_backend *`.
 * Code in this file also uses `backend->objects_dir`, whose declaration is
 * not part of this listing.
 */
31 typedef struct loose_backend
{
32 git_odb_backend parent
;
34 int object_zlib_level
; /** loose object zlib compression level. */
35 int fsync_object_files
; /** loose object file fsync flag. */
/*
 * Scratch state shared between locate_object_short_oid() and its directory
 * callback fn_locate_object_short_oid(): the hex prefix being searched for
 * and its length, the number of matches seen so far, and the hex oid of the
 * match. The callback also reads a `dir_len` member that is not part of
 * this listing.
 */
39 /* State structure for exploring directories,
40 * in order to locate objects matching a short oid.
44 unsigned char short_oid
[GIT_OID_HEXSZ
]; /* hex formatted oid to match */
45 unsigned int short_oid_len
;
46 int found
; /* number of matching
47 * objects already found */
48 unsigned char res_oid
[GIT_OID_HEXSZ
]; /* hex formatted oid of
50 } loose_locate_object_state
;
53 /***********************************************************
55 * MISCELLANEOUS HELPER FUNCTIONS
57 ***********************************************************/
/*
 * Build the path of the loose object file for `id` under `dir` in `name`.
 *
 * `name` is first set to `dir` and grown by GIT_OID_HEXSZ + 3 bytes, then
 * git_path_to_dir() is applied (presumably to guarantee a trailing
 * separator) and git_oid_pathfmt() writes the oid directly at the current
 * end of the buffer. Because that write bypasses the git_buf API, the size
 * is advanced by GIT_OID_HEXSZ + 1 and the terminator is stored by hand.
 *
 * Returns int; callers in this file treat a value < 0 as failure. The
 * return statements themselves are not part of this listing.
 */
59 static int object_file_name(git_buf
*name
, const char *dir
, const git_oid
*id
)
61 git_buf_sets(name
, dir
);
63 /* expand length for 40 hex sha1 chars + 2 * '/' + '\0' */
64 if (git_buf_grow(name
, name
->size
+ GIT_OID_HEXSZ
+ 3) < 0)
67 git_path_to_dir(name
);
69 /* loose object filename: aa/aaa... (41 bytes) */
70 git_oid_pathfmt(name
->ptr
+ name
->size
, id
);
71 name
->size
+= GIT_OID_HEXSZ
+ 1;
72 name
->ptr
[name
->size
] = '\0';
/*
 * Decode the binary (pack-like) header at the start of `obj` into `hdr`.
 *
 * Visible steps: the type is taken from bits 4..6 of a header byte
 * ((c >> 4) & 7) and the size is accumulated seven bits at a time
 * ((c & 0x7f) << shift). Two guards are visible: one for running past
 * obj->size and one for `shift` reaching the bit width of size_t.
 *
 * Returns a size_t that the caller stores as `used` (presumably the number
 * of header bytes consumed); the caller treats 0 as failure. The loop
 * structure and the return statements are not part of this listing.
 */
78 static size_t get_binary_object_header(obj_hdr
*hdr
, git_buf
*obj
)
81 unsigned char *data
= (unsigned char *)obj
->ptr
;
82 size_t shift
, size
, used
= 0;
88 hdr
->type
= (c
>> 4) & 7;
93 if (obj
->size
<= used
)
95 if (sizeof(size_t) * 8 <= shift
)
98 size
+= (c
& 0x7f) << shift
;
/*
 * Parse a textual object header from `data` into `hdr`.
 *
 * Expected layout, as read by the code: a type name, one space, the length
 * in decimal, then a zero byte. The type name is copied into a 10-byte
 * scratch array (with a check of `used` against its size) and converted
 * with git_object_string2type(); the length digits are folded into `size`
 * base 10 until the zero byte.
 *
 * Returns a size_t; callers in this file treat 0 as failure.
 *
 * NOTE(review): no input length is passed in, so the scans rely on the
 * space and the zero byte being present in `data` - confirm callers
 * guarantee that.
 */
106 static size_t get_object_header(obj_hdr
*hdr
, unsigned char *data
)
108 char c
, typename
[10];
109 size_t size
, used
= 0;
112 * type name string followed by space.
114 while ((c
= data
[used
]) != ' ') {
115 typename
[used
++] = c
;
116 if (used
>= sizeof(typename
))
122 hdr
->type
= git_object_string2type(typename
);
123 used
++; /* consume the space */
126 * length follows immediately in decimal (without
129 size
= data
[used
++] - '0';
133 while ((c
= data
[used
]) != '\0') {
138 size
= size
* 10 + d
;
144 * the length must be followed by a zero byte
146 if (data
[used
++] != '\0')
154 /***********************************************************
156 * ZLIB RELATED FUNCTIONS
158 ***********************************************************/
/*
 * Reset `s` to all zeroes and record `len` as the available output space.
 *
 * `len` is narrowed to the zlib uInt type by the cast. The statement that
 * consumes `out` is not part of this listing (presumably it sets
 * s->next_out).
 */
160 static void init_stream(z_stream
*s
, void *out
, size_t len
)
162 memset(s
, 0, sizeof(*s
));
164 s
->avail_out
= (uInt
)len
;
/*
 * Record `len` (narrowed to uInt) as the amount of input available to `s`.
 * The statement that consumes `in` is not part of this listing (presumably
 * it sets s->next_in).
 */
167 static void set_stream_input(z_stream
*s
, void *in
, size_t len
)
170 s
->avail_in
= (uInt
)len
;
/*
 * Record `len` (narrowed to uInt) as the output space available to `s`.
 * The statement that consumes `out` is not part of this listing (presumably
 * it sets s->next_out).
 */
173 static void set_stream_output(z_stream
*s
, void *out
, size_t len
)
176 s
->avail_out
= (uInt
)len
;
/*
 * Begin inflating `obj` into the `len`-byte buffer `out`.
 *
 * Prepares `s` with init_stream() and feeds it the whole of `obj` as input,
 * then calls inflateInit(). A result below Z_OK takes the (not shown) error
 * branch; otherwise the function returns the status of a first
 * inflate(s, 0) call.
 */
180 static int start_inflate(z_stream
*s
, git_buf
*obj
, void *out
, size_t len
)
184 init_stream(s
, out
, len
);
185 set_stream_input(s
, obj
->ptr
, obj
->size
);
187 if ((status
= inflateInit(s
)) < Z_OK
)
190 return inflate(s
, 0);
/*
 * Run inflate() with Z_FINISH for as long as it keeps returning Z_OK.
 *
 * The stream counts as finished only if the last status is Z_STREAM_END
 * and no input is left (avail_in == 0); anything else records a
 * GITERR_ZLIB error. inflate_tail() treats a non-zero return as failure.
 * The declaration of `status` and the return statements are not part of
 * this listing.
 */
193 static int finish_inflate(z_stream
*s
)
197 while (status
== Z_OK
)
198 status
= inflate(s
, Z_FINISH
);
202 if ((status
!= Z_STREAM_END
) || (s
->avail_in
!= 0)) {
203 giterr_set(GITERR_ZLIB
, "Failed to finish ZLib inflation. Stream aborted prematurely");
/*
 * Report whether the first two bytes of `data` look like a zlib stream
 * header.
 *
 * Both tests must hold: byte 0 has its low nibble equal to 8 and its top
 * bit clear ((data[0] & 0x8F) == 0x08), and the 16-bit big-endian value of
 * bytes 0 and 1 is a multiple of 31. These match the CM/CINFO and FCHECK
 * rules of the zlib header format (RFC 1950).
 *
 * Reads data[0] and data[1], so `data` must hold at least two bytes.
 */
210 static int is_zlib_compressed_data(unsigned char *data
)
214 w
= ((unsigned int)(data
[0]) << 8) + data
[1];
215 return (data
[0] & 0x8F) == 0x08 && !(w
% 31);
/*
 * Inflate `inlen` bytes at `in` into the `outlen`-byte buffer `out` in one
 * go.
 *
 * The call is only considered successful when inflate() ends with
 * Z_STREAM_END and exactly `outlen` bytes were produced. Left-over input
 * is not treated as an error (that part of the condition is commented
 * out). Failures are recorded with giterr_set(GITERR_ZLIB, ...); the
 * caller in this file checks for a result < 0.
 *
 * Both lengths are narrowed to uInt when stored in the stream.
 */
218 static int inflate_buffer(void *in
, size_t inlen
, void *out
, size_t outlen
)
223 memset(&zs
, 0x0, sizeof(zs
));
226 zs
.avail_out
= (uInt
)outlen
;
229 zs
.avail_in
= (uInt
)inlen
;
231 if (inflateInit(&zs
) < Z_OK
) {
232 giterr_set(GITERR_ZLIB
, "Failed to inflate buffer");
236 while (status
== Z_OK
)
237 status
= inflate(&zs
, Z_FINISH
);
241 if (status
!= Z_STREAM_END
/* || zs.avail_in != 0 */ ||
242 zs
.total_out
!= outlen
)
244 giterr_set(GITERR_ZLIB
, "Failed to inflate buffer. Stream aborted prematurely");
/*
 * Allocate the final buffer for an object whose header has already been
 * inflated into the small head buffer `hb`, and finish inflating into it.
 *
 * hdr->size + 1 bytes are allocated. `tail` is the amount of inflated data
 * beyond the first `used` bytes (s->total_out - used); when both `used` and
 * `tail` are non-zero, `tail` is compared against hdr->size and `tail`
 * bytes are copied from head + used to the start of the new buffer. The
 * stream output is then redirected into the new buffer and
 * finish_inflate() completes the job.
 *
 * Returns void *; the caller treats NULL as failure. Several statements
 * (including every return) are not part of this listing.
 *
 * NOTE(review): hdr->size comes from the object header; hdr->size + 1 is
 * not checked for wrap-around here.
 */
251 static void *inflate_tail(z_stream
*s
, void *hb
, size_t used
, obj_hdr
*hdr
)
253 unsigned char *buf
, *head
= hb
;
257 * allocate a buffer to hold the inflated data and copy the
258 * initial sequence of inflated data from the tail of the
259 * head buffer, if any.
261 if ((buf
= git__malloc(hdr
->size
+ 1)) == NULL
) {
265 tail
= s
->total_out
- used
;
266 if (used
> 0 && tail
> 0) {
267 if (tail
> hdr
->size
)
269 memcpy(buf
, head
+ used
, tail
);
274 * inflate the remainder of the object data, if any
276 if (hdr
->size
< used
)
279 set_stream_output(s
, buf
+ used
, hdr
->size
- used
);
280 if (finish_inflate(s
)) {
290 * At one point, there was a loose object format that was intended to
291 * mimic the format used in pack-files. This was to allow easy copying
292 * of loose object data into packs. This format is no longer used, but
293 * we must still read it.
/*
 * Inflate a loose object stored in the old pack-like format (see the
 * comment above) from `obj` into `out`.
 *
 * The uncompressed binary header is decoded first; a zero header length or
 * a type rejected by git_object_typeisloose() records a GITERR_ODB error.
 * A buffer of hdr.size + 1 bytes is then allocated, the bytes after the
 * header are inflated into it with inflate_buffer(), a terminating zero
 * byte is stored at buf[hdr.size], and out->type is set from the header.
 * The remaining assignments to `out` and the return statements are not
 * part of this listing.
 */
295 static int inflate_packlike_loose_disk_obj(git_rawobj
*out
, git_buf
*obj
)
297 unsigned char *in
, *buf
;
302 * read the object header, which is an (uncompressed)
303 * binary encoding of the object type and size.
305 if ((used
= get_binary_object_header(&hdr
, obj
)) == 0 ||
306 !git_object_typeisloose(hdr
.type
)) {
307 giterr_set(GITERR_ODB
, "Failed to inflate loose object.");
312 * allocate a buffer and inflate the data into it
314 buf
= git__malloc(hdr
.size
+ 1);
315 GITERR_CHECK_ALLOC(buf
);
317 in
= ((unsigned char *)obj
->ptr
) + used
;
318 len
= obj
->size
- used
;
319 if (inflate_buffer(in
, len
, buf
, hdr
.size
) < 0) {
323 buf
[hdr
.size
] = '\0';
327 out
->type
= hdr
.type
;
/*
 * Inflate the raw loose-object file contents in `obj` into `out`.
 *
 * Data that does not start with a zlib header is handed to
 * inflate_packlike_loose_disk_obj(). Otherwise the first part of the
 * stream is inflated into a 64-byte head buffer so that the textual header
 * can be parsed; a start_inflate() status below Z_OK, a zero header length
 * or a type rejected by git_object_typeisloose() records a GITERR_ODB
 * error. inflate_tail() then produces the full buffer (NULL means
 * failure), a zero byte is stored at buf[hdr.size], and out->type is set
 * from the header. The remaining assignments to `out` and the return
 * statements are not part of this listing.
 */
332 static int inflate_disk_obj(git_rawobj
*out
, git_buf
*obj
)
334 unsigned char head
[64], *buf
;
340 * check for a pack-like loose object
342 if (!is_zlib_compressed_data((unsigned char *)obj
->ptr
))
343 return inflate_packlike_loose_disk_obj(out
, obj
);
346 * inflate the initial part of the io buffer in order
347 * to parse the object header (type and size).
349 if (start_inflate(&zs
, obj
, head
, sizeof(head
)) < Z_OK
||
350 (used
= get_object_header(&hdr
, head
)) == 0 ||
351 !git_object_typeisloose(hdr
.type
))
353 giterr_set(GITERR_ODB
, "Failed to inflate disk object.");
358 * allocate a buffer and inflate the object data into it
359 * (including the initial sequence in the head buffer).
361 if ((buf
= inflate_tail(&zs
, head
, used
, &hdr
)) == NULL
)
363 buf
[hdr
.size
] = '\0';
367 out
->type
= hdr
.type
;
377 /***********************************************************
379 * ODB OBJECT READING & WRITING
381 * Backend for the public API; read headers and full objects
382 * from the ODB. Write raw data to the ODB.
384 ***********************************************************/
/*
 * Read the loose object file at path `loc` and inflate it into `out`.
 *
 * `loc` is first checked with git_buf_oom(). out->type starts out as
 * GIT_OBJ_BAD; the file is loaded with git_futils_readbuffer() and, if
 * that returns 0, passed to inflate_disk_obj(), whose result becomes the
 * error code. Cleanup of the local buffer and the return statements are
 * not part of this listing.
 */
386 static int read_loose(git_rawobj
*out
, git_buf
*loc
)
389 git_buf obj
= GIT_BUF_INIT
;
393 if (git_buf_oom(loc
))
398 out
->type
= GIT_OBJ_BAD
;
400 if (!(error
= git_futils_readbuffer(&obj
, loc
->ptr
)))
401 error
= inflate_disk_obj(out
, &obj
);
/*
 * Read only the header (type and size) of the loose object at path `loc`.
 *
 * The file is opened read-only and fed to zlib 16 bytes at a time, with a
 * single 64-byte output buffer, while inflate() keeps returning Z_OK. The
 * assignment z_return = Z_STREAM_END that follows the p_read() test is
 * presumably its else branch (the line in between is not part of this
 * listing), ending the loop when nothing more can be read.
 *
 * The header is then parsed from the inflated bytes: a final status other
 * than Z_STREAM_END or Z_BUF_ERROR, a zero header length or a type
 * rejected by git_object_typeisloose() records a GITERR_ZLIB error;
 * otherwise out->len and out->type are taken from the parsed header.
 *
 * NOTE(review): if the assumption above holds, a negative p_read() result
 * (read error) takes the same path as end of file - confirm that this is
 * intended.
 */
408 static int read_header_loose(git_rawobj
*out
, git_buf
*loc
)
410 int error
= 0, z_return
= Z_ERRNO
, read_bytes
;
414 unsigned char raw_buffer
[16], inflated_buffer
[64];
418 if (git_buf_oom(loc
))
423 if ((fd
= git_futils_open_ro(loc
->ptr
)) < 0)
426 init_stream(&zs
, inflated_buffer
, sizeof(inflated_buffer
));
428 z_return
= inflateInit(&zs
);
430 while (z_return
== Z_OK
) {
431 if ((read_bytes
= p_read(fd
, raw_buffer
, sizeof(raw_buffer
))) > 0) {
432 set_stream_input(&zs
, raw_buffer
, read_bytes
);
433 z_return
= inflate(&zs
, 0);
435 z_return
= Z_STREAM_END
;
438 if ((z_return
!= Z_STREAM_END
&& z_return
!= Z_BUF_ERROR
)
439 || get_object_header(&header_obj
, inflated_buffer
) == 0
440 || git_object_typeisloose(header_obj
.type
) == 0)
442 giterr_set(GITERR_ZLIB
, "Failed to read loose object header");
445 out
->len
= header_obj
.size
;
446 out
->type
= header_obj
.type
;
/*
 * Compute the path of the loose object `oid` into `object_location` and
 * check that it exists.
 *
 * Returns GIT_ENOTFOUND when the path was built successfully but
 * git_path_exists() reports no such file. The final return is not part of
 * this listing (presumably the object_file_name() result).
 */
455 static int locate_object(
456 git_buf
*object_location
,
457 loose_backend
*backend
,
460 int error
= object_file_name(object_location
, backend
->objects_dir
, oid
);
462 if (!error
&& !git_path_exists(object_location
->ptr
))
463 return GIT_ENOTFOUND
;
468 /* Explore an entry of a directory and see if it matches a short oid */
/*
 * Directory callback: test one entry against the short oid in `state`
 * (a loose_locate_object_state).
 *
 * An entry is only considered when the part of its path after
 * sstate->dir_len is exactly GIT_OID_HEXSZ - 2 characters long and it is
 * not a directory. That part is compared with the short oid from the third
 * hex character on, over short_oid_len - 2 characters. For the first match
 * (found == 0) the full hex oid is assembled in res_oid from the two
 * leading characters of short_oid plus the entry name. When `found`
 * exceeds 1 the walk is stopped with an ambiguity error. The statement
 * that updates `found` and the other returns are not part of this listing.
 */
469 static int fn_locate_object_short_oid(void *state
, git_buf
*pathbuf
) {
470 loose_locate_object_state
*sstate
= (loose_locate_object_state
*)state
;
472 if (pathbuf
->size
- sstate
->dir_len
!= GIT_OID_HEXSZ
- 2) {
473 /* Entry cannot be an object. Continue to next entry */
477 if (git_path_isdir(pathbuf
->ptr
) == false) {
478 /* We are already in the directory matching the 2 first hex characters,
479 * compare the first ncmp characters of the oids */
480 if (!memcmp(sstate
->short_oid
+ 2,
481 (unsigned char *)pathbuf
->ptr
+ sstate
->dir_len
,
482 sstate
->short_oid_len
- 2)) {
484 if (!sstate
->found
) {
485 sstate
->res_oid
[0] = sstate
->short_oid
[0];
486 sstate
->res_oid
[1] = sstate
->short_oid
[1];
487 memcpy(sstate
->res_oid
+2, pathbuf
->ptr
+sstate
->dir_len
, GIT_OID_HEXSZ
-2);
493 if (sstate
->found
> 1)
494 return git_odb__error_ambiguous("multiple matches in loose objects");
499 /* Locate an object matching a given short oid */
/*
 * Locate the unique loose object whose oid starts with the first `len`
 * hex characters of `short_oid`.
 *
 * Steps visible here:
 *  - build OBJ_DIR/ in `object_location` and remember its length;
 *  - format `short_oid` as hex and append the two-character directory
 *    (xx/); a missing directory means not found;
 *  - walk that directory with fn_locate_object_short_oid(), which collects
 *    the match in `state`;
 *  - convert the matched hex oid back into `res_oid`;
 *  - truncate the buffer back to OBJ_DIR/ and write the full object path
 *    with git_oid_pathfmt(), fixing up size and terminator by hand.
 *
 * Part of the parameter list, the checks made on the walk result and the
 * return statements are not part of this listing.
 *
 * NOTE(review): state.res_oid holds exactly GIT_OID_HEXSZ characters with
 * no terminator, so this relies on git_oid_fromstr() reading no more than
 * that - confirm.
 */
500 static int locate_object_short_oid(
501 git_buf
*object_location
,
503 loose_backend
*backend
,
504 const git_oid
*short_oid
,
507 char *objects_dir
= backend
->objects_dir
;
508 size_t dir_len
= strlen(objects_dir
);
509 loose_locate_object_state state
;
512 /* prealloc memory for OBJ_DIR/xx/ */
513 if (git_buf_grow(object_location
, dir_len
+ 5) < 0)
516 git_buf_sets(object_location
, objects_dir
);
517 git_path_to_dir(object_location
);
519 /* save adjusted position at end of dir so it can be restored later */
520 dir_len
= object_location
->size
;
522 /* Convert raw oid to hex formatted oid */
523 git_oid_fmt((char *)state
.short_oid
, short_oid
);
525 /* Explore OBJ_DIR/xx/ where xx is the beginning of hex formatted short oid */
526 if (git_buf_printf(object_location
, "%.2s/", state
.short_oid
) < 0)
529 /* Check that directory exists */
530 if (git_path_isdir(object_location
->ptr
) == false)
531 return git_odb__error_notfound("failed to locate from short oid");
533 state
.dir_len
= object_location
->size
;
534 state
.short_oid_len
= len
;
537 /* Explore directory to find a unique object matching short_oid */
538 error
= git_path_direach(
539 object_location
, fn_locate_object_short_oid
, &state
);
544 return git_odb__error_notfound("failed to locate from short oid");
546 /* Convert obtained hex formatted oid to raw */
547 error
= git_oid_fromstr(res_oid
, (char *)state
.res_oid
);
551 /* Update the location according to the oid obtained */
553 git_buf_truncate(object_location
, dir_len
);
554 if (git_buf_grow(object_location
, dir_len
+ GIT_OID_HEXSZ
+ 2) < 0)
557 git_oid_pathfmt(object_location
->ptr
+ dir_len
, res_oid
);
559 object_location
->size
+= GIT_OID_HEXSZ
+ 1;
560 object_location
->ptr
[object_location
->size
] = '\0';
573 /***********************************************************
575 * LOOSE BACKEND PUBLIC API
577 * Implement the git_odb_backend API calls
579 ***********************************************************/
/*
 * read_header callback: look up `oid` among the loose objects and read
 * just its header.
 *
 * Any negative result from locate_object() is reported through
 * git_odb__error_notfound(). Otherwise read_header_loose() parses the
 * header into a local git_rawobj that was pre-set to GIT_OBJ_BAD; the
 * statements that copy the results to `len_p` / `type_p` and the return
 * are not part of this listing. The path buffer is released with
 * git_buf_free().
 */
581 static int loose_backend__read_header(size_t *len_p
, git_otype
*type_p
, git_odb_backend
*backend
, const git_oid
*oid
)
583 git_buf object_path
= GIT_BUF_INIT
;
587 assert(backend
&& oid
);
590 raw
.type
= GIT_OBJ_BAD
;
592 if (locate_object(&object_path
, (loose_backend
*)backend
, oid
) < 0)
593 error
= git_odb__error_notfound("in loose backend");
594 else if ((error
= read_header_loose(&raw
, &object_path
)) == 0) {
599 git_buf_free(&object_path
);
/*
 * read callback: look up `oid` among the loose objects and load the whole
 * object.
 *
 * Any negative result from locate_object() is reported through
 * git_odb__error_notfound(). Otherwise read_loose() inflates the object
 * and, when it returns 0, *buffer_p receives raw.data; the assignments to
 * `len_p` / `type_p` and the return are not part of this listing. The path
 * buffer is released with git_buf_free().
 */
604 static int loose_backend__read(void **buffer_p
, size_t *len_p
, git_otype
*type_p
, git_odb_backend
*backend
, const git_oid
*oid
)
606 git_buf object_path
= GIT_BUF_INIT
;
610 assert(backend
&& oid
);
612 if (locate_object(&object_path
, (loose_backend
*)backend
, oid
) < 0)
613 error
= git_odb__error_notfound("in loose backend");
614 else if ((error
= read_loose(&raw
, &object_path
)) == 0) {
615 *buffer_p
= raw
.data
;
620 git_buf_free(&object_path
);
/*
 * read_prefix callback: read the loose object identified by the first
 * `len` hex characters of `short_oid`.
 *
 *  - len < GIT_OID_MINPREFIXLEN: rejected through
 *    git_odb__error_ambiguous() as too short a prefix.
 *  - len >= GIT_OID_HEXSZ: the prefix is a full oid, so the regular
 *    loose_backend__read() is used and `short_oid` is copied to `out_oid`
 *    (any condition guarding that copy is not part of this listing).
 *  - otherwise: locate_object_short_oid() resolves the prefix (filling
 *    `out_oid`) and read_loose() loads the object; when both return 0,
 *    *buffer_p receives raw.data.
 *
 * Part of the parameter list and several statements, including the return,
 * are not part of this listing.
 */
625 static int loose_backend__read_prefix(
630 git_odb_backend
*backend
,
631 const git_oid
*short_oid
,
636 if (len
< GIT_OID_MINPREFIXLEN
)
637 error
= git_odb__error_ambiguous("prefix length too short");
639 else if (len
>= GIT_OID_HEXSZ
) {
640 /* We can fall back to regular read method */
641 error
= loose_backend__read(buffer_p
, len_p
, type_p
, backend
, short_oid
);
643 git_oid_cpy(out_oid
, short_oid
);
645 git_buf object_path
= GIT_BUF_INIT
;
648 assert(backend
&& short_oid
);
650 if ((error
= locate_object_short_oid(&object_path
, out_oid
,
651 (loose_backend
*)backend
, short_oid
, len
)) == 0 &&
652 (error
= read_loose(&raw
, &object_path
)) == 0)
654 *buffer_p
= raw
.data
;
659 git_buf_free(&object_path
);
/*
 * exists callback: run locate_object() for `oid` and release the path
 * buffer. The return statement is not part of this listing; presumably the
 * result is derived from `error` - confirm.
 */
665 static int loose_backend__exists(git_odb_backend
*backend
, const git_oid
*oid
)
667 git_buf object_path
= GIT_BUF_INIT
;
670 assert(backend
&& oid
);
672 error
= locate_object(&object_path
, (loose_backend
*)backend
, oid
);
674 git_buf_free(&object_path
);
/*
 * finalize_write callback: obtain the oid of the streamed data and move
 * the temporary file to its final place in the object directory.
 *
 * git_filebuf_hash() fills `oid`, object_file_name() builds the final path
 * and git_futils_mkpath2file() is called on that path (presumably creating
 * the parent directory); a negative result from any of them takes the
 * (not shown) error branch. If the final path already exists the temporary
 * file is discarded with git_filebuf_cleanup(); the git_filebuf_commit_at()
 * call that follows is presumably the else branch and stores its result in
 * `error`. The path buffer is released with git_buf_free().
 */
679 static int loose_backend__stream_fwrite(git_oid
*oid
, git_odb_stream
*_stream
)
681 loose_writestream
*stream
= (loose_writestream
*)_stream
;
682 loose_backend
*backend
= (loose_backend
*)_stream
->backend
;
683 git_buf final_path
= GIT_BUF_INIT
;
686 if (git_filebuf_hash(oid
, &stream
->fbuf
) < 0 ||
687 object_file_name(&final_path
, backend
->objects_dir
, oid
) < 0 ||
688 git_futils_mkpath2file(final_path
.ptr
, GIT_OBJECT_DIR_MODE
) < 0)
691 * Don't try to add an existing object to the repository. This
692 * is what git does and allows us to sidestep the fact that
693 * we're not allowed to overwrite a read-only file on Windows.
695 else if (git_path_exists(final_path
.ptr
) == true)
696 git_filebuf_cleanup(&stream
->fbuf
);
698 error
= git_filebuf_commit_at(
699 &stream
->fbuf
, final_path
.ptr
, GIT_OBJECT_FILE_MODE
);
701 git_buf_free(&final_path
);
/*
 * write callback: pass `len` bytes of `data` to git_filebuf_write() on the
 * file buffer of the stream and return its result unchanged.
 */
706 static int loose_backend__stream_write(git_odb_stream
*_stream
, const char *data
, size_t len
)
708 loose_writestream
*stream
= (loose_writestream
*)_stream
;
709 return git_filebuf_write(&stream
->fbuf
, data
, len
);
/*
 * free callback: clean up the file buffer embedded in the stream
 * (stream->fbuf) with git_filebuf_cleanup(). Any further release of the
 * stream object itself is not part of this listing.
 */
712 static void loose_backend__stream_free(git_odb_stream
*_stream
)
714 loose_writestream
*stream
= (loose_writestream
*)_stream
;
716 git_filebuf_cleanup(&stream
->fbuf
);
/*
 * Format the textual object header (type name, one space, decimal length)
 * for an object of type `obj_type` and length `obj_len` into `hdr`, which
 * has room for `n` bytes.
 *
 * The asserts require snprintf() to produce at least one character and the
 * result to fit in `n` without truncation. The return statement is not
 * part of this listing; callers use the result as the number of header
 * bytes to write.
 *
 * NOTE(review): the size checks are asserts only, so they vanish in
 * NDEBUG builds.
 */
720 static int format_object_header(char *hdr
, size_t n
, size_t obj_len
, git_otype obj_type
)
722 const char *type_str
= git_object_type2string(obj_type
);
723 int len
= snprintf(hdr
, n
, "%s %"PRIuZ
, type_str
, obj_len
);
725 assert(len
> 0); /* otherwise snprintf() is broken */
726 assert(((size_t)len
) < n
); /* otherwise the caller is broken! */
/*
 * writestream callback: create a write-only stream for a new loose object
 * of `length` bytes and type `type`.
 *
 * The object header is formatted up front. A zeroed loose_writestream is
 * allocated and its callbacks are wired to the loose_backend__stream_*
 * functions with mode GIT_STREAM_WRONLY. A temporary, content-hashing file
 * buffer is opened from the path objects_dir/tmp_object with the deflate
 * level of the backend, and the header is written through the stream as
 * its first data. If any of these steps returns < 0 the file buffer is
 * cleaned up (the rest of that branch is not part of this listing).
 *
 * *stream_out receives the stream; the function returns -1 when `stream`
 * is NULL and 0 otherwise.
 */
731 static int loose_backend__stream(git_odb_stream
**stream_out
, git_odb_backend
*_backend
, size_t length
, git_otype type
)
733 loose_backend
*backend
;
734 loose_writestream
*stream
= NULL
;
736 git_buf tmp_path
= GIT_BUF_INIT
;
741 backend
= (loose_backend
*)_backend
;
744 hdrlen
= format_object_header(hdr
, sizeof(hdr
), length
, type
);
746 stream
= git__calloc(1, sizeof(loose_writestream
));
747 GITERR_CHECK_ALLOC(stream
);
749 stream
->stream
.backend
= _backend
;
750 stream
->stream
.read
= NULL
; /* write-only stream: no read callback */
751 stream
->stream
.write
= &loose_backend__stream_write
;
752 stream
->stream
.finalize_write
= &loose_backend__stream_fwrite
;
753 stream
->stream
.free
= &loose_backend__stream_free
;
754 stream
->stream
.mode
= GIT_STREAM_WRONLY
;
756 if (git_buf_joinpath(&tmp_path
, backend
->objects_dir
, "tmp_object") < 0 ||
757 git_filebuf_open(&stream
->fbuf
, tmp_path
.ptr
,
758 GIT_FILEBUF_HASH_CONTENTS
|
759 GIT_FILEBUF_TEMPORARY
|
760 (backend
->object_zlib_level
<< GIT_FILEBUF_DEFLATE_SHIFT
)) < 0 ||
761 stream
->stream
.write((git_odb_stream
*)stream
, hdr
, hdrlen
) < 0)
763 git_filebuf_cleanup(&stream
->fbuf
);
767 git_buf_free(&tmp_path
);
768 *stream_out
= (git_odb_stream
*)stream
;
770 return !stream
? -1 : 0;
/*
 * write callback: store `len` bytes of `data` as a loose object of type
 * `type` and report its id in `oid`.
 *
 * The header and the data are written to a temporary, content-hashing file
 * buffer opened from the path objects_dir/tmp_object; git_filebuf_hash()
 * then fills `oid`. `final_path` is reused for the real object path,
 * git_futils_mkpath2file() is called on it and the file buffer is
 * committed there. The file buffer and the path are cleaned up at the end;
 * the branches taken on failure and the return are not part of this
 * listing.
 *
 * NOTE(review): the results of the two git_filebuf_write() calls and of
 * git_filebuf_hash() are not checked in the statements visible here.
 */
773 static int loose_backend__write(git_oid
*oid
, git_odb_backend
*_backend
, const void *data
, size_t len
, git_otype type
)
775 int error
, header_len
;
776 git_buf final_path
= GIT_BUF_INIT
;
778 git_filebuf fbuf
= GIT_FILEBUF_INIT
;
779 loose_backend
*backend
;
781 backend
= (loose_backend
*)_backend
;
783 /* prepare the header for the file */
784 header_len
= format_object_header(header
, sizeof(header
), len
, type
);
786 if (git_buf_joinpath(&final_path
, backend
->objects_dir
, "tmp_object") < 0 ||
787 git_filebuf_open(&fbuf
, final_path
.ptr
,
788 GIT_FILEBUF_HASH_CONTENTS
|
789 GIT_FILEBUF_TEMPORARY
|
790 (backend
->object_zlib_level
<< GIT_FILEBUF_DEFLATE_SHIFT
)) < 0)
796 git_filebuf_write(&fbuf
, header
, header_len
);
797 git_filebuf_write(&fbuf
, data
, len
);
798 git_filebuf_hash(oid
, &fbuf
);
800 if (object_file_name(&final_path
, backend
->objects_dir
, oid
) < 0 ||
801 git_futils_mkpath2file(final_path
.ptr
, GIT_OBJECT_DIR_MODE
) < 0 ||
802 git_filebuf_commit_at(&fbuf
, final_path
.ptr
, GIT_OBJECT_FILE_MODE
) < 0)
807 git_filebuf_cleanup(&fbuf
);
808 git_buf_free(&final_path
);
/*
 * free callback: release the objects_dir string that
 * git_odb_backend_loose() duplicated for this backend. Release of the
 * backend structure itself is not part of this listing.
 */
812 static void loose_backend__free(git_odb_backend
*_backend
)
814 loose_backend
*backend
;
816 backend
= (loose_backend
*)_backend
;
818 git__free(backend
->objects_dir
);
/*
 * Create a loose-object ODB backend rooted at `objects_dir`.
 *
 * A zeroed loose_backend is allocated and given its own copy of
 * `objects_dir`. A negative `compression_level` selects Z_BEST_SPEED. The
 * fsync flag is taken from `do_fsync` (a parameter whose declaration is
 * not part of this listing), the git_odb_backend callbacks are pointed at
 * the loose_backend__* functions above, and the new backend is handed
 * back through `backend_out`. The return statement is not part of this
 * listing.
 *
 * NOTE(review): if git__strdup() fails, GITERR_CHECK_ALLOC presumably
 * returns at once, in which case `backend` would leak - confirm.
 */
822 int git_odb_backend_loose(
823 git_odb_backend
**backend_out
,
824 const char *objects_dir
,
825 int compression_level
,
828 loose_backend
*backend
;
830 backend
= git__calloc(1, sizeof(loose_backend
));
831 GITERR_CHECK_ALLOC(backend
);
833 backend
->objects_dir
= git__strdup(objects_dir
);
834 GITERR_CHECK_ALLOC(backend
->objects_dir
);
836 if (compression_level
< 0)
837 compression_level
= Z_BEST_SPEED
;
839 backend
->object_zlib_level
= compression_level
;
840 backend
->fsync_object_files
= do_fsync
;
842 backend
->parent
.read
= &loose_backend__read
;
843 backend
->parent
.write
= &loose_backend__write
;
844 backend
->parent
.read_prefix
= &loose_backend__read_prefix
;
845 backend
->parent
.read_header
= &loose_backend__read_header
;
846 backend
->parent
.writestream
= &loose_backend__stream
;
847 backend
->parent
.exists
= &loose_backend__exists
;
848 backend
->parent
.free
= &loose_backend__free
;
850 *backend_out
= (git_odb_backend
*)backend
;