2 * Copyright (C) 2009-2011 the libgit2 contributors
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
10 #include "git2/object.h"
15 #include "delta-apply.h"
18 #include "git2/odb_backend.h"
19 #include "git2/types.h"
21 typedef struct { /* object header data */
22 git_otype type
; /* object type */
23 size_t size
; /* object size */
27 git_odb_stream stream
;
32 typedef struct loose_backend
{
33 git_odb_backend parent
;
35 int object_zlib_level
; /** loose object zlib compression level. */
36 int fsync_object_files
; /** loose object file fsync flag. */
40 /* State structure for exploring directories,
41 * in order to locate objects matching a short oid.
45 unsigned char short_oid
[GIT_OID_HEXSZ
]; /* hex formatted oid to match */
46 unsigned int short_oid_len
;
47 int found
; /* number of matching
48 * objects already found */
49 unsigned char res_oid
[GIT_OID_HEXSZ
]; /* hex formatted oid of
51 } loose_locate_object_state
;
55 /***********************************************************
57 * MISCELLANEOUS HELPER FUNCTIONS
59 ***********************************************************/
61 static size_t object_file_name(char *name
, size_t n
, char *dir
, const git_oid
*id
)
63 size_t len
= strlen(dir
);
65 /* check length: 43 = 40 hex sha1 chars + 2 * '/' + '\0' */
69 /* the object dir: eg $GIT_DIR/objects */
71 if (name
[len
-1] != '/')
74 /* loose object filename: aa/aaa... (41 bytes) */
75 git_oid_pathfmt(&name
[len
], id
);
82 static size_t get_binary_object_header(obj_hdr
*hdr
, git_fbuffer
*obj
)
85 unsigned char *data
= obj
->data
;
86 size_t shift
, size
, used
= 0;
92 hdr
->type
= (c
>> 4) & 7;
99 if (sizeof(size_t) * 8 <= shift
)
102 size
+= (c
& 0x7f) << shift
;
110 static size_t get_object_header(obj_hdr
*hdr
, unsigned char *data
)
112 char c
, typename
[10];
113 size_t size
, used
= 0;
116 * type name string followed by space.
118 while ((c
= data
[used
]) != ' ') {
119 typename
[used
++] = c
;
120 if (used
>= sizeof(typename
))
126 hdr
->type
= git_object_string2type(typename
);
127 used
++; /* consume the space */
130 * length follows immediately in decimal (without
133 size
= data
[used
++] - '0';
137 while ((c
= data
[used
]) != '\0') {
142 size
= size
* 10 + d
;
148 * the length must be followed by a zero byte
150 if (data
[used
++] != '\0')
158 /***********************************************************
160 * ZLIB RELATED FUNCTIONS
162 ***********************************************************/
164 static void init_stream(z_stream
*s
, void *out
, size_t len
)
166 memset(s
, 0, sizeof(*s
));
168 s
->avail_out
= (uInt
)len
;
171 static void set_stream_input(z_stream
*s
, void *in
, size_t len
)
174 s
->avail_in
= (uInt
)len
;
177 static void set_stream_output(z_stream
*s
, void *out
, size_t len
)
180 s
->avail_out
= (uInt
)len
;
184 static int start_inflate(z_stream
*s
, git_fbuffer
*obj
, void *out
, size_t len
)
188 init_stream(s
, out
, len
);
189 set_stream_input(s
, obj
->data
, obj
->len
);
191 if ((status
= inflateInit(s
)) < Z_OK
)
194 return inflate(s
, 0);
197 static int finish_inflate(z_stream
*s
)
201 while (status
== Z_OK
)
202 status
= inflate(s
, Z_FINISH
);
206 if ((status
!= Z_STREAM_END
) || (s
->avail_in
!= 0))
207 return git__throw(GIT_ERROR
, "Failed to finish inflation. Stream aborted prematurely");
212 static int is_zlib_compressed_data(unsigned char *data
)
216 w
= ((unsigned int)(data
[0]) << 8) + data
[1];
217 return data
[0] == 0x78 && !(w
% 31);
220 static int inflate_buffer(void *in
, size_t inlen
, void *out
, size_t outlen
)
225 memset(&zs
, 0x0, sizeof(zs
));
228 zs
.avail_out
= (uInt
)outlen
;
231 zs
.avail_in
= (uInt
)inlen
;
233 if (inflateInit(&zs
) < Z_OK
)
234 return git__throw(GIT_ERROR
, "Failed to inflate buffer");
236 while (status
== Z_OK
)
237 status
= inflate(&zs
, Z_FINISH
);
241 if ((status
!= Z_STREAM_END
) /*|| (zs.avail_in != 0) */)
242 return git__throw(GIT_ERROR
, "Failed to inflate buffer. Stream aborted prematurely");
244 if (zs
.total_out
!= outlen
)
245 return git__throw(GIT_ERROR
, "Failed to inflate buffer. Stream aborted prematurely");
250 static void *inflate_tail(z_stream
*s
, void *hb
, size_t used
, obj_hdr
*hdr
)
252 unsigned char *buf
, *head
= hb
;
256 * allocate a buffer to hold the inflated data and copy the
257 * initial sequence of inflated data from the tail of the
258 * head buffer, if any.
260 if ((buf
= git__malloc(hdr
->size
+ 1)) == NULL
) {
264 tail
= s
->total_out
- used
;
265 if (used
> 0 && tail
> 0) {
266 if (tail
> hdr
->size
)
268 memcpy(buf
, head
+ used
, tail
);
273 * inflate the remainder of the object data, if any
275 if (hdr
->size
< used
)
278 set_stream_output(s
, buf
+ used
, hdr
->size
- used
);
279 if (finish_inflate(s
)) {
289 * At one point, there was a loose object format that was intended to
290 * mimic the format used in pack-files. This was to allow easy copying
291 * of loose object data into packs. This format is no longer used, but
292 * we must still read it.
294 static int inflate_packlike_loose_disk_obj(git_rawobj
*out
, git_fbuffer
*obj
)
296 unsigned char *in
, *buf
;
301 * read the object header, which is an (uncompressed)
302 * binary encoding of the object type and size.
304 if ((used
= get_binary_object_header(&hdr
, obj
)) == 0)
305 return git__throw(GIT_ERROR
, "Failed to inflate loose object. Object has no header");
307 if (!git_object_typeisloose(hdr
.type
))
308 return git__throw(GIT_ERROR
, "Failed to inflate loose object. Wrong object type");
311 * allocate a buffer and inflate the data into it
313 buf
= git__malloc(hdr
.size
+ 1);
317 in
= ((unsigned char *)obj
->data
) + used
;
318 len
= obj
->len
- used
;
319 if (inflate_buffer(in
, len
, buf
, hdr
.size
)) {
321 return git__throw(GIT_ERROR
, "Failed to inflate loose object. Could not inflate buffer");
323 buf
[hdr
.size
] = '\0';
327 out
->type
= hdr
.type
;
332 static int inflate_disk_obj(git_rawobj
*out
, git_fbuffer
*obj
)
334 unsigned char head
[64], *buf
;
340 * check for a pack-like loose object
342 if (!is_zlib_compressed_data(obj
->data
))
343 return inflate_packlike_loose_disk_obj(out
, obj
);
346 * inflate the initial part of the io buffer in order
347 * to parse the object header (type and size).
349 if (start_inflate(&zs
, obj
, head
, sizeof(head
)) < Z_OK
)
350 return git__throw(GIT_ERROR
, "Failed to inflate disk object. Could not inflate buffer");
352 if ((used
= get_object_header(&hdr
, head
)) == 0)
353 return git__throw(GIT_ERROR
, "Failed to inflate disk object. Object has no header");
355 if (!git_object_typeisloose(hdr
.type
))
356 return git__throw(GIT_ERROR
, "Failed to inflate disk object. Wrong object type");
359 * allocate a buffer and inflate the object data into it
360 * (including the initial sequence in the head buffer).
362 if ((buf
= inflate_tail(&zs
, head
, used
, &hdr
)) == NULL
)
364 buf
[hdr
.size
] = '\0';
368 out
->type
= hdr
.type
;
378 /***********************************************************
380 * ODB OBJECT READING & WRITING
382 * Backend for the public API; read headers and full objects
383 * from the ODB. Write raw data to the ODB.
385 ***********************************************************/
387 static int read_loose(git_rawobj
*out
, const char *loc
)
390 git_fbuffer obj
= GIT_FBUFFER_INIT
;
396 out
->type
= GIT_OBJ_BAD
;
398 if (git_futils_readbuffer(&obj
, loc
) < 0)
399 return git__throw(GIT_ENOTFOUND
, "Failed to read loose object. File not found");
401 error
= inflate_disk_obj(out
, &obj
);
402 git_futils_freebuffer(&obj
);
404 return error
== GIT_SUCCESS
? GIT_SUCCESS
: git__rethrow(error
, "Failed to read loose object");
407 static int read_header_loose(git_rawobj
*out
, const char *loc
)
409 int error
= GIT_SUCCESS
, z_return
= Z_ERRNO
, read_bytes
;
413 unsigned char raw_buffer
[16], inflated_buffer
[64];
419 if ((fd
= p_open(loc
, O_RDONLY
)) < 0)
420 return git__throw(GIT_ENOTFOUND
, "Failed to read loose object header. File not found");
422 init_stream(&zs
, inflated_buffer
, sizeof(inflated_buffer
));
424 if (inflateInit(&zs
) < Z_OK
) {
430 if ((read_bytes
= read(fd
, raw_buffer
, sizeof(raw_buffer
))) > 0) {
431 set_stream_input(&zs
, raw_buffer
, read_bytes
);
432 z_return
= inflate(&zs
, 0);
434 z_return
= Z_STREAM_END
;
437 } while (z_return
== Z_OK
);
439 if ((z_return
!= Z_STREAM_END
&& z_return
!= Z_BUF_ERROR
)
440 || get_object_header(&header_obj
, inflated_buffer
) == 0
441 || git_object_typeisloose(header_obj
.type
) == 0) {
442 error
= GIT_EOBJCORRUPTED
;
446 out
->len
= header_obj
.size
;
447 out
->type
= header_obj
.type
;
453 if (error
< GIT_SUCCESS
)
454 return git__throw(error
, "Failed to read loose object header. Header is corrupted");
459 static int locate_object(char *object_location
, loose_backend
*backend
, const git_oid
*oid
)
461 object_file_name(object_location
, GIT_PATH_MAX
, backend
->objects_dir
, oid
);
462 return git_futils_exists(object_location
);
465 /* Explore an entry of a directory and see if it matches a short oid */
466 int fn_locate_object_short_oid(void *state
, char *pathbuf
) {
467 loose_locate_object_state
*sstate
= (loose_locate_object_state
*)state
;
469 size_t pathbuf_len
= strlen(pathbuf
);
470 if (pathbuf_len
- sstate
->dir_len
!= GIT_OID_HEXSZ
- 2) {
471 /* Entry cannot be an object. Continue to next entry */
475 if (!git_futils_exists(pathbuf
) && git_futils_isdir(pathbuf
)) {
476 /* We are already in the directory matching the 2 first hex characters,
477 * compare the first ncmp characters of the oids */
478 if (!memcmp(sstate
->short_oid
+ 2,
479 (unsigned char *)pathbuf
+ sstate
->dir_len
,
480 sstate
->short_oid_len
- 2)) {
482 if (!sstate
->found
) {
483 sstate
->res_oid
[0] = sstate
->short_oid
[0];
484 sstate
->res_oid
[1] = sstate
->short_oid
[1];
485 memcpy(sstate
->res_oid
+2, pathbuf
+sstate
->dir_len
, GIT_OID_HEXSZ
-2);
490 if (sstate
->found
> 1)
491 return git__throw(GIT_EAMBIGUOUSOIDPREFIX
, "Ambiguous sha1 prefix within loose objects");
496 /* Locate an object matching a given short oid */
497 static int locate_object_short_oid(char *object_location
, git_oid
*res_oid
, loose_backend
*backend
, const git_oid
*short_oid
, unsigned int len
)
499 char *objects_dir
= backend
->objects_dir
;
500 size_t dir_len
= strlen(objects_dir
);
501 loose_locate_object_state state
;
504 if (dir_len
+43 > GIT_PATH_MAX
)
505 return git__throw(GIT_ERROR
, "Failed to locate object from short oid. Object path too long");
507 strcpy(object_location
, objects_dir
);
509 /* Add a separator if not already there */
510 if (object_location
[dir_len
-1] != '/')
511 object_location
[dir_len
++] = '/';
513 /* Convert raw oid to hex formatted oid */
514 git_oid_fmt((char *)state
.short_oid
, short_oid
);
515 /* Explore OBJ_DIR/xx/ where xx is the beginning of hex formatted short oid */
516 sprintf(object_location
+dir_len
, "%.2s/", state
.short_oid
);
518 /* Check that directory exists */
519 if (git_futils_exists(object_location
) || git_futils_isdir(object_location
))
520 return git__throw(GIT_ENOTFOUND
, "Failed to locate object from short oid. Object not found");
522 state
.dir_len
= dir_len
+3;
523 state
.short_oid_len
= len
;
525 /* Explore directory to find a unique object matching short_oid */
526 error
= git_futils_direach(object_location
, GIT_PATH_MAX
, fn_locate_object_short_oid
, &state
);
528 return git__rethrow(error
, "Failed to locate object from short oid");
531 return git__throw(GIT_ENOTFOUND
, "Failed to locate object from short oid. Object not found");
534 /* Convert obtained hex formatted oid to raw */
535 error
= git_oid_fromstr(res_oid
, (char *)state
.res_oid
);
537 return git__rethrow(error
, "Failed to locate object from short oid");
540 /* Update the location according to the oid obtained */
541 git_oid_pathfmt(object_location
+dir_len
, res_oid
);
554 /***********************************************************
556 * LOOSE BACKEND PUBLIC API
558 * Implement the git_odb_backend API calls
560 ***********************************************************/
562 int loose_backend__read_header(size_t *len_p
, git_otype
*type_p
, git_odb_backend
*backend
, const git_oid
*oid
)
564 char object_path
[GIT_PATH_MAX
];
568 assert(backend
&& oid
);
571 raw
.type
= GIT_OBJ_BAD
;
573 if (locate_object(object_path
, (loose_backend
*)backend
, oid
) < 0)
574 return git__throw(GIT_ENOTFOUND
, "Failed to read loose backend header. Object not found");
576 if ((error
= read_header_loose(&raw
, object_path
)) < GIT_SUCCESS
)
584 int loose_backend__read(void **buffer_p
, size_t *len_p
, git_otype
*type_p
, git_odb_backend
*backend
, const git_oid
*oid
)
586 char object_path
[GIT_PATH_MAX
];
590 assert(backend
&& oid
);
592 if (locate_object(object_path
, (loose_backend
*)backend
, oid
) < 0)
593 return git__throw(GIT_ENOTFOUND
, "Failed to read loose backend. Object not found");
595 if ((error
= read_loose(&raw
, object_path
)) < GIT_SUCCESS
)
596 return git__rethrow(error
, "Failed to read loose backend");
598 *buffer_p
= raw
.data
;
605 int loose_backend__read_prefix(
610 git_odb_backend
*backend
,
611 const git_oid
*short_oid
,
614 if (len
< GIT_OID_MINPREFIXLEN
)
615 return git__throw(GIT_EAMBIGUOUSOIDPREFIX
, "Failed to read loose backend. Prefix length is lower than %d.", GIT_OID_MINPREFIXLEN
);
617 if (len
>= GIT_OID_HEXSZ
) {
618 /* We can fall back to regular read method */
619 int error
= loose_backend__read(buffer_p
, len_p
, type_p
, backend
, short_oid
);
620 if (error
== GIT_SUCCESS
)
621 git_oid_cpy(out_oid
, short_oid
);
625 char object_path
[GIT_PATH_MAX
];
629 assert(backend
&& short_oid
);
631 if ((error
= locate_object_short_oid(object_path
, out_oid
, (loose_backend
*)backend
, short_oid
, len
)) < 0) {
632 return git__rethrow(error
, "Failed to read loose backend");
635 if ((error
= read_loose(&raw
, object_path
)) < GIT_SUCCESS
)
636 return git__rethrow(error
, "Failed to read loose backend");
638 *buffer_p
= raw
.data
;
646 int loose_backend__exists(git_odb_backend
*backend
, const git_oid
*oid
)
648 char object_path
[GIT_PATH_MAX
];
650 assert(backend
&& oid
);
652 return locate_object(object_path
, (loose_backend
*)backend
, oid
) == GIT_SUCCESS
;
655 int loose_backend__stream_fwrite(git_oid
*oid
, git_odb_stream
*_stream
)
657 loose_writestream
*stream
= (loose_writestream
*)_stream
;
658 loose_backend
*backend
= (loose_backend
*)_stream
->backend
;
661 char final_path
[GIT_PATH_MAX
];
663 if ((error
= git_filebuf_hash(oid
, &stream
->fbuf
)) < GIT_SUCCESS
)
664 return git__rethrow(error
, "Failed to write loose backend");
666 if (object_file_name(final_path
, sizeof(final_path
), backend
->objects_dir
, oid
))
669 if ((error
= git_futils_mkpath2file(final_path
)) < GIT_SUCCESS
)
670 return git__rethrow(error
, "Failed to write loose backend");
672 stream
->finished
= 1;
673 return git_filebuf_commit_at(&stream
->fbuf
, final_path
);
676 int loose_backend__stream_write(git_odb_stream
*_stream
, const char *data
, size_t len
)
678 loose_writestream
*stream
= (loose_writestream
*)_stream
;
679 return git_filebuf_write(&stream
->fbuf
, data
, len
);
682 void loose_backend__stream_free(git_odb_stream
*_stream
)
684 loose_writestream
*stream
= (loose_writestream
*)_stream
;
686 if (!stream
->finished
)
687 git_filebuf_cleanup(&stream
->fbuf
);
692 static int format_object_header(char *hdr
, size_t n
, size_t obj_len
, git_otype obj_type
)
694 const char *type_str
= git_object_type2string(obj_type
);
695 int len
= snprintf(hdr
, n
, "%s %"PRIuZ
, type_str
, obj_len
);
697 assert(len
> 0); /* otherwise snprintf() is broken */
698 assert(((size_t) len
) < n
); /* otherwise the caller is broken! */
700 if (len
< 0 || ((size_t) len
) >= n
)
701 return git__throw(GIT_ERROR
, "Failed to format object header. Length is out of bounds");
705 int loose_backend__stream(git_odb_stream
**stream_out
, git_odb_backend
*_backend
, size_t length
, git_otype type
)
707 loose_backend
*backend
;
708 loose_writestream
*stream
;
710 char hdr
[64], tmp_path
[GIT_PATH_MAX
];
716 backend
= (loose_backend
*)_backend
;
719 hdrlen
= format_object_header(hdr
, sizeof(hdr
), length
, type
);
720 if (hdrlen
< GIT_SUCCESS
)
721 return git__throw(GIT_EOBJCORRUPTED
, "Failed to create loose backend stream. Object is corrupted");
723 stream
= git__calloc(1, sizeof(loose_writestream
));
727 stream
->stream
.backend
= _backend
;
728 stream
->stream
.read
= NULL
; /* read only */
729 stream
->stream
.write
= &loose_backend__stream_write
;
730 stream
->stream
.finalize_write
= &loose_backend__stream_fwrite
;
731 stream
->stream
.free
= &loose_backend__stream_free
;
732 stream
->stream
.mode
= GIT_STREAM_WRONLY
;
734 git_path_join(tmp_path
, backend
->objects_dir
, "tmp_object");
736 error
= git_filebuf_open(&stream
->fbuf
, tmp_path
,
737 GIT_FILEBUF_HASH_CONTENTS
|
738 GIT_FILEBUF_DEFLATE_CONTENTS
|
739 GIT_FILEBUF_TEMPORARY
);
741 if (error
< GIT_SUCCESS
) {
743 return git__rethrow(error
, "Failed to create loose backend stream");
746 error
= stream
->stream
.write((git_odb_stream
*)stream
, hdr
, hdrlen
);
747 if (error
< GIT_SUCCESS
) {
748 git_filebuf_cleanup(&stream
->fbuf
);
750 return git__rethrow(error
, "Failed to create loose backend stream");
753 *stream_out
= (git_odb_stream
*)stream
;
757 int loose_backend__write(git_oid
*oid
, git_odb_backend
*_backend
, const void *data
, size_t len
, git_otype type
)
759 int error
, header_len
;
760 char final_path
[GIT_PATH_MAX
], header
[64];
762 loose_backend
*backend
;
764 backend
= (loose_backend
*)_backend
;
766 /* prepare the header for the file */
768 header_len
= format_object_header(header
, sizeof(header
), len
, type
);
769 if (header_len
< GIT_SUCCESS
)
770 return GIT_EOBJCORRUPTED
;
773 git_path_join(final_path
, backend
->objects_dir
, "tmp_object");
775 error
= git_filebuf_open(&fbuf
, final_path
,
776 GIT_FILEBUF_HASH_CONTENTS
|
777 GIT_FILEBUF_DEFLATE_CONTENTS
|
778 GIT_FILEBUF_TEMPORARY
);
780 if (error
< GIT_SUCCESS
)
783 git_filebuf_write(&fbuf
, header
, header_len
);
784 git_filebuf_write(&fbuf
, data
, len
);
785 git_filebuf_hash(oid
, &fbuf
);
787 if ((error
= object_file_name(final_path
, sizeof(final_path
), backend
->objects_dir
, oid
)) < GIT_SUCCESS
)
790 if ((error
= git_futils_mkpath2file(final_path
)) < GIT_SUCCESS
)
793 return git_filebuf_commit_at(&fbuf
, final_path
);
796 git_filebuf_cleanup(&fbuf
);
800 void loose_backend__free(git_odb_backend
*_backend
)
802 loose_backend
*backend
;
804 backend
= (loose_backend
*)_backend
;
806 free(backend
->objects_dir
);
810 int git_odb_backend_loose(git_odb_backend
**backend_out
, const char *objects_dir
)
812 loose_backend
*backend
;
814 backend
= git__calloc(1, sizeof(loose_backend
));
818 backend
->objects_dir
= git__strdup(objects_dir
);
819 if (backend
->objects_dir
== NULL
) {
824 backend
->object_zlib_level
= Z_BEST_SPEED
;
825 backend
->fsync_object_files
= 0;
827 backend
->parent
.read
= &loose_backend__read
;
828 backend
->parent
.write
= &loose_backend__write
;
829 backend
->parent
.read_prefix
= &loose_backend__read_prefix
;
830 backend
->parent
.read_header
= &loose_backend__read_header
;
831 backend
->parent
.writestream
= &loose_backend__stream
;
832 backend
->parent
.exists
= &loose_backend__exists
;
833 backend
->parent
.free
= &loose_backend__free
;
835 *backend_out
= (git_odb_backend
*)backend
;