]> git.proxmox.com Git - libgit2.git/blob - src/odb.c
d951bc51b49f12d3269e5f3e2cda0db6adf3a1ba
[libgit2.git] / src / odb.c
1 /*
2 * Copyright (C) 2009-2012 the libgit2 contributors
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8 #include "common.h"
9 #include <zlib.h>
10 #include "git2/object.h"
11 #include "fileops.h"
12 #include "hash.h"
13 #include "odb.h"
14 #include "delta-apply.h"
15 #include "filter.h"
16
17 #include "git2/odb_backend.h"
18 #include "git2/oid.h"
19
/* Location of the alternates list, relative to an objects directory. */
#define GIT_ALTERNATES_FILE "info/alternates"

/*
 * Priorities given to the two default backends. Backends are sorted with
 * the higher priority first, so the loose backend precedes the packed one.
 * TODO: is this correct?
 */
#define GIT_LOOSE_PRIORITY 2
#define GIT_PACKED_PRIORITY 1
25
26 typedef struct
27 {
28 git_odb_backend *backend;
29 int priority;
30 int is_alternate;
31 } backend_internal;
32
33 static int format_object_header(char *hdr, size_t n, size_t obj_len, git_otype obj_type)
34 {
35 const char *type_str = git_object_type2string(obj_type);
36 int len = p_snprintf(hdr, n, "%s %"PRIuZ, type_str, obj_len);
37 assert(len > 0 && len <= (int)n);
38 return len+1;
39 }
40
41 int git_odb__hashobj(git_oid *id, git_rawobj *obj)
42 {
43 git_buf_vec vec[2];
44 char header[64];
45 int hdrlen;
46
47 assert(id && obj);
48
49 if (!git_object_typeisloose(obj->type))
50 return -1;
51 if (!obj->data && obj->len != 0)
52 return -1;
53
54 hdrlen = format_object_header(header, sizeof(header), obj->len, obj->type);
55
56 vec[0].data = header;
57 vec[0].len = hdrlen;
58 vec[1].data = obj->data;
59 vec[1].len = obj->len;
60
61 git_hash_vec(id, vec, 2);
62
63 return 0;
64 }
65
66
67 static git_odb_object *new_odb_object(const git_oid *oid, git_rawobj *source)
68 {
69 git_odb_object *object = git__malloc(sizeof(git_odb_object));
70 memset(object, 0x0, sizeof(git_odb_object));
71
72 git_oid_cpy(&object->cached.oid, oid);
73 memcpy(&object->raw, source, sizeof(git_rawobj));
74
75 return object;
76 }
77
78 static void free_odb_object(void *o)
79 {
80 git_odb_object *object = (git_odb_object *)o;
81
82 if (object != NULL) {
83 git__free(object->raw.data);
84 git__free(object);
85 }
86 }
87
88 const git_oid *git_odb_object_id(git_odb_object *object)
89 {
90 return &object->cached.oid;
91 }
92
93 const void *git_odb_object_data(git_odb_object *object)
94 {
95 return object->raw.data;
96 }
97
98 size_t git_odb_object_size(git_odb_object *object)
99 {
100 return object->raw.len;
101 }
102
103 git_otype git_odb_object_type(git_odb_object *object)
104 {
105 return object->raw.type;
106 }
107
108 void git_odb_object_free(git_odb_object *object)
109 {
110 git_cached_obj_decref((git_cached_obj *)object, &free_odb_object);
111 }
112
113 int git_odb__hashfd(git_oid *out, git_file fd, size_t size, git_otype type)
114 {
115 int hdr_len;
116 char hdr[64], buffer[2048];
117 git_hash_ctx *ctx;
118 ssize_t read_len = 0;
119
120 if (!git_object_typeisloose(type)) {
121 giterr_set(GITERR_INVALID, "Invalid object type for hash");
122 return -1;
123 }
124
125 hdr_len = format_object_header(hdr, sizeof(hdr), size, type);
126
127 ctx = git_hash_new_ctx();
128 GITERR_CHECK_ALLOC(ctx);
129
130 git_hash_update(ctx, hdr, hdr_len);
131
132 while (size > 0 && (read_len = p_read(fd, buffer, sizeof(buffer))) > 0) {
133 git_hash_update(ctx, buffer, read_len);
134 size -= read_len;
135 }
136
137 /* If p_read returned an error code, the read obviously failed.
138 * If size is not zero, the file was truncated after we originally
139 * stat'd it, so we consider this a read failure too */
140 if (read_len < 0 || size > 0) {
141 git_hash_free_ctx(ctx);
142 giterr_set(GITERR_OS, "Error reading file for hashing");
143 return -1;
144 }
145
146 git_hash_final(out, ctx);
147 git_hash_free_ctx(ctx);
148
149 return 0;
150 }
151
152 int git_odb__hashfd_filtered(
153 git_oid *out, git_file fd, size_t size, git_otype type, git_vector *filters)
154 {
155 int error;
156 git_buf raw = GIT_BUF_INIT;
157 git_buf filtered = GIT_BUF_INIT;
158
159 if (!filters || !filters->length)
160 return git_odb__hashfd(out, fd, size, type);
161
162 /* size of data is used in header, so we have to read the whole file
163 * into memory to apply filters before beginning to calculate the hash
164 */
165
166 if (!(error = git_futils_readbuffer_fd(&raw, fd, size)))
167 error = git_filters_apply(&filtered, &raw, filters);
168
169 git_buf_free(&raw);
170
171 if (!error)
172 error = git_odb_hash(out, filtered.ptr, filtered.size, type);
173
174 git_buf_free(&filtered);
175
176 return error;
177 }
178
179 int git_odb__hashlink(git_oid *out, const char *path)
180 {
181 struct stat st;
182 git_off_t size;
183 int result;
184
185 if (git_path_lstat(path, &st) < 0)
186 return -1;
187
188 size = st.st_size;
189
190 if (!git__is_sizet(size)) {
191 giterr_set(GITERR_OS, "File size overflow for 32-bit systems");
192 return -1;
193 }
194
195 if (S_ISLNK(st.st_mode)) {
196 char *link_data;
197 ssize_t read_len;
198
199 link_data = git__malloc((size_t)(size + 1));
200 GITERR_CHECK_ALLOC(link_data);
201
202 read_len = p_readlink(path, link_data, (size_t)size);
203 link_data[size] = '\0';
204 if (read_len != (ssize_t)size) {
205 giterr_set(GITERR_OS, "Failed to read symlink data for '%s'", path);
206 return -1;
207 }
208
209 result = git_odb_hash(out, link_data, (size_t)size, GIT_OBJ_BLOB);
210 git__free(link_data);
211 } else {
212 int fd = git_futils_open_ro(path);
213 if (fd < 0)
214 return -1;
215 result = git_odb__hashfd(out, fd, (size_t)size, GIT_OBJ_BLOB);
216 p_close(fd);
217 }
218
219 return result;
220 }
221
222 int git_odb_hashfile(git_oid *out, const char *path, git_otype type)
223 {
224 git_off_t size;
225 int result, fd = git_futils_open_ro(path);
226 if (fd < 0)
227 return fd;
228
229 if ((size = git_futils_filesize(fd)) < 0 || !git__is_sizet(size)) {
230 giterr_set(GITERR_OS, "File size overflow for 32-bit systems");
231 p_close(fd);
232 return -1;
233 }
234
235 result = git_odb__hashfd(out, fd, (size_t)size, type);
236 p_close(fd);
237 return result;
238 }
239
240 int git_odb_hash(git_oid *id, const void *data, size_t len, git_otype type)
241 {
242 git_rawobj raw;
243
244 assert(id);
245
246 raw.data = (void *)data;
247 raw.len = len;
248 raw.type = type;
249
250 return git_odb__hashobj(id, &raw);
251 }
252
253 /**
254 * FAKE WSTREAM
255 */
256
257 typedef struct {
258 git_odb_stream stream;
259 char *buffer;
260 size_t size, written;
261 git_otype type;
262 } fake_wstream;
263
264 static int fake_wstream__fwrite(git_oid *oid, git_odb_stream *_stream)
265 {
266 fake_wstream *stream = (fake_wstream *)_stream;
267 return _stream->backend->write(oid, _stream->backend, stream->buffer, stream->size, stream->type);
268 }
269
270 static int fake_wstream__write(git_odb_stream *_stream, const char *data, size_t len)
271 {
272 fake_wstream *stream = (fake_wstream *)_stream;
273
274 if (stream->written + len > stream->size)
275 return -1;
276
277 memcpy(stream->buffer + stream->written, data, len);
278 stream->written += len;
279 return 0;
280 }
281
282 static void fake_wstream__free(git_odb_stream *_stream)
283 {
284 fake_wstream *stream = (fake_wstream *)_stream;
285
286 git__free(stream->buffer);
287 git__free(stream);
288 }
289
290 static int init_fake_wstream(git_odb_stream **stream_p, git_odb_backend *backend, size_t size, git_otype type)
291 {
292 fake_wstream *stream;
293
294 stream = git__calloc(1, sizeof(fake_wstream));
295 GITERR_CHECK_ALLOC(stream);
296
297 stream->size = size;
298 stream->type = type;
299 stream->buffer = git__malloc(size);
300 if (stream->buffer == NULL) {
301 git__free(stream);
302 return -1;
303 }
304
305 stream->stream.backend = backend;
306 stream->stream.read = NULL; /* read only */
307 stream->stream.write = &fake_wstream__write;
308 stream->stream.finalize_write = &fake_wstream__fwrite;
309 stream->stream.free = &fake_wstream__free;
310 stream->stream.mode = GIT_STREAM_WRONLY;
311
312 *stream_p = (git_odb_stream *)stream;
313 return 0;
314 }
315
316 /***********************************************************
317 *
318 * OBJECT DATABASE PUBLIC API
319 *
320 * Public calls for the ODB functionality
321 *
322 ***********************************************************/
323
324 static int backend_sort_cmp(const void *a, const void *b)
325 {
326 const backend_internal *backend_a = (const backend_internal *)(a);
327 const backend_internal *backend_b = (const backend_internal *)(b);
328
329 if (backend_a->is_alternate == backend_b->is_alternate)
330 return (backend_b->priority - backend_a->priority);
331
332 return backend_a->is_alternate ? 1 : -1;
333 }
334
335 int git_odb_new(git_odb **out)
336 {
337 git_odb *db = git__calloc(1, sizeof(*db));
338 GITERR_CHECK_ALLOC(db);
339
340 if (git_cache_init(&db->cache, GIT_DEFAULT_CACHE_SIZE, &free_odb_object) < 0 ||
341 git_vector_init(&db->backends, 4, backend_sort_cmp) < 0)
342 {
343 git__free(db);
344 return -1;
345 }
346
347 *out = db;
348 GIT_REFCOUNT_INC(db);
349 return 0;
350 }
351
352 static int add_backend_internal(git_odb *odb, git_odb_backend *backend, int priority, int is_alternate)
353 {
354 backend_internal *internal;
355
356 assert(odb && backend);
357
358 /* Check if the backend is already owned by another ODB */
359 assert(!backend->odb || backend->odb == odb);
360
361 internal = git__malloc(sizeof(backend_internal));
362 GITERR_CHECK_ALLOC(internal);
363
364 internal->backend = backend;
365 internal->priority = priority;
366 internal->is_alternate = is_alternate;
367
368 if (git_vector_insert(&odb->backends, internal) < 0) {
369 git__free(internal);
370 return -1;
371 }
372
373 git_vector_sort(&odb->backends);
374 internal->backend->odb = odb;
375 return 0;
376 }
377
378 int git_odb_add_backend(git_odb *odb, git_odb_backend *backend, int priority)
379 {
380 return add_backend_internal(odb, backend, priority, 0);
381 }
382
383 int git_odb_add_alternate(git_odb *odb, git_odb_backend *backend, int priority)
384 {
385 return add_backend_internal(odb, backend, priority, 1);
386 }
387
388 static int add_default_backends(git_odb *db, const char *objects_dir, int as_alternates)
389 {
390 git_odb_backend *loose, *packed;
391
392 /* add the loose object backend */
393 if (git_odb_backend_loose(&loose, objects_dir, -1, 0) < 0 ||
394 add_backend_internal(db, loose, GIT_LOOSE_PRIORITY, as_alternates) < 0)
395 return -1;
396
397 /* add the packed file backend */
398 if (git_odb_backend_pack(&packed, objects_dir) < 0 ||
399 add_backend_internal(db, packed, GIT_PACKED_PRIORITY, as_alternates) < 0)
400 return -1;
401
402 return 0;
403 }
404
405 static int load_alternates(git_odb *odb, const char *objects_dir)
406 {
407 git_buf alternates_path = GIT_BUF_INIT;
408 git_buf alternates_buf = GIT_BUF_INIT;
409 char *buffer;
410 const char *alternate;
411 int result = 0;
412
413 if (git_buf_joinpath(&alternates_path, objects_dir, GIT_ALTERNATES_FILE) < 0)
414 return -1;
415
416 if (git_path_exists(alternates_path.ptr) == false) {
417 git_buf_free(&alternates_path);
418 return 0;
419 }
420
421 if (git_futils_readbuffer(&alternates_buf, alternates_path.ptr) < 0) {
422 git_buf_free(&alternates_path);
423 return -1;
424 }
425
426 buffer = (char *)alternates_buf.ptr;
427
428 /* add each alternate as a new backend; one alternate per line */
429 while ((alternate = git__strtok(&buffer, "\r\n")) != NULL) {
430 if (*alternate == '\0' || *alternate == '#')
431 continue;
432
433 /* relative path: build based on the current `objects` folder */
434 if (*alternate == '.') {
435 if ((result = git_buf_joinpath(&alternates_path, objects_dir, alternate)) < 0)
436 break;
437 alternate = git_buf_cstr(&alternates_path);
438 }
439
440 if ((result = add_default_backends(odb, alternate, 1)) < 0)
441 break;
442 }
443
444 git_buf_free(&alternates_path);
445 git_buf_free(&alternates_buf);
446
447 return result;
448 }
449
450 int git_odb_open(git_odb **out, const char *objects_dir)
451 {
452 git_odb *db;
453
454 assert(out && objects_dir);
455
456 *out = NULL;
457
458 if (git_odb_new(&db) < 0)
459 return -1;
460
461 if (add_default_backends(db, objects_dir, 0) < 0 ||
462 load_alternates(db, objects_dir) < 0)
463 {
464 git_odb_free(db);
465 return -1;
466 }
467
468 *out = db;
469 return 0;
470 }
471
472 static void odb_free(git_odb *db)
473 {
474 unsigned int i;
475
476 for (i = 0; i < db->backends.length; ++i) {
477 backend_internal *internal = git_vector_get(&db->backends, i);
478 git_odb_backend *backend = internal->backend;
479
480 if (backend->free) backend->free(backend);
481 else git__free(backend);
482
483 git__free(internal);
484 }
485
486 git_vector_free(&db->backends);
487 git_cache_free(&db->cache);
488 git__free(db);
489 }
490
491 void git_odb_free(git_odb *db)
492 {
493 if (db == NULL)
494 return;
495
496 GIT_REFCOUNT_DEC(db, odb_free);
497 }
498
499 int git_odb_exists(git_odb *db, const git_oid *id)
500 {
501 git_odb_object *object;
502 unsigned int i;
503 bool found = false;
504
505 assert(db && id);
506
507 if ((object = git_cache_get(&db->cache, id)) != NULL) {
508 git_odb_object_free(object);
509 return (int)true;
510 }
511
512 for (i = 0; i < db->backends.length && !found; ++i) {
513 backend_internal *internal = git_vector_get(&db->backends, i);
514 git_odb_backend *b = internal->backend;
515
516 if (b->exists != NULL)
517 found = b->exists(b, id);
518 }
519
520 return (int)found;
521 }
522
523 int git_odb_read_header(size_t *len_p, git_otype *type_p, git_odb *db, const git_oid *id)
524 {
525 int error;
526 git_odb_object *object;
527
528 error = git_odb__read_header_or_object(&object, len_p, type_p, db, id);
529
530 if (object)
531 git_odb_object_free(object);
532
533 return error;
534 }
535
536 int git_odb__read_header_or_object(
537 git_odb_object **out, size_t *len_p, git_otype *type_p,
538 git_odb *db, const git_oid *id)
539 {
540 unsigned int i;
541 int error = GIT_ENOTFOUND;
542 git_odb_object *object;
543
544 assert(db && id && out && len_p && type_p);
545
546 if ((object = git_cache_get(&db->cache, id)) != NULL) {
547 *len_p = object->raw.len;
548 *type_p = object->raw.type;
549 *out = object;
550 return 0;
551 }
552
553 *out = NULL;
554
555 for (i = 0; i < db->backends.length && error < 0; ++i) {
556 backend_internal *internal = git_vector_get(&db->backends, i);
557 git_odb_backend *b = internal->backend;
558
559 if (b->read_header != NULL)
560 error = b->read_header(len_p, type_p, b, id);
561 }
562
563 if (!error || error == GIT_PASSTHROUGH)
564 return 0;
565
566 /*
567 * no backend could read only the header.
568 * try reading the whole object and freeing the contents
569 */
570 if ((error = git_odb_read(&object, db, id)) < 0)
571 return error; /* error already set - pass along */
572
573 *len_p = object->raw.len;
574 *type_p = object->raw.type;
575 *out = object;
576
577 return 0;
578 }
579
580 int git_odb_read(git_odb_object **out, git_odb *db, const git_oid *id)
581 {
582 unsigned int i;
583 int error = GIT_ENOTFOUND;
584 git_rawobj raw;
585
586 assert(out && db && id);
587
588 *out = git_cache_get(&db->cache, id);
589 if (*out != NULL)
590 return 0;
591
592 for (i = 0; i < db->backends.length && error < 0; ++i) {
593 backend_internal *internal = git_vector_get(&db->backends, i);
594 git_odb_backend *b = internal->backend;
595
596 if (b->read != NULL)
597 error = b->read(&raw.data, &raw.len, &raw.type, b, id);
598 }
599
600 /* TODO: If no backends are configured, this returns GIT_ENOTFOUND but
601 * will never have called giterr_set().
602 */
603
604 if (error && error != GIT_PASSTHROUGH)
605 return error;
606
607 *out = git_cache_try_store(&db->cache, new_odb_object(id, &raw));
608 return 0;
609 }
610
611 int git_odb_read_prefix(
612 git_odb_object **out, git_odb *db, const git_oid *short_id, size_t len)
613 {
614 unsigned int i;
615 int error = GIT_ENOTFOUND;
616 git_oid found_full_oid = {{0}};
617 git_rawobj raw;
618 void *data = NULL;
619 bool found = false;
620
621 assert(out && db);
622
623 if (len < GIT_OID_MINPREFIXLEN)
624 return git_odb__error_ambiguous("prefix length too short");
625
626 if (len > GIT_OID_HEXSZ)
627 len = GIT_OID_HEXSZ;
628
629 if (len == GIT_OID_HEXSZ) {
630 *out = git_cache_get(&db->cache, short_id);
631 if (*out != NULL)
632 return 0;
633 }
634
635 for (i = 0; i < db->backends.length; ++i) {
636 backend_internal *internal = git_vector_get(&db->backends, i);
637 git_odb_backend *b = internal->backend;
638
639 if (b->read != NULL) {
640 git_oid full_oid;
641 error = b->read_prefix(&full_oid, &raw.data, &raw.len, &raw.type, b, short_id, len);
642 if (error == GIT_ENOTFOUND || error == GIT_PASSTHROUGH)
643 continue;
644
645 if (error)
646 return error;
647
648 git__free(data);
649 data = raw.data;
650 if (found && git_oid_cmp(&full_oid, &found_full_oid))
651 return git_odb__error_ambiguous("multiple matches for prefix");
652 found_full_oid = full_oid;
653 found = true;
654 }
655 }
656
657 if (!found)
658 return git_odb__error_notfound("no match for prefix", short_id);
659
660 *out = git_cache_try_store(&db->cache, new_odb_object(&found_full_oid, &raw));
661 return 0;
662 }
663
664 int git_odb_foreach(git_odb *db, int (*cb)(git_oid *oid, void *data), void *data)
665 {
666 unsigned int i;
667 backend_internal *internal;
668
669 git_vector_foreach(&db->backends, i, internal) {
670 git_odb_backend *b = internal->backend;
671 int error = b->foreach(b, cb, data);
672 if (error < 0)
673 return error;
674 }
675
676 return 0;
677 }
678
679 int git_odb_write(
680 git_oid *oid, git_odb *db, const void *data, size_t len, git_otype type)
681 {
682 unsigned int i;
683 int error = GIT_ERROR;
684 git_odb_stream *stream;
685
686 assert(oid && db);
687
688 for (i = 0; i < db->backends.length && error < 0; ++i) {
689 backend_internal *internal = git_vector_get(&db->backends, i);
690 git_odb_backend *b = internal->backend;
691
692 /* we don't write in alternates! */
693 if (internal->is_alternate)
694 continue;
695
696 if (b->write != NULL)
697 error = b->write(oid, b, data, len, type);
698 }
699
700 if (!error || error == GIT_PASSTHROUGH)
701 return 0;
702
703 /* if no backends were able to write the object directly, we try a streaming
704 * write to the backends; just write the whole object into the stream in one
705 * push */
706
707 if ((error = git_odb_open_wstream(&stream, db, len, type)) != 0)
708 return error;
709
710 stream->write(stream, data, len);
711 error = stream->finalize_write(oid, stream);
712 stream->free(stream);
713
714 return error;
715 }
716
717 int git_odb_open_wstream(
718 git_odb_stream **stream, git_odb *db, size_t size, git_otype type)
719 {
720 unsigned int i;
721 int error = GIT_ERROR;
722
723 assert(stream && db);
724
725 for (i = 0; i < db->backends.length && error < 0; ++i) {
726 backend_internal *internal = git_vector_get(&db->backends, i);
727 git_odb_backend *b = internal->backend;
728
729 /* we don't write in alternates! */
730 if (internal->is_alternate)
731 continue;
732
733 if (b->writestream != NULL)
734 error = b->writestream(stream, b, size, type);
735 else if (b->write != NULL)
736 error = init_fake_wstream(stream, b, size, type);
737 }
738
739 if (error == GIT_PASSTHROUGH)
740 error = 0;
741
742 return error;
743 }
744
745 int git_odb_open_rstream(git_odb_stream **stream, git_odb *db, const git_oid *oid)
746 {
747 unsigned int i;
748 int error = GIT_ERROR;
749
750 assert(stream && db);
751
752 for (i = 0; i < db->backends.length && error < 0; ++i) {
753 backend_internal *internal = git_vector_get(&db->backends, i);
754 git_odb_backend *b = internal->backend;
755
756 if (b->readstream != NULL)
757 error = b->readstream(stream, b, oid);
758 }
759
760 if (error == GIT_PASSTHROUGH)
761 error = 0;
762
763 return error;
764 }
765
766 void * git_odb_backend_malloc(git_odb_backend *backend, size_t len)
767 {
768 GIT_UNUSED(backend);
769 return git__malloc(len);
770 }
771
/*
 * Set an "object not found" ODB error and return GIT_ENOTFOUND.
 * When `oid` is given, its hex form is appended to the message.
 */
int git_odb__error_notfound(const char *message, const git_oid *oid)
{
	if (oid == NULL) {
		giterr_set(GITERR_ODB, "Object not found - %s", message);
	} else {
		char hex[GIT_OID_HEXSZ + 1];

		git_oid_tostr(hex, sizeof(hex), oid);
		giterr_set(GITERR_ODB, "Object not found - %s (%s)", message, hex);
	}

	return GIT_ENOTFOUND;
}
783
784 int git_odb__error_ambiguous(const char *message)
785 {
786 giterr_set(GITERR_ODB, "Ambiguous SHA1 prefix - %s", message);
787 return GIT_EAMBIGUOUS;
788 }
789