git.proxmox.com Git - libgit2.git/blob - src/odb.c
Implement filters for status/diff blobs
[libgit2.git] / src / odb.c
1 /*
2 * Copyright (C) 2009-2012 the libgit2 contributors
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8 #include "common.h"
9 #include <zlib.h>
10 #include "git2/object.h"
11 #include "fileops.h"
12 #include "hash.h"
13 #include "odb.h"
14 #include "delta-apply.h"
15 #include "filter.h"
16
17 #include "git2/odb_backend.h"
18 #include "git2/oid.h"
19
/* Path of the alternates list, relative to an objects directory */
#define GIT_ALTERNATES_FILE "info/alternates"

/* TODO: is this correct? */
/* Priorities handed to the two default backends when they are registered */
#define GIT_LOOSE_PRIORITY 2
#define GIT_PACKED_PRIORITY 1
25
26 typedef struct
27 {
28 git_odb_backend *backend;
29 int priority;
30 int is_alternate;
31 } backend_internal;
32
33 static int format_object_header(char *hdr, size_t n, size_t obj_len, git_otype obj_type)
34 {
35 const char *type_str = git_object_type2string(obj_type);
36 int len = p_snprintf(hdr, n, "%s %"PRIuZ, type_str, obj_len);
37 assert(len > 0 && len <= (int)n);
38 return len+1;
39 }
40
41 int git_odb__hashobj(git_oid *id, git_rawobj *obj)
42 {
43 git_buf_vec vec[2];
44 char header[64];
45 int hdrlen;
46
47 assert(id && obj);
48
49 if (!git_object_typeisloose(obj->type))
50 return -1;
51 if (!obj->data && obj->len != 0)
52 return -1;
53
54 hdrlen = format_object_header(header, sizeof(header), obj->len, obj->type);
55
56 vec[0].data = header;
57 vec[0].len = hdrlen;
58 vec[1].data = obj->data;
59 vec[1].len = obj->len;
60
61 git_hash_vec(id, vec, 2);
62
63 return 0;
64 }
65
66
67 static git_odb_object *new_odb_object(const git_oid *oid, git_rawobj *source)
68 {
69 git_odb_object *object = git__malloc(sizeof(git_odb_object));
70 memset(object, 0x0, sizeof(git_odb_object));
71
72 git_oid_cpy(&object->cached.oid, oid);
73 memcpy(&object->raw, source, sizeof(git_rawobj));
74
75 return object;
76 }
77
78 static void free_odb_object(void *o)
79 {
80 git_odb_object *object = (git_odb_object *)o;
81
82 if (object != NULL) {
83 git__free(object->raw.data);
84 git__free(object);
85 }
86 }
87
88 const git_oid *git_odb_object_id(git_odb_object *object)
89 {
90 return &object->cached.oid;
91 }
92
93 const void *git_odb_object_data(git_odb_object *object)
94 {
95 return object->raw.data;
96 }
97
98 size_t git_odb_object_size(git_odb_object *object)
99 {
100 return object->raw.len;
101 }
102
103 git_otype git_odb_object_type(git_odb_object *object)
104 {
105 return object->raw.type;
106 }
107
108 void git_odb_object_free(git_odb_object *object)
109 {
110 git_cached_obj_decref((git_cached_obj *)object, &free_odb_object);
111 }
112
113 int git_odb__hashfd(git_oid *out, git_file fd, size_t size, git_otype type)
114 {
115 int hdr_len;
116 char hdr[64], buffer[2048];
117 git_hash_ctx *ctx;
118
119 hdr_len = format_object_header(hdr, sizeof(hdr), size, type);
120
121 ctx = git_hash_new_ctx();
122 GITERR_CHECK_ALLOC(ctx);
123
124 git_hash_update(ctx, hdr, hdr_len);
125
126 while (size > 0) {
127 ssize_t read_len = p_read(fd, buffer, sizeof(buffer));
128
129 if (read_len < 0) {
130 git_hash_free_ctx(ctx);
131 giterr_set(GITERR_OS, "Error reading file");
132 return -1;
133 }
134
135 git_hash_update(ctx, buffer, read_len);
136 size -= read_len;
137 }
138
139 git_hash_final(out, ctx);
140 git_hash_free_ctx(ctx);
141
142 return 0;
143 }
144
145 int git_odb__hashfd_filtered(
146 git_oid *out, git_file fd, size_t size, git_otype type, git_vector *filters)
147 {
148 int error;
149 git_buf raw = GIT_BUF_INIT;
150 git_buf filtered = GIT_BUF_INIT;
151
152 if (!filters || !filters->length)
153 return git_odb__hashfd(out, fd, size, type);
154
155 /* size of data is used in header, so we have to read the whole file
156 * into memory to apply filters before beginning to calculate the hash
157 */
158
159 if (!(error = git_futils_readbuffer_fd(&raw, fd, size)))
160 error = git_filters_apply(&filtered, &raw, filters);
161
162 git_buf_free(&raw);
163
164 if (!error)
165 error = git_odb_hash(out, filtered.ptr, filtered.size, type);
166
167 git_buf_free(&filtered);
168
169 return error;
170 }
171
172 int git_odb__hashlink(git_oid *out, const char *path)
173 {
174 struct stat st;
175 git_off_t size;
176 int result;
177
178 if (git_path_lstat(path, &st) < 0)
179 return -1;
180
181 size = st.st_size;
182
183 if (!git__is_sizet(size)) {
184 giterr_set(GITERR_OS, "File size overflow for 32-bit systems");
185 return -1;
186 }
187
188 if (S_ISLNK(st.st_mode)) {
189 char *link_data;
190 ssize_t read_len;
191
192 link_data = git__malloc((size_t)size);
193 GITERR_CHECK_ALLOC(link_data);
194
195 read_len = p_readlink(path, link_data, (size_t)(size + 1));
196 if (read_len != (ssize_t)size) {
197 giterr_set(GITERR_OS, "Failed to read symlink data for '%s'", path);
198 return -1;
199 }
200
201 result = git_odb_hash(out, link_data, (size_t)size, GIT_OBJ_BLOB);
202 git__free(link_data);
203 } else {
204 int fd = git_futils_open_ro(path);
205 if (fd < 0)
206 return -1;
207 result = git_odb__hashfd(out, fd, (size_t)size, GIT_OBJ_BLOB);
208 p_close(fd);
209 }
210
211 return result;
212 }
213
214 int git_odb_hashfile(git_oid *out, const char *path, git_otype type)
215 {
216 git_off_t size;
217 int result, fd = git_futils_open_ro(path);
218 if (fd < 0)
219 return fd;
220
221 if ((size = git_futils_filesize(fd)) < 0 || !git__is_sizet(size)) {
222 giterr_set(GITERR_OS, "File size overflow for 32-bit systems");
223 p_close(fd);
224 return -1;
225 }
226
227 result = git_odb__hashfd(out, fd, (size_t)size, type);
228 p_close(fd);
229 return result;
230 }
231
232 int git_odb_hash(git_oid *id, const void *data, size_t len, git_otype type)
233 {
234 git_rawobj raw;
235
236 assert(id);
237
238 raw.data = (void *)data;
239 raw.len = len;
240 raw.type = type;
241
242 return git_odb__hashobj(id, &raw);
243 }
244
245 /**
246 * FAKE WSTREAM
247 */
248
249 typedef struct {
250 git_odb_stream stream;
251 char *buffer;
252 size_t size, written;
253 git_otype type;
254 } fake_wstream;
255
256 static int fake_wstream__fwrite(git_oid *oid, git_odb_stream *_stream)
257 {
258 fake_wstream *stream = (fake_wstream *)_stream;
259 return _stream->backend->write(oid, _stream->backend, stream->buffer, stream->size, stream->type);
260 }
261
262 static int fake_wstream__write(git_odb_stream *_stream, const char *data, size_t len)
263 {
264 fake_wstream *stream = (fake_wstream *)_stream;
265
266 if (stream->written + len > stream->size)
267 return -1;
268
269 memcpy(stream->buffer + stream->written, data, len);
270 stream->written += len;
271 return 0;
272 }
273
274 static void fake_wstream__free(git_odb_stream *_stream)
275 {
276 fake_wstream *stream = (fake_wstream *)_stream;
277
278 git__free(stream->buffer);
279 git__free(stream);
280 }
281
282 static int init_fake_wstream(git_odb_stream **stream_p, git_odb_backend *backend, size_t size, git_otype type)
283 {
284 fake_wstream *stream;
285
286 stream = git__calloc(1, sizeof(fake_wstream));
287 GITERR_CHECK_ALLOC(stream);
288
289 stream->size = size;
290 stream->type = type;
291 stream->buffer = git__malloc(size);
292 if (stream->buffer == NULL) {
293 git__free(stream);
294 return -1;
295 }
296
297 stream->stream.backend = backend;
298 stream->stream.read = NULL; /* read only */
299 stream->stream.write = &fake_wstream__write;
300 stream->stream.finalize_write = &fake_wstream__fwrite;
301 stream->stream.free = &fake_wstream__free;
302 stream->stream.mode = GIT_STREAM_WRONLY;
303
304 *stream_p = (git_odb_stream *)stream;
305 return 0;
306 }
307
308 /***********************************************************
309 *
310 * OBJECT DATABASE PUBLIC API
311 *
312 * Public calls for the ODB functionality
313 *
314 ***********************************************************/
315
316 static int backend_sort_cmp(const void *a, const void *b)
317 {
318 const backend_internal *backend_a = (const backend_internal *)(a);
319 const backend_internal *backend_b = (const backend_internal *)(b);
320
321 if (backend_a->is_alternate == backend_b->is_alternate)
322 return (backend_b->priority - backend_a->priority);
323
324 return backend_a->is_alternate ? 1 : -1;
325 }
326
327 int git_odb_new(git_odb **out)
328 {
329 git_odb *db = git__calloc(1, sizeof(*db));
330 GITERR_CHECK_ALLOC(db);
331
332 if (git_cache_init(&db->cache, GIT_DEFAULT_CACHE_SIZE, &free_odb_object) < 0 ||
333 git_vector_init(&db->backends, 4, backend_sort_cmp) < 0)
334 {
335 git__free(db);
336 return -1;
337 }
338
339 *out = db;
340 GIT_REFCOUNT_INC(db);
341 return 0;
342 }
343
344 static int add_backend_internal(git_odb *odb, git_odb_backend *backend, int priority, int is_alternate)
345 {
346 backend_internal *internal;
347
348 assert(odb && backend);
349
350 /* Check if the backend is already owned by another ODB */
351 assert(!backend->odb || backend->odb == odb);
352
353 internal = git__malloc(sizeof(backend_internal));
354 GITERR_CHECK_ALLOC(internal);
355
356 internal->backend = backend;
357 internal->priority = priority;
358 internal->is_alternate = is_alternate;
359
360 if (git_vector_insert(&odb->backends, internal) < 0) {
361 git__free(internal);
362 return -1;
363 }
364
365 git_vector_sort(&odb->backends);
366 internal->backend->odb = odb;
367 return 0;
368 }
369
370 int git_odb_add_backend(git_odb *odb, git_odb_backend *backend, int priority)
371 {
372 return add_backend_internal(odb, backend, priority, 0);
373 }
374
375 int git_odb_add_alternate(git_odb *odb, git_odb_backend *backend, int priority)
376 {
377 return add_backend_internal(odb, backend, priority, 1);
378 }
379
380 static int add_default_backends(git_odb *db, const char *objects_dir, int as_alternates)
381 {
382 git_odb_backend *loose, *packed;
383
384 /* add the loose object backend */
385 if (git_odb_backend_loose(&loose, objects_dir, -1, 0) < 0 ||
386 add_backend_internal(db, loose, GIT_LOOSE_PRIORITY, as_alternates) < 0)
387 return -1;
388
389 /* add the packed file backend */
390 if (git_odb_backend_pack(&packed, objects_dir) < 0 ||
391 add_backend_internal(db, packed, GIT_PACKED_PRIORITY, as_alternates) < 0)
392 return -1;
393
394 return 0;
395 }
396
397 static int load_alternates(git_odb *odb, const char *objects_dir)
398 {
399 git_buf alternates_path = GIT_BUF_INIT;
400 git_buf alternates_buf = GIT_BUF_INIT;
401 char *buffer;
402 const char *alternate;
403 int result = 0;
404
405 if (git_buf_joinpath(&alternates_path, objects_dir, GIT_ALTERNATES_FILE) < 0)
406 return -1;
407
408 if (git_path_exists(alternates_path.ptr) == false) {
409 git_buf_free(&alternates_path);
410 return 0;
411 }
412
413 if (git_futils_readbuffer(&alternates_buf, alternates_path.ptr) < 0) {
414 git_buf_free(&alternates_path);
415 return -1;
416 }
417
418 buffer = (char *)alternates_buf.ptr;
419
420 /* add each alternate as a new backend; one alternate per line */
421 while ((alternate = git__strtok(&buffer, "\r\n")) != NULL) {
422 if (*alternate == '\0' || *alternate == '#')
423 continue;
424
425 /* relative path: build based on the current `objects` folder */
426 if (*alternate == '.') {
427 if ((result = git_buf_joinpath(&alternates_path, objects_dir, alternate)) < 0)
428 break;
429 alternate = git_buf_cstr(&alternates_path);
430 }
431
432 if ((result = add_default_backends(odb, alternate, 1)) < 0)
433 break;
434 }
435
436 git_buf_free(&alternates_path);
437 git_buf_free(&alternates_buf);
438
439 return result;
440 }
441
442 int git_odb_open(git_odb **out, const char *objects_dir)
443 {
444 git_odb *db;
445
446 assert(out && objects_dir);
447
448 *out = NULL;
449
450 if (git_odb_new(&db) < 0)
451 return -1;
452
453 if (add_default_backends(db, objects_dir, 0) < 0 ||
454 load_alternates(db, objects_dir) < 0)
455 {
456 git_odb_free(db);
457 return -1;
458 }
459
460 *out = db;
461 return 0;
462 }
463
464 static void odb_free(git_odb *db)
465 {
466 unsigned int i;
467
468 for (i = 0; i < db->backends.length; ++i) {
469 backend_internal *internal = git_vector_get(&db->backends, i);
470 git_odb_backend *backend = internal->backend;
471
472 if (backend->free) backend->free(backend);
473 else git__free(backend);
474
475 git__free(internal);
476 }
477
478 git_vector_free(&db->backends);
479 git_cache_free(&db->cache);
480 git__free(db);
481 }
482
483 void git_odb_free(git_odb *db)
484 {
485 if (db == NULL)
486 return;
487
488 GIT_REFCOUNT_DEC(db, odb_free);
489 }
490
491 int git_odb_exists(git_odb *db, const git_oid *id)
492 {
493 git_odb_object *object;
494 unsigned int i;
495 bool found = false;
496
497 assert(db && id);
498
499 if ((object = git_cache_get(&db->cache, id)) != NULL) {
500 git_odb_object_free(object);
501 return (int)true;
502 }
503
504 for (i = 0; i < db->backends.length && !found; ++i) {
505 backend_internal *internal = git_vector_get(&db->backends, i);
506 git_odb_backend *b = internal->backend;
507
508 if (b->exists != NULL)
509 found = b->exists(b, id);
510 }
511
512 return (int)found;
513 }
514
515 int git_odb_read_header(size_t *len_p, git_otype *type_p, git_odb *db, const git_oid *id)
516 {
517 unsigned int i;
518 int error = GIT_ENOTFOUND;
519 git_odb_object *object;
520
521 assert(db && id);
522
523 if ((object = git_cache_get(&db->cache, id)) != NULL) {
524 *len_p = object->raw.len;
525 *type_p = object->raw.type;
526 git_odb_object_free(object);
527 return 0;
528 }
529
530 for (i = 0; i < db->backends.length && error < 0; ++i) {
531 backend_internal *internal = git_vector_get(&db->backends, i);
532 git_odb_backend *b = internal->backend;
533
534 if (b->read_header != NULL)
535 error = b->read_header(len_p, type_p, b, id);
536 }
537
538 if (!error || error == GIT_PASSTHROUGH)
539 return 0;
540
541 /*
542 * no backend could read only the header.
543 * try reading the whole object and freeing the contents
544 */
545 if ((error = git_odb_read(&object, db, id)) < 0)
546 return error; /* error already set - pass along */
547
548 *len_p = object->raw.len;
549 *type_p = object->raw.type;
550 git_odb_object_free(object);
551 return 0;
552 }
553
554 int git_odb_read(git_odb_object **out, git_odb *db, const git_oid *id)
555 {
556 unsigned int i;
557 int error = GIT_ENOTFOUND;
558 git_rawobj raw;
559
560 assert(out && db && id);
561
562 *out = git_cache_get(&db->cache, id);
563 if (*out != NULL)
564 return 0;
565
566 for (i = 0; i < db->backends.length && error < 0; ++i) {
567 backend_internal *internal = git_vector_get(&db->backends, i);
568 git_odb_backend *b = internal->backend;
569
570 if (b->read != NULL)
571 error = b->read(&raw.data, &raw.len, &raw.type, b, id);
572 }
573
574 /* TODO: If no backends are configured, this returns GIT_ENOTFOUND but
575 * will never have called giterr_set().
576 */
577
578 if (error && error != GIT_PASSTHROUGH)
579 return error;
580
581 *out = git_cache_try_store(&db->cache, new_odb_object(id, &raw));
582 return 0;
583 }
584
585 int git_odb_read_prefix(
586 git_odb_object **out, git_odb *db, const git_oid *short_id, size_t len)
587 {
588 unsigned int i;
589 int error = GIT_ENOTFOUND;
590 git_oid found_full_oid = {{0}};
591 git_rawobj raw;
592 void *data = NULL;
593 bool found = false;
594
595 assert(out && db);
596
597 if (len < GIT_OID_MINPREFIXLEN)
598 return git_odb__error_ambiguous("prefix length too short");
599
600 if (len > GIT_OID_HEXSZ)
601 len = GIT_OID_HEXSZ;
602
603 if (len == GIT_OID_HEXSZ) {
604 *out = git_cache_get(&db->cache, short_id);
605 if (*out != NULL)
606 return 0;
607 }
608
609 for (i = 0; i < db->backends.length; ++i) {
610 backend_internal *internal = git_vector_get(&db->backends, i);
611 git_odb_backend *b = internal->backend;
612
613 if (b->read != NULL) {
614 git_oid full_oid;
615 error = b->read_prefix(&full_oid, &raw.data, &raw.len, &raw.type, b, short_id, len);
616 if (error == GIT_ENOTFOUND || error == GIT_PASSTHROUGH)
617 continue;
618
619 if (error)
620 return error;
621
622 git__free(data);
623 data = raw.data;
624 if (found && git_oid_cmp(&full_oid, &found_full_oid))
625 return git_odb__error_ambiguous("multiple matches for prefix");
626 found_full_oid = full_oid;
627 found = true;
628 }
629 }
630
631 if (!found)
632 return git_odb__error_notfound("no match for prefix", short_id);
633
634 *out = git_cache_try_store(&db->cache, new_odb_object(&found_full_oid, &raw));
635 return 0;
636 }
637
638 int git_odb_foreach(git_odb *db, int (*cb)(git_oid *oid, void *data), void *data)
639 {
640 unsigned int i;
641 backend_internal *internal;
642
643 git_vector_foreach(&db->backends, i, internal) {
644 git_odb_backend *b = internal->backend;
645 int error = b->foreach(b, cb, data);
646 if (error < 0)
647 return error;
648 }
649
650 return 0;
651 }
652
653 int git_odb_write(
654 git_oid *oid, git_odb *db, const void *data, size_t len, git_otype type)
655 {
656 unsigned int i;
657 int error = GIT_ERROR;
658 git_odb_stream *stream;
659
660 assert(oid && db);
661
662 for (i = 0; i < db->backends.length && error < 0; ++i) {
663 backend_internal *internal = git_vector_get(&db->backends, i);
664 git_odb_backend *b = internal->backend;
665
666 /* we don't write in alternates! */
667 if (internal->is_alternate)
668 continue;
669
670 if (b->write != NULL)
671 error = b->write(oid, b, data, len, type);
672 }
673
674 if (!error || error == GIT_PASSTHROUGH)
675 return 0;
676
677 /* if no backends were able to write the object directly, we try a streaming
678 * write to the backends; just write the whole object into the stream in one
679 * push */
680
681 if ((error = git_odb_open_wstream(&stream, db, len, type)) != 0)
682 return error;
683
684 stream->write(stream, data, len);
685 error = stream->finalize_write(oid, stream);
686 stream->free(stream);
687
688 return error;
689 }
690
691 int git_odb_open_wstream(
692 git_odb_stream **stream, git_odb *db, size_t size, git_otype type)
693 {
694 unsigned int i;
695 int error = GIT_ERROR;
696
697 assert(stream && db);
698
699 for (i = 0; i < db->backends.length && error < 0; ++i) {
700 backend_internal *internal = git_vector_get(&db->backends, i);
701 git_odb_backend *b = internal->backend;
702
703 /* we don't write in alternates! */
704 if (internal->is_alternate)
705 continue;
706
707 if (b->writestream != NULL)
708 error = b->writestream(stream, b, size, type);
709 else if (b->write != NULL)
710 error = init_fake_wstream(stream, b, size, type);
711 }
712
713 if (error == GIT_PASSTHROUGH)
714 error = 0;
715
716 return error;
717 }
718
719 int git_odb_open_rstream(git_odb_stream **stream, git_odb *db, const git_oid *oid)
720 {
721 unsigned int i;
722 int error = GIT_ERROR;
723
724 assert(stream && db);
725
726 for (i = 0; i < db->backends.length && error < 0; ++i) {
727 backend_internal *internal = git_vector_get(&db->backends, i);
728 git_odb_backend *b = internal->backend;
729
730 if (b->readstream != NULL)
731 error = b->readstream(stream, b, oid);
732 }
733
734 if (error == GIT_PASSTHROUGH)
735 error = 0;
736
737 return error;
738 }
739
740 void * git_odb_backend_malloc(git_odb_backend *backend, size_t len)
741 {
742 GIT_UNUSED(backend);
743 return git__malloc(len);
744 }
745
/*
 * Record an "object not found" ODB error, including the hex id when
 * `oid` is given. Always returns GIT_ENOTFOUND.
 */
int git_odb__error_notfound(const char *message, const git_oid *oid)
{
	if (oid == NULL) {
		giterr_set(GITERR_ODB, "Object not found - %s", message);
	} else {
		char oid_str[GIT_OID_HEXSZ + 1];

		git_oid_tostr(oid_str, sizeof(oid_str), oid);
		giterr_set(GITERR_ODB, "Object not found - %s (%s)", message, oid_str);
	}

	return GIT_ENOTFOUND;
}
757
758 int git_odb__error_ambiguous(const char *message)
759 {
760 giterr_set(GITERR_ODB, "Ambiguous SHA1 prefix - %s", message);
761 return GIT_EAMBIGUOUS;
762 }
763