]> git.proxmox.com Git - libgit2.git/blob - src/odb_loose.c
I broke your bindings
[libgit2.git] / src / odb_loose.c
1 /*
2 * This file is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2,
4 * as published by the Free Software Foundation.
5 *
6 * In addition to the permissions in the GNU General Public License,
7 * the authors give you unlimited permission to link the compiled
8 * version of this file into combinations with other programs,
9 * and to distribute those combinations without any restriction
10 * coming from the use of this file. (The General Public License
11 * restrictions do apply in other respects; for example, they cover
12 * modification of the file, and distribution when not linked into
13 * a combined executable.)
14 *
15 * This file is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; see the file COPYING. If not, write to
22 * the Free Software Foundation, 51 Franklin Street, Fifth Floor,
23 * Boston, MA 02110-1301, USA.
24 */
25
26 #include "common.h"
27 #include "git2/zlib.h"
28 #include "git2/object.h"
29 #include "fileops.h"
30 #include "hash.h"
31 #include "odb.h"
32 #include "delta-apply.h"
33 #include "filebuf.h"
34
35 #include "git2/odb_backend.h"
36 #include "git2/types.h"
37
/*
 * Decoded loose-object header.
 *
 * Filled in by get_binary_object_header() (pack-like binary header) and
 * by get_object_header() ("<type> <size>\0" text header).
 */
typedef struct { /* object header data */
	git_otype type; /* object type */
	size_t size; /* object size */
} obj_hdr;
42
/*
 * Write stream handed out by loose_backend__stream().
 *
 * The stream callbacks cast the incoming `git_odb_stream *` straight to
 * `loose_writestream *`, so `stream` has to stay the first member.
 */
typedef struct {
	git_odb_stream stream;	/* generic stream header (callbacks, backend, mode) */
	git_filebuf fbuf;	/* file buffer that stream_write feeds and stream_fwrite commits */
	int finished;		/* set to 1 by stream_fwrite; stream_free then skips git_filebuf_cleanup() */
} loose_writestream;
48
/*
 * Loose-object backend instance.
 *
 * The public callbacks cast `git_odb_backend *` to `loose_backend *`,
 * so `parent` has to stay the first member.
 */
typedef struct loose_backend {
	git_odb_backend parent;

	int object_zlib_level; /** loose object zlib compression level. */
	int fsync_object_files; /** loose object file fsync flag. */
	char *objects_dir; /* heap copy of the objects directory; released in loose_backend__free() */
} loose_backend;
56
57
58 /***********************************************************
59 *
60 * MISCELLANEOUS HELPER FUNCTIONS
61 *
62 ***********************************************************/
63
64 static size_t object_file_name(char *name, size_t n, char *dir, const git_oid *id)
65 {
66 size_t len = strlen(dir);
67
68 /* check length: 43 = 40 hex sha1 chars + 2 * '/' + '\0' */
69 if (len+43 > n)
70 return len+43;
71
72 /* the object dir: eg $GIT_DIR/objects */
73 strcpy(name, dir);
74 if (name[len-1] != '/')
75 name[len++] = '/';
76
77 /* loose object filename: aa/aaa... (41 bytes) */
78 git_oid_pathfmt(&name[len], id);
79 name[len+41] = '\0';
80
81 return 0;
82 }
83
84
85 static size_t get_binary_object_header(obj_hdr *hdr, gitfo_buf *obj)
86 {
87 unsigned char c;
88 unsigned char *data = obj->data;
89 size_t shift, size, used = 0;
90
91 if (obj->len == 0)
92 return 0;
93
94 c = data[used++];
95 hdr->type = (c >> 4) & 7;
96
97 size = c & 15;
98 shift = 4;
99 while (c & 0x80) {
100 if (obj->len <= used)
101 return 0;
102 if (sizeof(size_t) * 8 <= shift)
103 return 0;
104 c = data[used++];
105 size += (c & 0x7f) << shift;
106 shift += 7;
107 }
108 hdr->size = size;
109
110 return used;
111 }
112
113 static size_t get_object_header(obj_hdr *hdr, unsigned char *data)
114 {
115 char c, typename[10];
116 size_t size, used = 0;
117
118 /*
119 * type name string followed by space.
120 */
121 while ((c = data[used]) != ' ') {
122 typename[used++] = c;
123 if (used >= sizeof(typename))
124 return 0;
125 }
126 typename[used] = 0;
127 if (used == 0)
128 return 0;
129 hdr->type = git_object_string2type(typename);
130 used++; /* consume the space */
131
132 /*
133 * length follows immediately in decimal (without
134 * leading zeros).
135 */
136 size = data[used++] - '0';
137 if (size > 9)
138 return 0;
139 if (size) {
140 while ((c = data[used]) != '\0') {
141 size_t d = c - '0';
142 if (d > 9)
143 break;
144 used++;
145 size = size * 10 + d;
146 }
147 }
148 hdr->size = size;
149
150 /*
151 * the length must be followed by a zero byte
152 */
153 if (data[used++] != '\0')
154 return 0;
155
156 return used;
157 }
158
159
160
161 /***********************************************************
162 *
163 * ZLIB RELATED FUNCTIONS
164 *
165 ***********************************************************/
166
167 static void init_stream(z_stream *s, void *out, size_t len)
168 {
169 memset(s, 0, sizeof(*s));
170 s->next_out = out;
171 s->avail_out = len;
172 }
173
174 static void set_stream_input(z_stream *s, void *in, size_t len)
175 {
176 s->next_in = in;
177 s->avail_in = len;
178 }
179
180 static void set_stream_output(z_stream *s, void *out, size_t len)
181 {
182 s->next_out = out;
183 s->avail_out = len;
184 }
185
186
187 static int start_inflate(z_stream *s, gitfo_buf *obj, void *out, size_t len)
188 {
189 int status;
190
191 init_stream(s, out, len);
192 set_stream_input(s, obj->data, obj->len);
193
194 if ((status = inflateInit(s)) < Z_OK)
195 return status;
196
197 return inflate(s, 0);
198 }
199
200 static int finish_inflate(z_stream *s)
201 {
202 int status = Z_OK;
203
204 while (status == Z_OK)
205 status = inflate(s, Z_FINISH);
206
207 inflateEnd(s);
208
209 if ((status != Z_STREAM_END) || (s->avail_in != 0))
210 return GIT_ERROR;
211
212 return GIT_SUCCESS;
213 }
214
/*
 * Sniff the first two bytes of `data` for a zlib header: the first
 * byte must be 0x78 and the two bytes, read as a big-endian 16-bit
 * value, must be a multiple of 31.
 *
 * Returns non-zero when both conditions hold, 0 otherwise. The caller
 * must make sure at least two bytes are readable.
 */
static int is_zlib_compressed_data(unsigned char *data)
{
	const unsigned int first = data[0];
	const unsigned int second = data[1];

	if (first != 0x78)
		return 0;

	return ((first << 8) + second) % 31 == 0;
}
222
223 static int inflate_buffer(void *in, size_t inlen, void *out, size_t outlen)
224 {
225 z_stream zs;
226 int status = Z_OK;
227
228 memset(&zs, 0x0, sizeof(zs));
229
230 zs.next_out = out;
231 zs.avail_out = outlen;
232
233 zs.next_in = in;
234 zs.avail_in = inlen;
235
236 if (inflateInit(&zs) < Z_OK)
237 return GIT_ERROR;
238
239 while (status == Z_OK)
240 status = inflate(&zs, Z_FINISH);
241
242 inflateEnd(&zs);
243
244 if ((status != Z_STREAM_END) /*|| (zs.avail_in != 0) */)
245 return GIT_ERROR;
246
247 if (zs.total_out != outlen)
248 return GIT_ERROR;
249
250 return GIT_SUCCESS;
251 }
252
/*
 * Produce the full object payload after the header has been parsed
 * out of the head buffer.
 *
 * s:    zlib stream that has already inflated the start of the object
 *       into `hb`; this function ends the stream on every path
 * hb:   head buffer holding the header followed by the first payload
 *       bytes
 * used: number of header bytes at the start of `hb`
 * hdr:  parsed header; hdr->size is the payload size
 *
 * Returns a buffer of hdr->size + 1 bytes allocated with git__malloc()
 * whose first hdr->size bytes are the payload, or NULL on allocation
 * or inflate failure. The extra trailing byte is not written here.
 */
static void *inflate_tail(z_stream *s, void *hb, size_t used, obj_hdr *hdr)
{
	unsigned char *buf, *head = hb;
	size_t tail;

	/*
	 * allocate a buffer to hold the inflated data and copy the
	 * initial sequence of inflated data from the tail of the
	 * head buffer, if any.
	 */
	if ((buf = git__malloc(hdr->size + 1)) == NULL) {
		inflateEnd(s);
		return NULL;
	}
	/* payload bytes that were already inflated into the head buffer */
	tail = s->total_out - used;
	if (used > 0 && tail > 0) {
		/* never copy more than the header says the object holds */
		if (tail > hdr->size)
			tail = hdr->size;
		memcpy(buf, head + used, tail);
	}
	/* from here on `used` counts payload bytes already placed in buf */
	used = tail;

	/*
	 * inflate the remainder of the object data, if any
	 */
	if (hdr->size < used)
		inflateEnd(s);
	else {
		/* finish_inflate() ends the stream whether or not it succeeds */
		set_stream_output(s, buf + used, hdr->size - used);
		if (finish_inflate(s)) {
			free(buf);
			return NULL;
		}
	}

	return buf;
}
290
291 /*
292 * At one point, there was a loose object format that was intended to
293 * mimic the format used in pack-files. This was to allow easy copying
294 * of loose object data into packs. This format is no longer used, but
295 * we must still read it.
296 */
297 static int inflate_packlike_loose_disk_obj(git_rawobj *out, gitfo_buf *obj)
298 {
299 unsigned char *in, *buf;
300 obj_hdr hdr;
301 size_t len, used;
302
303 /*
304 * read the object header, which is an (uncompressed)
305 * binary encoding of the object type and size.
306 */
307 if ((used = get_binary_object_header(&hdr, obj)) == 0)
308 return GIT_ERROR;
309
310 if (!git_object_typeisloose(hdr.type))
311 return GIT_ERROR;
312
313 /*
314 * allocate a buffer and inflate the data into it
315 */
316 buf = git__malloc(hdr.size + 1);
317 if (!buf)
318 return GIT_ERROR;
319
320 in = ((unsigned char *)obj->data) + used;
321 len = obj->len - used;
322 if (inflate_buffer(in, len, buf, hdr.size)) {
323 free(buf);
324 return GIT_ERROR;
325 }
326 buf[hdr.size] = '\0';
327
328 out->data = buf;
329 out->len = hdr.size;
330 out->type = hdr.type;
331
332 return GIT_SUCCESS;
333 }
334
335 static int inflate_disk_obj(git_rawobj *out, gitfo_buf *obj)
336 {
337 unsigned char head[64], *buf;
338 z_stream zs;
339 int z_status;
340 obj_hdr hdr;
341 size_t used;
342
343 /*
344 * check for a pack-like loose object
345 */
346 if (!is_zlib_compressed_data(obj->data))
347 return inflate_packlike_loose_disk_obj(out, obj);
348
349 /*
350 * inflate the initial part of the io buffer in order
351 * to parse the object header (type and size).
352 */
353 if ((z_status = start_inflate(&zs, obj, head, sizeof(head))) < Z_OK)
354 return GIT_ERROR;
355
356 if ((used = get_object_header(&hdr, head)) == 0)
357 return GIT_ERROR;
358
359 if (!git_object_typeisloose(hdr.type))
360 return GIT_ERROR;
361
362 /*
363 * allocate a buffer and inflate the object data into it
364 * (including the initial sequence in the head buffer).
365 */
366 if ((buf = inflate_tail(&zs, head, used, &hdr)) == NULL)
367 return GIT_ERROR;
368 buf[hdr.size] = '\0';
369
370 out->data = buf;
371 out->len = hdr.size;
372 out->type = hdr.type;
373
374 return GIT_SUCCESS;
375 }
376
377
378
379
380
381
382 /***********************************************************
383 *
384 * ODB OBJECT READING & WRITING
385 *
386 * Backend for the public API; read headers and full objects
387 * from the ODB. Write raw data to the ODB.
388 *
389 ***********************************************************/
390
391 static int read_loose(git_rawobj *out, const char *loc)
392 {
393 int error;
394 gitfo_buf obj = GITFO_BUF_INIT;
395
396 assert(out && loc);
397
398 out->data = NULL;
399 out->len = 0;
400 out->type = GIT_OBJ_BAD;
401
402 if (gitfo_read_file(&obj, loc) < 0)
403 return GIT_ENOTFOUND;
404
405 error = inflate_disk_obj(out, &obj);
406 gitfo_free_buf(&obj);
407
408 return error;
409 }
410
411 static int read_header_loose(git_rawobj *out, const char *loc)
412 {
413 int error = GIT_SUCCESS, z_return = Z_ERRNO, read_bytes;
414 git_file fd;
415 z_stream zs;
416 obj_hdr header_obj;
417 unsigned char raw_buffer[16], inflated_buffer[64];
418
419 assert(out && loc);
420
421 out->data = NULL;
422
423 if ((fd = gitfo_open(loc, O_RDONLY)) < 0)
424 return GIT_ENOTFOUND;
425
426 init_stream(&zs, inflated_buffer, sizeof(inflated_buffer));
427
428 if (inflateInit(&zs) < Z_OK) {
429 error = GIT_EZLIB;
430 goto cleanup;
431 }
432
433 do {
434 if ((read_bytes = read(fd, raw_buffer, sizeof(raw_buffer))) > 0) {
435 set_stream_input(&zs, raw_buffer, read_bytes);
436 z_return = inflate(&zs, 0);
437 }
438 } while (z_return == Z_OK);
439
440 if ((z_return != Z_STREAM_END && z_return != Z_BUF_ERROR)
441 || get_object_header(&header_obj, inflated_buffer) == 0
442 || git_object_typeisloose(header_obj.type) == 0) {
443 error = GIT_EOBJCORRUPTED;
444 goto cleanup;
445 }
446
447 out->len = header_obj.size;
448 out->type = header_obj.type;
449
450 cleanup:
451 finish_inflate(&zs);
452 gitfo_close(fd);
453 return error;
454 }
455
456 static int locate_object(char *object_location, loose_backend *backend, const git_oid *oid)
457 {
458 object_file_name(object_location, GIT_PATH_MAX, backend->objects_dir, oid);
459 return gitfo_exists(object_location);
460 }
461
462
463
464
465
466
467
468
469
470 /***********************************************************
471 *
472 * LOOSE BACKEND PUBLIC API
473 *
474 * Implement the git_odb_backend API calls
475 *
476 ***********************************************************/
477
478 int loose_backend__read_header(size_t *len_p, git_otype *type_p, git_odb_backend *backend, const git_oid *oid)
479 {
480 char object_path[GIT_PATH_MAX];
481 git_rawobj raw;
482 int error;
483
484 assert(backend && oid);
485
486 if (locate_object(object_path, (loose_backend *)backend, oid) < 0)
487 return GIT_ENOTFOUND;
488
489 if ((error = read_header_loose(&raw, object_path)) < GIT_SUCCESS)
490 return error;
491
492 *len_p = raw.len;
493 *type_p = raw.type;
494 return GIT_SUCCESS;
495 }
496
497 int loose_backend__read(void **buffer_p, size_t *len_p, git_otype *type_p, git_odb_backend *backend, const git_oid *oid)
498 {
499 char object_path[GIT_PATH_MAX];
500 git_rawobj raw;
501 int error;
502
503 assert(backend && oid);
504
505 if (locate_object(object_path, (loose_backend *)backend, oid) < 0)
506 return GIT_ENOTFOUND;
507
508 if ((error = read_loose(&raw, object_path)) < GIT_SUCCESS)
509 return error;
510
511 *buffer_p = raw.data;
512 *len_p = raw.len;
513 *type_p = raw.type;
514
515 return GIT_SUCCESS;
516 }
517
518 int loose_backend__exists(git_odb_backend *backend, const git_oid *oid)
519 {
520 char object_path[GIT_PATH_MAX];
521
522 assert(backend && oid);
523
524 return locate_object(object_path, (loose_backend *)backend, oid) == GIT_SUCCESS;
525 }
526
527 int loose_backend__stream_fwrite(git_oid *oid, git_odb_stream *_stream)
528 {
529 loose_writestream *stream = (loose_writestream *)_stream;
530 loose_backend *backend = (loose_backend *)_stream->backend;
531
532 int error;
533 char final_path[GIT_PATH_MAX];
534
535 if ((error = git_filebuf_hash(oid, &stream->fbuf)) < GIT_SUCCESS)
536 return error;
537
538 if (object_file_name(final_path, sizeof(final_path), backend->objects_dir, oid))
539 return GIT_ENOMEM;
540
541 if ((error = gitfo_mkdir_2file(final_path)) < GIT_SUCCESS)
542 return error;
543
544 stream->finished = 1;
545 return git_filebuf_commit_at(&stream->fbuf, final_path);
546 }
547
548 int loose_backend__stream_write(git_odb_stream *_stream, const char *data, size_t len)
549 {
550 loose_writestream *stream = (loose_writestream *)_stream;
551 return git_filebuf_write(&stream->fbuf, data, len);
552 }
553
554 void loose_backend__stream_free(git_odb_stream *_stream)
555 {
556 loose_writestream *stream = (loose_writestream *)_stream;
557
558 if (!stream->finished)
559 git_filebuf_cleanup(&stream->fbuf);
560
561 free(stream);
562 }
563
564 static int format_object_header(char *hdr, size_t n, size_t obj_len, git_otype obj_type)
565 {
566 const char *type_str = git_object_type2string(obj_type);
567 int len = snprintf(hdr, n, "%s %"PRIuZ, type_str, obj_len);
568
569 assert(len > 0); /* otherwise snprintf() is broken */
570 assert(((size_t) len) < n); /* otherwise the caller is broken! */
571
572 if (len < 0 || ((size_t) len) >= n)
573 return GIT_ERROR;
574 return len+1;
575 }
576
577 int loose_backend__stream(git_odb_stream **stream_out, git_odb_backend *_backend, size_t length, git_otype type)
578 {
579 loose_backend *backend;
580 loose_writestream *stream;
581
582 char hdr[64];
583 int hdrlen;
584 int error;
585
586 assert(_backend);
587
588 backend = (loose_backend *)_backend;
589 *stream_out = NULL;
590
591 hdrlen = format_object_header(hdr, sizeof(hdr), length, type);
592 if (hdrlen < GIT_SUCCESS)
593 return GIT_EOBJCORRUPTED;
594
595 stream = git__calloc(1, sizeof(loose_writestream));
596 if (stream == NULL)
597 return GIT_ENOMEM;
598
599 stream->stream.backend = _backend;
600 stream->stream.read = NULL; /* read only */
601 stream->stream.write = &loose_backend__stream_write;
602 stream->stream.finalize_write = &loose_backend__stream_fwrite;
603 stream->stream.free = &loose_backend__stream_free;
604 stream->stream.mode = GIT_STREAM_WRONLY;
605
606 error = git_filebuf_open(&stream->fbuf, NULL,
607 GIT_FILEBUF_HASH_CONTENTS |
608 GIT_FILEBUF_DEFLATE_CONTENTS |
609 GIT_FILEBUF_TEMPORARY);
610
611 if (error < GIT_SUCCESS) {
612 free(stream);
613 return error;
614 }
615
616 error = stream->stream.write((git_odb_stream *)stream, hdr, hdrlen);
617 if (error < GIT_SUCCESS) {
618 git_filebuf_cleanup(&stream->fbuf);
619 free(stream);
620 return error;
621 }
622
623 *stream_out = (git_odb_stream *)stream;
624 return GIT_SUCCESS;
625 }
626
627 void loose_backend__free(git_odb_backend *_backend)
628 {
629 loose_backend *backend;
630 assert(_backend);
631 backend = (loose_backend *)_backend;
632
633 free(backend->objects_dir);
634 free(backend);
635 }
636
637 int git_odb_backend_loose(git_odb_backend **backend_out, const char *objects_dir)
638 {
639 loose_backend *backend;
640
641 backend = git__calloc(1, sizeof(loose_backend));
642 if (backend == NULL)
643 return GIT_ENOMEM;
644
645 backend->objects_dir = git__strdup(objects_dir);
646 if (backend->objects_dir == NULL) {
647 free(backend);
648 return GIT_ENOMEM;
649 }
650
651 backend->object_zlib_level = Z_BEST_SPEED;
652 backend->fsync_object_files = 0;
653
654 backend->parent.read = &loose_backend__read;
655 backend->parent.read_header = &loose_backend__read_header;
656 backend->parent.writestream = &loose_backend__stream;
657 backend->parent.exists = &loose_backend__exists;
658 backend->parent.free = &loose_backend__free;
659
660 *backend_out = (git_odb_backend *)backend;
661 return GIT_SUCCESS;
662 }