]> git.proxmox.com Git - libgit2.git/blob - src/odb.c
Add a pack index 'virtual function' to fetch an index entry
[libgit2.git] / src / odb.c
1 /*
2 * This file is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2,
4 * as published by the Free Software Foundation.
5 *
6 * In addition to the permissions in the GNU General Public License,
7 * the authors give you unlimited permission to link the compiled
8 * version of this file into combinations with other programs,
9 * and to distribute those combinations without any restriction
10 * coming from the use of this file. (The General Public License
11 * restrictions do apply in other respects; for example, they cover
12 * modification of the file, and distribution when not linked into
13 * a combined executable.)
14 *
15 * This file is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; see the file COPYING. If not, write to
22 * the Free Software Foundation, 51 Franklin Street, Fifth Floor,
23 * Boston, MA 02110-1301, USA.
24 */
25
26 #include "common.h"
27 #include "git/odb.h"
28 #include "git/zlib.h"
29 #include "fileops.h"
30 #include "hash.h"
31 #include "odb.h"
32
33 #define GIT_PACK_NAME_MAX (5 + 40 + 1)
34
/** One entry of a pack index, as filled in by the idx_get accessors. */
typedef struct {
	uint32_t n;		/* entry number the caller asked for */
	unsigned char *oid;	/* points at the entry's raw object id */
	off_t offset;		/* offset value decoded from the index */
	off_t size;		/* distance from offset to the following offset */
} index_entry;
41
42 struct git_pack {
43 git_odb *db;
44 git_lck lock;
45
46 /** Functions to access idx_map. */
47 int (*idx_search)(
48 uint32_t *,
49 struct git_pack *,
50 const git_oid *);
51 int (*idx_search_offset)(
52 uint32_t *,
53 struct git_pack *,
54 off_t);
55 int (*idx_get)(
56 index_entry *,
57 struct git_pack *,
58 uint32_t n);
59
60 /** The .idx file, mapped into memory. */
61 git_file idx_fd;
62 git_map idx_map;
63 uint32_t *im_fanout;
64 unsigned char *im_oid;
65 uint32_t *im_crc;
66 uint32_t *im_offset32;
67 uint32_t *im_offset64;
68 uint32_t *im_off_idx;
69 uint32_t *im_off_next;
70
71 /** Number of objects in this pack. */
72 uint32_t obj_cnt;
73
74 /** The size of the .pack file. */
75 off_t pack_size;
76
77 /** The mtime of the .pack file. */
78 time_t pack_mtime;
79
80 /** Number of git_packlist we appear in. */
81 unsigned int refcnt;
82
83 /** Number of active users of the idx_map data. */
84 unsigned int idxcnt;
85 unsigned
86 invalid:1 /* the pack is unable to be read by libgit2 */
87 ;
88
89 /** Name of the pack file(s), without extension ("pack-abc"). */
90 char pack_name[GIT_PACK_NAME_MAX];
91 };
92 typedef struct git_pack git_pack;
93
94 typedef struct {
95 size_t n_packs;
96 unsigned int refcnt;
97 git_pack *packs[GIT_FLEX_ARRAY];
98 } git_packlist;
99
100 struct git_odb {
101 git_lck lock;
102
103 /** Path to the "objects" directory. */
104 char *objects_dir;
105
106 /** Known pack files from ${objects_dir}/packs. */
107 git_packlist *packlist;
108
109 /** Alternate databases to search. */
110 git_odb **alternates;
111 size_t n_alternates;
112
113 /** loose object zlib compression level. */
114 int object_zlib_level;
115 /** loose object file fsync flag. */
116 int fsync_object_files;
117 };
118
119 typedef struct { /* object header data */
120 git_otype type; /* object type */
121 size_t size; /* object size */
122 } obj_hdr;
123
/**
 * Object type table, indexed by the numeric git_otype value.
 * `str` is the type name string; `loose` is non-zero when the type
 * is valid for a loose object.
 */
static struct {
	const char *str;
	int loose;
} obj_type_table[] = {
	/* 0 = GIT_OBJ__EXT1     */ { "",          0 },
	/* 1 = GIT_OBJ_COMMIT    */ { "commit",    1 },
	/* 2 = GIT_OBJ_TREE      */ { "tree",      1 },
	/* 3 = GIT_OBJ_BLOB      */ { "blob",      1 },
	/* 4 = GIT_OBJ_TAG       */ { "tag",       1 },
	/* 5 = GIT_OBJ__EXT2     */ { "",          0 },
	/* 6 = GIT_OBJ_OFS_DELTA */ { "OFS_DELTA", 0 },
	/* 7 = GIT_OBJ_REF_DELTA */ { "REF_DELTA", 0 }
};
137
138 GIT_INLINE(uint32_t) decode32(void *b)
139 {
140 return ntohl(*((uint32_t *)b));
141 }
142
143 GIT_INLINE(uint64_t) decode64(void *b)
144 {
145 uint32_t *p = b;
146 return (((uint64_t)ntohl(p[0])) << 32) | ntohl(p[1]);
147 }
148
149 const char *git_obj_type_to_string(git_otype type)
150 {
151 if (type < 0 || ((size_t) type) >= ARRAY_SIZE(obj_type_table))
152 return "";
153 return obj_type_table[type].str;
154 }
155
156 git_otype git_obj_string_to_type(const char *str)
157 {
158 size_t i;
159
160 if (!str || !*str)
161 return GIT_OBJ_BAD;
162
163 for (i = 0; i < ARRAY_SIZE(obj_type_table); i++)
164 if (!strcmp(str, obj_type_table[i].str))
165 return (git_otype) i;
166
167 return GIT_OBJ_BAD;
168 }
169
170 int git_obj__loose_object_type(git_otype type)
171 {
172 if (type < 0 || ((size_t) type) >= ARRAY_SIZE(obj_type_table))
173 return 0;
174 return obj_type_table[type].loose;
175 }
176
177 static int format_object_header(char *hdr, size_t n, git_obj *obj)
178 {
179 const char *type_str = git_obj_type_to_string(obj->type);
180 int len = snprintf(hdr, n, "%s %"PRIuZ, type_str, obj->len);
181
182 assert(len > 0); /* otherwise snprintf() is broken */
183 assert(((size_t) len) < n); /* otherwise the caller is broken! */
184
185 if (len < 0 || ((size_t) len) >= n)
186 return GIT_ERROR;
187 return len+1;
188 }
189
190 static int hash_obj(git_oid *id, char *hdr, size_t n, int *len, git_obj *obj)
191 {
192 git_buf_vec vec[2];
193 int hdrlen;
194
195 assert(id && hdr && len && obj);
196
197 if (!git_obj__loose_object_type(obj->type))
198 return GIT_ERROR;
199
200 if (!obj->data && obj->len != 0)
201 return GIT_ERROR;
202
203 if ((hdrlen = format_object_header(hdr, n, obj)) < 0)
204 return GIT_ERROR;
205
206 *len = hdrlen;
207
208 vec[0].data = hdr;
209 vec[0].len = hdrlen;
210 vec[1].data = obj->data;
211 vec[1].len = obj->len;
212
213 git_hash_vec(id, vec, 2);
214
215 return GIT_SUCCESS;
216 }
217
218 int git_obj_hash(git_oid *id, git_obj *obj)
219 {
220 char hdr[64];
221 int hdrlen;
222
223 assert(id && obj);
224
225 return hash_obj(id, hdr, sizeof(hdr), &hdrlen, obj);
226 }
227
228 static size_t object_file_name(char *name, size_t n, char *dir, const git_oid *id)
229 {
230 size_t len = strlen(dir);
231
232 /* check length: 43 = 40 hex sha1 chars + 2 * '/' + '\0' */
233 if (len+43 > n)
234 return len+43;
235
236 /* the object dir: eg $GIT_DIR/objects */
237 strcpy(name, dir);
238 if (name[len-1] != '/')
239 name[len++] = '/';
240
241 /* loose object filename: aa/aaa... (41 bytes) */
242 git_oid_pathfmt(&name[len], id);
243 name[len+41] = '\0';
244
245 return 0;
246 }
247
/**
 * Test the first two bytes of data for a zlib stream header: the
 * first byte must be 0x78 and the two bytes, read as a big-endian
 * 16-bit number, must be a multiple of 31.
 */
static int is_zlib_compressed_data(unsigned char *data)
{
	unsigned int header = ((unsigned int)(data[0]) << 8) + data[1];

	if (data[0] != 0x78)
		return 0;
	return (header % 31) == 0;
}
255
256 static size_t get_binary_object_header(obj_hdr *hdr, gitfo_buf *obj)
257 {
258 unsigned char c;
259 unsigned char *data = obj->data;
260 size_t shift, size, used = 0;
261
262 if (obj->len == 0)
263 return 0;
264
265 c = data[used++];
266 hdr->type = (c >> 4) & 7;
267
268 size = c & 15;
269 shift = 4;
270 while (c & 0x80) {
271 if (obj->len <= used)
272 return 0;
273 if (sizeof(size_t) * 8 <= shift)
274 return 0;
275 c = data[used++];
276 size += (c & 0x7f) << shift;
277 shift += 7;
278 }
279 hdr->size = size;
280
281 return used;
282 }
283
284 static size_t get_object_header(obj_hdr *hdr, unsigned char *data)
285 {
286 char c, typename[10];
287 size_t size, used = 0;
288
289 /*
290 * type name string followed by space.
291 */
292 while ((c = data[used]) != ' ') {
293 typename[used++] = c;
294 if (used >= sizeof(typename))
295 return 0;
296 }
297 typename[used] = 0;
298 if (used == 0)
299 return 0;
300 hdr->type = git_obj_string_to_type(typename);
301 used++; /* consume the space */
302
303 /*
304 * length follows immediately in decimal (without
305 * leading zeros).
306 */
307 size = data[used++] - '0';
308 if (size > 9)
309 return 0;
310 if (size) {
311 while ((c = data[used]) != '\0') {
312 size_t d = c - '0';
313 if (d > 9)
314 break;
315 used++;
316 size = size * 10 + d;
317 }
318 }
319 hdr->size = size;
320
321 /*
322 * the length must be followed by a zero byte
323 */
324 if (data[used++] != '\0')
325 return 0;
326
327 return used;
328 }
329
330 static void init_stream(z_stream *s, void *out, size_t len)
331 {
332 memset(s, 0, sizeof(*s));
333 s->next_out = out;
334 s->avail_out = len;
335 }
336
337 static void set_stream_input(z_stream *s, void *in, size_t len)
338 {
339 s->next_in = in;
340 s->avail_in = len;
341 }
342
343 static void set_stream_output(z_stream *s, void *out, size_t len)
344 {
345 s->next_out = out;
346 s->avail_out = len;
347 }
348
349 static int start_inflate(z_stream *s, gitfo_buf *obj, void *out, size_t len)
350 {
351 int status;
352
353 init_stream(s, out, len);
354 set_stream_input(s, obj->data, obj->len);
355
356 if ((status = inflateInit(s)) < Z_OK)
357 return status;
358
359 return inflate(s, 0);
360 }
361
362 static int finish_inflate(z_stream *s)
363 {
364 int status = Z_OK;
365
366 while (status == Z_OK)
367 status = inflate(s, Z_FINISH);
368
369 inflateEnd(s);
370
371 if ((status != Z_STREAM_END) || (s->avail_in != 0))
372 return GIT_ERROR;
373
374 return GIT_SUCCESS;
375 }
376
377 static void *inflate_tail(z_stream *s, void *hb, size_t used, obj_hdr *hdr)
378 {
379 unsigned char *buf, *head = hb;
380 size_t tail;
381
382 /*
383 * allocate a buffer to hold the inflated data and copy the
384 * initial sequence of inflated data from the tail of the
385 * head buffer, if any.
386 */
387 if ((buf = git__malloc(hdr->size + 1)) == NULL) {
388 inflateEnd(s);
389 return NULL;
390 }
391 tail = s->total_out - used;
392 if (used > 0 && tail > 0) {
393 if (tail > hdr->size)
394 tail = hdr->size;
395 memcpy(buf, head + used, tail);
396 }
397 used = tail;
398
399 /*
400 * inflate the remainder of the object data, if any
401 */
402 if (hdr->size < used)
403 inflateEnd(s);
404 else {
405 set_stream_output(s, buf + used, hdr->size - used);
406 if (finish_inflate(s)) {
407 free(buf);
408 return NULL;
409 }
410 }
411
412 return buf;
413 }
414
415 static int inflate_buffer(void *in, size_t inlen, void *out, size_t outlen)
416 {
417 z_stream zs;
418 int status = Z_OK;
419
420 init_stream(&zs, out, outlen);
421 set_stream_input(&zs, in, inlen);
422
423 if (inflateInit(&zs) < Z_OK)
424 return GIT_ERROR;
425
426 while (status == Z_OK)
427 status = inflate(&zs, Z_FINISH);
428
429 inflateEnd(&zs);
430
431 if ((status != Z_STREAM_END) || (zs.avail_in != 0))
432 return GIT_ERROR;
433
434 if (zs.total_out != outlen)
435 return GIT_ERROR;
436
437 return GIT_SUCCESS;
438 }
439
440 /*
441 * At one point, there was a loose object format that was intended to
442 * mimic the format used in pack-files. This was to allow easy copying
443 * of loose object data into packs. This format is no longer used, but
444 * we must still read it.
445 */
446 static int inflate_packlike_loose_disk_obj(git_obj *out, gitfo_buf *obj)
447 {
448 unsigned char *in, *buf;
449 obj_hdr hdr;
450 size_t len, used;
451
452 /*
453 * read the object header, which is an (uncompressed)
454 * binary encoding of the object type and size.
455 */
456 if ((used = get_binary_object_header(&hdr, obj)) == 0)
457 return GIT_ERROR;
458
459 if (!git_obj__loose_object_type(hdr.type))
460 return GIT_ERROR;
461
462 /*
463 * allocate a buffer and inflate the data into it
464 */
465 buf = git__malloc(hdr.size + 1);
466 if (!buf)
467 return GIT_ERROR;
468
469 in = ((unsigned char *)obj->data) + used;
470 len = obj->len - used;
471 if (inflate_buffer(in, len, buf, hdr.size)) {
472 free(buf);
473 return GIT_ERROR;
474 }
475 buf[hdr.size] = '\0';
476
477 out->data = buf;
478 out->len = hdr.size;
479 out->type = hdr.type;
480
481 return GIT_SUCCESS;
482 }
483
484 static int inflate_disk_obj(git_obj *out, gitfo_buf *obj)
485 {
486 unsigned char head[64], *buf;
487 z_stream zs;
488 int z_status;
489 obj_hdr hdr;
490 size_t used;
491
492 /*
493 * check for a pack-like loose object
494 */
495 if (!is_zlib_compressed_data(obj->data))
496 return inflate_packlike_loose_disk_obj(out, obj);
497
498 /*
499 * inflate the initial part of the io buffer in order
500 * to parse the object header (type and size).
501 */
502 if ((z_status = start_inflate(&zs, obj, head, sizeof(head))) < Z_OK)
503 return GIT_ERROR;
504
505 if ((used = get_object_header(&hdr, head)) == 0)
506 return GIT_ERROR;
507
508 if (!git_obj__loose_object_type(hdr.type))
509 return GIT_ERROR;
510
511 /*
512 * allocate a buffer and inflate the object data into it
513 * (including the initial sequence in the head buffer).
514 */
515 if ((buf = inflate_tail(&zs, head, used, &hdr)) == NULL)
516 return GIT_ERROR;
517 buf[hdr.size] = '\0';
518
519 out->data = buf;
520 out->len = hdr.size;
521 out->type = hdr.type;
522
523 return GIT_SUCCESS;
524 }
525
526 static int make_temp_file(git_file *fd, char *tmp, size_t n, char *file)
527 {
528 char *template = "/tmp_obj_XXXXXX";
529 size_t tmplen = strlen(template);
530 int dirlen;
531
532 if ((dirlen = git__dirname(tmp, n, file)) < 0)
533 return GIT_ERROR;
534
535 if ((dirlen + tmplen) >= n)
536 return GIT_ERROR;
537
538 strcpy(tmp + dirlen, (dirlen) ? template : template + 1);
539
540 *fd = gitfo_mkstemp(tmp);
541 if (*fd < 0 && dirlen) {
542 /* create directory if it doesn't exist */
543 tmp[dirlen] = '\0';
544 if ((gitfo_exists(tmp) < 0) && gitfo_mkdir(tmp, 0755))
545 return GIT_ERROR;
546 /* try again */
547 strcpy(tmp + dirlen, template);
548 *fd = gitfo_mkstemp(tmp);
549 }
550 if (*fd < 0)
551 return GIT_ERROR;
552
553 return GIT_SUCCESS;
554 }
555
556 static int deflate_buf(z_stream *s, void *in, size_t len, int flush)
557 {
558 int status = Z_OK;
559
560 set_stream_input(s, in, len);
561 while (status == Z_OK) {
562 status = deflate(s, flush);
563 if (s->avail_in == 0)
564 break;
565 }
566 return status;
567 }
568
569 static int deflate_obj(gitfo_buf *buf, char *hdr, int hdrlen, git_obj *obj, int level)
570 {
571 z_stream zs;
572 int status;
573 size_t size;
574
575 assert(buf && !buf->data && hdr && obj);
576 assert(level == Z_DEFAULT_COMPRESSION || (level >= 0 && level <= 9));
577
578 buf->data = NULL;
579 buf->len = 0;
580 init_stream(&zs, NULL, 0);
581
582 if (deflateInit(&zs, level) < Z_OK)
583 return GIT_ERROR;
584
585 size = deflateBound(&zs, hdrlen + obj->len);
586
587 if ((buf->data = git__malloc(size)) == NULL) {
588 deflateEnd(&zs);
589 return GIT_ERROR;
590 }
591
592 set_stream_output(&zs, buf->data, size);
593
594 /* compress the header */
595 status = deflate_buf(&zs, hdr, hdrlen, Z_NO_FLUSH);
596
597 /* if header compressed OK, compress the object */
598 if (status == Z_OK)
599 status = deflate_buf(&zs, obj->data, obj->len, Z_FINISH);
600
601 if (status != Z_STREAM_END) {
602 deflateEnd(&zs);
603 free(buf->data);
604 buf->data = NULL;
605 return GIT_ERROR;
606 }
607
608 buf->len = zs.total_out;
609 deflateEnd(&zs);
610
611 return GIT_SUCCESS;
612 }
613
614 static int write_obj(gitfo_buf *buf, git_oid *id, git_odb *db)
615 {
616 char file[GIT_PATH_MAX];
617 char temp[GIT_PATH_MAX];
618 git_file fd;
619
620 if (object_file_name(file, sizeof(file), db->objects_dir, id))
621 return GIT_ERROR;
622
623 if (make_temp_file(&fd, temp, sizeof(temp), file) < 0)
624 return GIT_ERROR;
625
626 if (gitfo_write(fd, buf->data, buf->len) < 0) {
627 gitfo_close(fd);
628 gitfo_unlink(temp);
629 return GIT_ERROR;
630 }
631
632 if (db->fsync_object_files)
633 gitfo_fsync(fd);
634 gitfo_close(fd);
635 gitfo_chmod(temp, 0444);
636
637 if (gitfo_move_file(temp, file) < 0) {
638 gitfo_unlink(temp);
639 return GIT_ERROR;
640 }
641
642 return GIT_SUCCESS;
643 }
644
645 static int open_alternates(git_odb *db)
646 {
647 unsigned n = 0;
648
649 gitlck_lock(&db->lock);
650 if (db->alternates) {
651 gitlck_unlock(&db->lock);
652 return 1;
653 }
654
655 db->alternates = git__malloc(sizeof(*db->alternates) * (n + 1));
656 if (!db->alternates) {
657 gitlck_unlock(&db->lock);
658 return -1;
659 }
660
661 db->alternates[n] = NULL;
662 db->n_alternates = n;
663 gitlck_unlock(&db->lock);
664 return 0;
665 }
666
667 static int pack_openidx_map(git_pack *p)
668 {
669 char pb[GIT_PATH_MAX];
670 off_t len;
671
672 if (git__fmt(pb, sizeof(pb), "%s/pack/%s.idx",
673 p->db->objects_dir,
674 p->pack_name) < 0)
675 return GIT_ERROR;
676
677 if ((p->idx_fd = gitfo_open(pb, O_RDONLY)) < 0)
678 return GIT_ERROR;
679
680 if ((len = gitfo_size(p->idx_fd)) < 0
681 || !git__is_sizet(len)
682 || gitfo_map_ro(&p->idx_map, p->idx_fd, 0, (size_t)len)) {
683 gitfo_close(p->idx_fd);
684 return GIT_ERROR;
685 }
686
687 return GIT_SUCCESS;
688 }
689
/** Scratch record used while building the offset-ordered index. */
typedef struct {
	off_t offset;	/* offset of the entry within the pack */
	uint32_t n;	/* entry number within the index */
} offset_idx_info;
694
695 static int cmp_offset_idx_info(const void *lhs, const void *rhs)
696 {
697 const offset_idx_info *a = lhs;
698 const offset_idx_info *b = rhs;
699 return (a->offset < b->offset) ? -1 : (a->offset > b->offset) ? 1 : 0;
700 }
701
702 static int make_offset_index(git_pack *p, offset_idx_info *data)
703 {
704 off_t min_off = 3 * 4, max_off = p->pack_size - GIT_OID_RAWSZ;
705 uint32_t *idx, *next;
706 uint32_t j;
707
708 qsort(data, p->obj_cnt, sizeof(*data), cmp_offset_idx_info);
709
710 if (data[0].offset < min_off || data[p->obj_cnt].offset > max_off)
711 return GIT_ERROR;
712
713 if ((idx = git__malloc(sizeof(*idx) * (p->obj_cnt+1))) == NULL)
714 return GIT_ERROR;
715 if ((next = git__malloc(sizeof(*next) * p->obj_cnt)) == NULL) {
716 free(idx);
717 return GIT_ERROR;
718 }
719
720 for (j = 0; j < p->obj_cnt+1; j++)
721 idx[j] = data[j].n;
722
723 for (j = 0; j < p->obj_cnt; j++) {
724 assert(idx[j] < p->obj_cnt);
725 assert(idx[j+1] < p->obj_cnt+1);
726
727 next[idx[j]] = idx[j+1];
728 }
729
730 p->im_off_idx = idx;
731 p->im_off_next = next;
732 return GIT_SUCCESS;
733 }
734
735 static int idxv1_search(uint32_t *out, git_pack *p, const git_oid *id)
736 {
737 unsigned char *data = p->im_oid;
738 uint32_t lo = id->id[0] ? p->im_fanout[id->id[0] - 1] : 0;
739 uint32_t hi = p->im_fanout[id->id[0]];
740
741 do {
742 uint32_t mid = (lo + hi) >> 1;
743 uint32_t pos = 24 * mid;
744 int cmp = memcmp(id->id, data + pos + 4, 20);
745 if (cmp < 0)
746 hi = mid;
747 else if (!cmp) {
748 *out = mid;
749 return GIT_SUCCESS;
750 } else
751 lo = mid + 1;
752 } while (lo < hi);
753 return GIT_ENOTFOUND;
754 }
755
756 static int idxv1_search_offset(uint32_t *out, git_pack *p, off_t offset)
757 {
758 if (offset > 0 && offset < (p->pack_size - GIT_OID_RAWSZ)) {
759 uint32_t lo = 0, hi = p->obj_cnt+1;
760 unsigned char *data = p->im_oid;
761 uint32_t *idx = p->im_off_idx;
762 do {
763 uint32_t mid = (lo + hi) >> 1;
764 uint32_t n = idx[mid];
765 uint32_t pos = n * (GIT_OID_RAWSZ + 4);
766 off_t here = decode32(data + pos);
767 if (offset < here)
768 hi = mid;
769 else if (offset == here) {
770 *out = n;
771 return GIT_SUCCESS;
772 } else
773 lo = mid + 1;
774 } while (lo < hi);
775 }
776 return GIT_ENOTFOUND;
777 }
778
779 static int idxv1_get(index_entry *e, git_pack *p, uint32_t n)
780 {
781 unsigned char *data = p->im_oid;
782 uint32_t *next = p->im_off_next;
783
784 if (n < p->obj_cnt) {
785 uint32_t pos = n * (GIT_OID_RAWSZ + 4);
786 off_t next_off = p->pack_size - GIT_OID_RAWSZ;
787 e->n = n;
788 e->oid = data + pos + 4;
789 e->offset = decode32(data + pos);
790 if (next[n] < p->obj_cnt) {
791 pos = next[n] * (GIT_OID_RAWSZ + 4);
792 next_off = decode32(data + pos);
793 }
794 e->size = next_off - e->offset;
795 return GIT_SUCCESS;
796 }
797 return GIT_ENOTFOUND;
798 }
799
800 static int pack_openidx_v1(git_pack *p)
801 {
802 uint32_t *src_fanout = p->idx_map.data;
803 uint32_t *im_fanout;
804 offset_idx_info *info;
805 size_t expsz;
806 uint32_t j;
807
808
809 if ((im_fanout = git__malloc(sizeof(*im_fanout) * 256)) == NULL)
810 return GIT_ERROR;
811
812 im_fanout[0] = decode32(&src_fanout[0]);
813 for (j = 1; j < 256; j++) {
814 im_fanout[j] = decode32(&src_fanout[j]);
815 if (im_fanout[j] < im_fanout[j - 1]) {
816 free(im_fanout);
817 return GIT_ERROR;
818 }
819 }
820 p->obj_cnt = im_fanout[255];
821
822 expsz = 4 * 256 + 24 * p->obj_cnt + 2 * 20;
823 if (expsz != p->idx_map.len) {
824 free(im_fanout);
825 return GIT_ERROR;
826 }
827
828 p->idx_search = idxv1_search;
829 p->idx_search_offset = idxv1_search_offset;
830 p->idx_get = idxv1_get;
831 p->im_fanout = im_fanout;
832 p->im_oid = (unsigned char *)(src_fanout + 256);
833
834 if ((info = git__malloc(sizeof(*info) * (p->obj_cnt+1))) == NULL) {
835 free(im_fanout);
836 return GIT_ERROR;
837 }
838
839 for (j = 0; j < p->obj_cnt; j++) {
840 uint32_t pos = j * (GIT_OID_RAWSZ + 4);
841 info[j].offset = decode32(p->im_oid + pos);
842 info[j].n = j;
843 }
844 info[p->obj_cnt].offset = p->pack_size - GIT_OID_RAWSZ;
845 info[p->obj_cnt].n = p->obj_cnt;
846
847 if (make_offset_index(p, info)) {
848 free(im_fanout);
849 free(info);
850 return GIT_ERROR;
851 }
852 free(info);
853
854 return GIT_SUCCESS;
855 }
856
857 static int idxv2_search(uint32_t *out, git_pack *p, const git_oid *id)
858 {
859 unsigned char *data = p->im_oid;
860 uint32_t lo = id->id[0] ? p->im_fanout[id->id[0] - 1] : 0;
861 uint32_t hi = p->im_fanout[id->id[0]];
862
863 do {
864 uint32_t mid = (lo + hi) >> 1;
865 uint32_t pos = 20 * mid;
866 int cmp = memcmp(id->id, data + pos, 20);
867 if (cmp < 0)
868 hi = mid;
869 else if (!cmp) {
870 *out = mid;
871 return GIT_SUCCESS;
872 } else
873 lo = mid + 1;
874 } while (lo < hi);
875 return GIT_ENOTFOUND;
876 }
877
878 static int idxv2_search_offset(uint32_t *out, git_pack *p, off_t offset)
879 {
880 if (offset > 0 && offset < (p->pack_size - GIT_OID_RAWSZ)) {
881 uint32_t lo = 0, hi = p->obj_cnt+1;
882 uint32_t *idx = p->im_off_idx;
883 do {
884 uint32_t mid = (lo + hi) >> 1;
885 uint32_t n = idx[mid];
886 uint32_t o32 = decode32(p->im_offset32 + n);
887 off_t here = o32;
888
889 if (o32 & 0x80000000) {
890 uint32_t o64_idx = (o32 & ~0x80000000);
891 here = decode64(p->im_offset64 + 2*o64_idx);
892 }
893
894 if (offset < here)
895 hi = mid;
896 else if (offset == here) {
897 *out = n;
898 return GIT_SUCCESS;
899 } else
900 lo = mid + 1;
901 } while (lo < hi);
902 }
903 return GIT_ENOTFOUND;
904 }
905
906 static int idxv2_get(index_entry *e, git_pack *p, uint32_t n)
907 {
908 unsigned char *data = p->im_oid;
909 uint32_t *next = p->im_off_next;
910
911 if (n < p->obj_cnt) {
912 uint32_t o32 = decode32(p->im_offset32 + n);
913 off_t next_off = p->pack_size - GIT_OID_RAWSZ;
914 e->n = n;
915 e->oid = data + n * GIT_OID_RAWSZ;
916 e->offset = o32;
917 if (o32 & 0x80000000) {
918 uint32_t o64_idx = (o32 & ~0x80000000);
919 e->offset = decode64(p->im_offset64 + 2*o64_idx);
920 }
921 if (next[n] < p->obj_cnt) {
922 o32 = decode32(p->im_offset32 + next[n]);
923 next_off = o32;
924 if (o32 & 0x80000000) {
925 uint32_t o64_idx = (o32 & ~0x80000000);
926 next_off = decode64(p->im_offset64 + 2*o64_idx);
927 }
928 }
929 e->size = next_off - e->offset;
930 return GIT_SUCCESS;
931 }
932 return GIT_ENOTFOUND;
933 }
934
935 static int pack_openidx_v2(git_pack *p)
936 {
937 unsigned char *data = p->idx_map.data;
938 uint32_t *src_fanout = (uint32_t *)(data + 8);
939 uint32_t *im_fanout;
940 offset_idx_info *info;
941 size_t sz, o64_sz, o64_len;
942 uint32_t j;
943
944 if ((im_fanout = git__malloc(sizeof(*im_fanout) * 256)) == NULL)
945 return GIT_ERROR;
946
947 im_fanout[0] = decode32(&src_fanout[0]);
948 for (j = 1; j < 256; j++) {
949 im_fanout[j] = decode32(&src_fanout[j]);
950 if (im_fanout[j] < im_fanout[j - 1]) {
951 free(im_fanout);
952 return GIT_ERROR;
953 }
954 }
955 p->obj_cnt = im_fanout[255];
956
957 /* minimum size of .idx file (with empty 64-bit offsets table): */
958 sz = 4 + 4 + 256 * 4 + p->obj_cnt * (20 + 4 + 4) + 2 * 20;
959 if (p->idx_map.len < sz) {
960 free(im_fanout);
961 return GIT_ERROR;
962 }
963
964 p->idx_search = idxv2_search;
965 p->idx_search_offset = idxv2_search_offset;
966 p->idx_get = idxv2_get;
967 p->im_fanout = im_fanout;
968 p->im_oid = (unsigned char *)(src_fanout + 256);
969 p->im_crc = (uint32_t *)(p->im_oid + 20 * p->obj_cnt);
970 p->im_offset32 = p->im_crc + p->obj_cnt;
971 p->im_offset64 = p->im_offset32 + p->obj_cnt;
972
973 if ((info = git__malloc(sizeof(*info) * (p->obj_cnt+1))) == NULL) {
974 free(im_fanout);
975 return GIT_ERROR;
976 }
977
978 /* check 64-bit offset table index values are within bounds */
979 o64_sz = p->idx_map.len - sz;
980 o64_len = o64_sz / 8;
981 for (j = 0; j < p->obj_cnt; j++) {
982 uint32_t o32 = decode32(p->im_offset32 + j);
983 off_t offset = o32;
984 if (o32 & 0x80000000) {
985 uint32_t o64_idx = (o32 & ~0x80000000);
986 if (o64_idx >= o64_len) {
987 free(im_fanout);
988 free(info);
989 return GIT_ERROR;
990 }
991 offset = decode64(p->im_offset64 + 2*o64_idx);
992 }
993 info[j].offset = offset;
994 info[j].n = j;
995 }
996 info[p->obj_cnt].offset = p->pack_size - GIT_OID_RAWSZ;
997 info[p->obj_cnt].n = p->obj_cnt;
998
999 if (make_offset_index(p, info)) {
1000 free(im_fanout);
1001 free(info);
1002 return GIT_ERROR;
1003 }
1004 free(info);
1005
1006 return GIT_SUCCESS;
1007 }
1008
1009 static int pack_stat(git_pack *p)
1010 {
1011 char pb[GIT_PATH_MAX];
1012 struct stat sb;
1013
1014 if (git__fmt(pb, sizeof(pb), "%s/pack/%s.pack",
1015 p->db->objects_dir,
1016 p->pack_name) < 0)
1017 return GIT_ERROR;
1018
1019 if (stat(pb, &sb) || !S_ISREG(sb.st_mode))
1020 return GIT_ERROR;
1021
1022 if (sb.st_size < (3 * 4 + GIT_OID_RAWSZ))
1023 return GIT_ERROR;
1024
1025 p->pack_size = sb.st_size;
1026 p->pack_mtime = sb.st_mtime;
1027
1028 return GIT_SUCCESS;
1029 }
1030
1031 static int pack_openidx(git_pack *p)
1032 {
1033 gitlck_lock(&p->lock);
1034
1035 if (p->invalid) {
1036 gitlck_unlock(&p->lock);
1037 return GIT_ERROR;
1038 }
1039
1040 if (++p->idxcnt == 1 && !p->idx_search) {
1041 int status, version;
1042 uint32_t *data;
1043
1044 if (pack_stat(p) || pack_openidx_map(p)) {
1045 p->invalid = 1;
1046 p->idxcnt--;
1047 gitlck_unlock(&p->lock);
1048 return GIT_ERROR;
1049 }
1050 data = p->idx_map.data;
1051 status = GIT_SUCCESS;
1052 version = 1;
1053
1054 if (decode32(&data[0]) == PACK_TOC)
1055 version = decode32(&data[1]);
1056
1057 switch (version) {
1058 case 1:
1059 status = pack_openidx_v1(p);
1060 break;
1061 case 2:
1062 status = pack_openidx_v2(p);
1063 break;
1064 default:
1065 status = GIT_ERROR;
1066 }
1067
1068 if (status != GIT_SUCCESS) {
1069 gitfo_free_map(&p->idx_map);
1070 p->invalid = 1;
1071 p->idxcnt--;
1072 gitlck_unlock(&p->lock);
1073 return status;
1074 }
1075 }
1076
1077 gitlck_unlock(&p->lock);
1078 return GIT_SUCCESS;
1079 }
1080
1081 static void pack_decidx(git_pack *p)
1082 {
1083 gitlck_lock(&p->lock);
1084 p->idxcnt--;
1085 gitlck_unlock(&p->lock);
1086 }
1087
1088 static void pack_dec(git_pack *p)
1089 {
1090 int need_free;
1091
1092 gitlck_lock(&p->lock);
1093 need_free = !--p->refcnt;
1094 gitlck_unlock(&p->lock);
1095
1096 if (need_free) {
1097 if (p->idx_search) {
1098 gitfo_free_map(&p->idx_map);
1099 gitfo_close(p->idx_fd);
1100 free(p->im_fanout);
1101 }
1102
1103 gitlck_free(&p->lock);
1104 free(p);
1105 }
1106 }
1107
1108 static void packlist_dec(git_odb *db, git_packlist *pl)
1109 {
1110 int need_free;
1111
1112 assert(db && pl);
1113
1114 gitlck_lock(&db->lock);
1115 need_free = !--pl->refcnt;
1116 gitlck_unlock(&db->lock);
1117
1118 if (need_free) {
1119 size_t j;
1120 for (j = 0; j < pl->n_packs; j++)
1121 pack_dec(pl->packs[j]);
1122 free(pl);
1123 }
1124 }
1125
1126 static git_pack *alloc_pack(const char *pack_name)
1127 {
1128 git_pack *p = git__calloc(1, sizeof(*p));
1129 if (!p)
1130 return NULL;
1131
1132 gitlck_init(&p->lock);
1133 strcpy(p->pack_name, pack_name);
1134 p->refcnt = 1;
1135 return p;
1136 }
1137
1138 struct scanned_pack {
1139 struct scanned_pack *next;
1140 git_pack *pack;
1141 };
1142
1143 static int scan_one_pack(void *state, char *name)
1144 {
1145 struct scanned_pack **ret = state, *r;
1146 char *s = strrchr(name, '/'), *d;
1147
1148 if (git__prefixcmp(s + 1, "pack-")
1149 || git__suffixcmp(s, ".pack")
1150 || strlen(s + 1) != GIT_PACK_NAME_MAX + 4)
1151 return 0;
1152
1153 d = strrchr(s + 1, '.');
1154 strcpy(d + 1, "idx"); /* "pack-abc.pack" -> "pack-abc.idx" */
1155 if (gitfo_exists(name))
1156 return 0;
1157
1158 if ((r = git__malloc(sizeof(*r))) == NULL)
1159 return GIT_ERROR;
1160
1161 *d = '\0'; /* "pack-abc.pack" -_> "pack-abc" */
1162 if ((r->pack = alloc_pack(s + 1)) == NULL) {
1163 free(r);
1164 return GIT_ERROR;
1165 }
1166
1167 r->next = *ret;
1168 *ret = r;
1169 return 0;
1170 }
1171
1172 static git_packlist *scan_packs(git_odb *db)
1173 {
1174 char pb[GIT_PATH_MAX];
1175 struct scanned_pack *state = NULL, *c;
1176 size_t cnt;
1177 git_packlist *new_list;
1178
1179 if (git__fmt(pb, sizeof(pb), "%s/pack", db->objects_dir) < 0)
1180 return NULL;
1181 gitfo_dirent(pb, sizeof(pb), scan_one_pack, &state);
1182
1183 /* TODO - merge old entries into the new array */
1184 for (cnt = 0, c = state; c; c = c->next)
1185 cnt++;
1186 new_list = git__malloc(sizeof(*new_list)
1187 + (sizeof(new_list->packs[0]) * cnt));
1188 if (!new_list)
1189 goto fail;
1190
1191 for (cnt = 0, c = state; c; ) {
1192 struct scanned_pack *n = c->next;
1193 c->pack->db = db;
1194 new_list->packs[cnt++] = c->pack;
1195 free(c);
1196 c = n;
1197 }
1198 new_list->n_packs = cnt;
1199 new_list->refcnt = 2;
1200 db->packlist = new_list;
1201 return new_list;
1202
1203 fail:
1204 while (state) {
1205 struct scanned_pack *n = state->next;
1206 pack_dec(state->pack);
1207 free(state);
1208 state = n;
1209 }
1210 return NULL;
1211 }
1212
1213 static git_packlist *packlist_get(git_odb *db)
1214 {
1215 git_packlist *pl;
1216
1217 gitlck_lock(&db->lock);
1218 if ((pl = db->packlist) != NULL)
1219 pl->refcnt++;
1220 else
1221 pl = scan_packs(db);
1222 gitlck_unlock(&db->lock);
1223 return pl;
1224 }
1225
1226 static int search_packs(git_pack **p, uint32_t *n, git_odb *db, const git_oid *id)
1227 {
1228 git_packlist *pl = packlist_get(db);
1229 size_t j;
1230
1231 if (!pl)
1232 return GIT_ENOTFOUND;
1233
1234 for (j = 0; j < pl->n_packs; j++) {
1235
1236 git_pack *pack = pl->packs[j];
1237 uint32_t pos;
1238 int res;
1239
1240 if (pack_openidx(pack))
1241 continue;
1242 res = pack->idx_search(&pos, pack, id);
1243 pack_decidx(pack);
1244
1245 if (!res) {
1246 packlist_dec(db, pl);
1247 if (p)
1248 *p = pack;
1249 if (n)
1250 *n = pos;
1251 return GIT_SUCCESS;
1252 }
1253
1254 }
1255
1256 packlist_dec(db, pl);
1257 return GIT_ENOTFOUND;
1258 }
1259
1260 static int exists_packed(git_odb *db, const git_oid *id)
1261 {
1262 return !search_packs(NULL, NULL, db, id);
1263 }
1264
1265 static int exists_loose(git_odb *db, const git_oid *id)
1266 {
1267 char file[GIT_PATH_MAX];
1268
1269 if (object_file_name(file, sizeof(file), db->objects_dir, id))
1270 return 0;
1271
1272 if (gitfo_exists(file) < 0)
1273 return 0;
1274
1275 return 1;
1276 }
1277
1278 int git_odb_exists(git_odb *db, const git_oid *id)
1279 {
1280 /* TODO: extend to search alternate db's */
1281 if (exists_packed(db, id))
1282 return 1;
1283 return exists_loose(db, id);
1284 }
1285
1286 int git_odb_open(git_odb **out, const char *objects_dir)
1287 {
1288 git_odb *db = git__calloc(1, sizeof(*db));
1289 if (!db)
1290 return GIT_ERROR;
1291
1292 db->objects_dir = git__strdup(objects_dir);
1293 if (!db->objects_dir) {
1294 free(db);
1295 return GIT_ERROR;
1296 }
1297
1298 gitlck_init(&db->lock);
1299
1300 db->object_zlib_level = Z_BEST_SPEED;
1301 db->fsync_object_files = 0;
1302
1303 *out = db;
1304 return GIT_SUCCESS;
1305 }
1306
1307 void git_odb_close(git_odb *db)
1308 {
1309 git_packlist *pl;
1310
1311 if (!db)
1312 return;
1313
1314 gitlck_lock(&db->lock);
1315
1316 pl = db->packlist;
1317 db->packlist = NULL;
1318
1319 if (db->alternates) {
1320 git_odb **alt;
1321 for (alt = db->alternates; *alt; alt++)
1322 git_odb_close(*alt);
1323 free(db->alternates);
1324 }
1325
1326 free(db->objects_dir);
1327
1328 gitlck_unlock(&db->lock);
1329 if (pl)
1330 packlist_dec(db, pl);
1331 gitlck_free(&db->lock);
1332 free(db);
1333 }
1334
1335 int git_odb_read(
1336 git_obj *out,
1337 git_odb *db,
1338 const git_oid *id)
1339 {
1340 attempt:
1341 if (!git_odb__read_packed(out, db, id))
1342 return GIT_SUCCESS;
1343 if (!git_odb__read_loose(out, db, id))
1344 return GIT_SUCCESS;
1345 if (!open_alternates(db))
1346 goto attempt;
1347
1348 out->data = NULL;
1349 return GIT_ENOTFOUND;
1350 }
1351
1352 int git_odb__read_loose(git_obj *out, git_odb *db, const git_oid *id)
1353 {
1354 char file[GIT_PATH_MAX];
1355 gitfo_buf obj = GITFO_BUF_INIT;
1356
1357 assert(out && db && id);
1358
1359 out->data = NULL;
1360 out->len = 0;
1361 out->type = GIT_OBJ_BAD;
1362
1363 if (object_file_name(file, sizeof(file), db->objects_dir, id))
1364 return GIT_ENOTFOUND; /* TODO: error handling */
1365
1366 if (gitfo_read_file(&obj, file))
1367 return GIT_ENOTFOUND; /* TODO: error handling */
1368
1369 if (inflate_disk_obj(out, &obj)) {
1370 gitfo_free_buf(&obj);
1371 return GIT_ENOTFOUND; /* TODO: error handling */
1372 }
1373
1374 gitfo_free_buf(&obj);
1375
1376 return GIT_SUCCESS;
1377 }
1378
1379 static int read_packed(git_obj *out, git_pack *p, const git_oid *id)
1380 {
1381 uint32_t n;
1382 int res;
1383
1384 assert(out && p && id);
1385
1386 if (pack_openidx(p))
1387 return GIT_ERROR;
1388 res = p->idx_search(&n, p, id);
1389 pack_decidx(p);
1390
1391 if (!res) {
1392 /* TODO unpack object */
1393 res = GIT_ERROR;
1394 }
1395
1396 return res;
1397 }
1398
1399 int git_odb__read_packed(git_obj *out, git_odb *db, const git_oid *id)
1400 {
1401 git_packlist *pl = packlist_get(db);
1402 size_t j;
1403
1404 assert(out && db && id);
1405
1406 out->data = NULL;
1407 out->len = 0;
1408 out->type = GIT_OBJ_BAD;
1409
1410 if (!pl)
1411 return GIT_ENOTFOUND;
1412
1413 for (j = 0; j < pl->n_packs; j++) {
1414 if (!read_packed(out, pl->packs[j], id)) {
1415 packlist_dec(db, pl);
1416 return GIT_SUCCESS;
1417 }
1418 }
1419
1420 packlist_dec(db, pl);
1421 return GIT_ENOTFOUND;
1422 }
1423
1424 int git_odb_write(git_oid *id, git_odb *db, git_obj *obj)
1425 {
1426 char hdr[64];
1427 int hdrlen;
1428 gitfo_buf buf = GITFO_BUF_INIT;
1429
1430 assert(id && db && obj);
1431
1432 if (hash_obj(id, hdr, sizeof(hdr), &hdrlen, obj) < 0)
1433 return GIT_ERROR;
1434
1435 if (git_odb_exists(db, id))
1436 return GIT_SUCCESS;
1437
1438 if (deflate_obj(&buf, hdr, hdrlen, obj, db->object_zlib_level) < 0)
1439 return GIT_ERROR;
1440
1441 if (write_obj(&buf, id, db) < 0) {
1442 gitfo_free_buf(&buf);
1443 return GIT_ERROR;
1444 }
1445
1446 gitfo_free_buf(&buf);
1447
1448 return GIT_SUCCESS;
1449 }
1450