]> git.proxmox.com Git - libgit2.git/blob - src/pack-objects.c
Merge pull request #968 from arrbee/diff-support-typechange
[libgit2.git] / src / pack-objects.c
1 /*
2 * Copyright (C) 2009-2012 the libgit2 contributors
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8 #include "pack-objects.h"
9
10 #include "compress.h"
11 #include "delta.h"
12 #include "iterator.h"
13 #include "netops.h"
14 #include "pack.h"
15 #include "thread-utils.h"
16 #include "tree.h"
17
18 #include "git2/pack.h"
19 #include "git2/commit.h"
20 #include "git2/tag.h"
21 #include "git2/indexer.h"
22 #include "git2/config.h"
23
24 GIT__USE_OIDMAP;
25
26 struct unpacked {
27 git_pobject *object;
28 void *data;
29 struct git_delta_index *index;
30 unsigned int depth;
31 };
32
33 #ifdef GIT_THREADS
34
35 #define GIT_PACKBUILDER__MUTEX_OP(pb, mtx, op) do { \
36 int result = git_mutex_##op(&(pb)->mtx); \
37 assert(!result); \
38 GIT_UNUSED(result); \
39 } while (0)
40
41 #else
42
43 #define GIT_PACKBUILDER__MUTEX_OP(pb,mtx,op) GIT_UNUSED(pb)
44
45 #endif /* GIT_THREADS */
46
47 #define git_packbuilder__cache_lock(pb) GIT_PACKBUILDER__MUTEX_OP(pb, cache_mutex, lock)
48 #define git_packbuilder__cache_unlock(pb) GIT_PACKBUILDER__MUTEX_OP(pb, cache_mutex, unlock)
49 #define git_packbuilder__progress_lock(pb) GIT_PACKBUILDER__MUTEX_OP(pb, progress_mutex, lock)
50 #define git_packbuilder__progress_unlock(pb) GIT_PACKBUILDER__MUTEX_OP(pb, progress_mutex, unlock)
51
52 static unsigned name_hash(const char *name)
53 {
54 unsigned c, hash = 0;
55
56 if (!name)
57 return 0;
58
59 /*
60 * This effectively just creates a sortable number from the
61 * last sixteen non-whitespace characters. Last characters
62 * count "most", so things that end in ".c" sort together.
63 */
64 while ((c = *name++) != 0) {
65 if (git__isspace(c))
66 continue;
67 hash = (hash >> 2) + (c << 24);
68 }
69 return hash;
70 }
71
72 static int packbuilder_config(git_packbuilder *pb)
73 {
74 git_config *config;
75 int ret;
76
77 if (git_repository_config__weakptr(&config, pb->repo) < 0)
78 return -1;
79
80 #define config_get(key, dst, default) \
81 ret = git_config_get_int64((int64_t *)&dst, config, key); \
82 if (ret == GIT_ENOTFOUND) \
83 dst = default; \
84 else if (ret < 0) \
85 return -1;
86
87 config_get("pack.deltaCacheSize", pb->max_delta_cache_size,
88 GIT_PACK_DELTA_CACHE_SIZE);
89 config_get("pack.deltaCacheLimit", pb->cache_max_small_delta_size,
90 GIT_PACK_DELTA_CACHE_LIMIT);
91 config_get("pack.deltaCacheSize", pb->big_file_threshold,
92 GIT_PACK_BIG_FILE_THRESHOLD);
93 config_get("pack.windowMemory", pb->window_memory_limit, 0);
94
95 #undef config_get
96
97 return 0;
98 }
99
100 int git_packbuilder_new(git_packbuilder **out, git_repository *repo)
101 {
102 git_packbuilder *pb;
103
104 *out = NULL;
105
106 pb = git__calloc(sizeof(*pb), 1);
107 GITERR_CHECK_ALLOC(pb);
108
109 pb->object_ix = git_oidmap_alloc();
110
111 if (!pb->object_ix)
112 goto on_error;
113
114 pb->repo = repo;
115 pb->nr_threads = 1; /* do not spawn any thread by default */
116 pb->ctx = git_hash_new_ctx();
117
118 if (!pb->ctx ||
119 git_repository_odb(&pb->odb, repo) < 0 ||
120 packbuilder_config(pb) < 0)
121 goto on_error;
122
123 #ifdef GIT_THREADS
124
125 if (git_mutex_init(&pb->cache_mutex) ||
126 git_mutex_init(&pb->progress_mutex) ||
127 git_cond_init(&pb->progress_cond))
128 goto on_error;
129
130 #endif
131
132 *out = pb;
133 return 0;
134
135 on_error:
136 git_packbuilder_free(pb);
137 return -1;
138 }
139
140 void git_packbuilder_set_threads(git_packbuilder *pb, unsigned int n)
141 {
142 assert(pb);
143 pb->nr_threads = n;
144 }
145
146 static void rehash(git_packbuilder *pb)
147 {
148 git_pobject *po;
149 khiter_t pos;
150 unsigned int i;
151 int ret;
152
153 kh_clear(oid, pb->object_ix);
154 for (i = 0, po = pb->object_list; i < pb->nr_objects; i++, po++) {
155 pos = kh_put(oid, pb->object_ix, &po->id, &ret);
156 kh_value(pb->object_ix, pos) = po;
157 }
158 }
159
160 int git_packbuilder_insert(git_packbuilder *pb, const git_oid *oid,
161 const char *name)
162 {
163 git_pobject *po;
164 khiter_t pos;
165 int ret;
166
167 assert(pb && oid);
168
169 /* If the object already exists in the hash table, then we don't
170 * have any work to do */
171 pos = kh_get(oid, pb->object_ix, oid);
172 if (pos != kh_end(pb->object_ix))
173 return 0;
174
175 if (pb->nr_objects >= pb->nr_alloc) {
176 pb->nr_alloc = (pb->nr_alloc + 1024) * 3 / 2;
177 pb->object_list = git__realloc(pb->object_list,
178 pb->nr_alloc * sizeof(*po));
179 GITERR_CHECK_ALLOC(pb->object_list);
180 rehash(pb);
181 }
182
183 po = pb->object_list + pb->nr_objects;
184 memset(po, 0x0, sizeof(*po));
185
186 if (git_odb_read_header(&po->size, &po->type, pb->odb, oid) < 0)
187 return -1;
188
189 pb->nr_objects++;
190 git_oid_cpy(&po->id, oid);
191 po->hash = name_hash(name);
192
193 pos = kh_put(oid, pb->object_ix, &po->id, &ret);
194 assert(ret != 0);
195 kh_value(pb->object_ix, pos) = po;
196
197 pb->done = false;
198 return 0;
199 }
200
201 /*
202 * The per-object header is a pretty dense thing, which is
203 * - first byte: low four bits are "size",
204 * then three bits of "type",
205 * with the high bit being "size continues".
206 * - each byte afterwards: low seven bits are size continuation,
207 * with the high bit being "size continues"
208 */
209 static int gen_pack_object_header(
210 unsigned char *hdr,
211 unsigned long size,
212 git_otype type)
213 {
214 unsigned char *hdr_base;
215 unsigned char c;
216
217 assert(type >= GIT_OBJ_COMMIT && type <= GIT_OBJ_REF_DELTA);
218
219 /* TODO: add support for chunked objects; see git.git 6c0d19b1 */
220
221 c = (unsigned char)((type << 4) | (size & 15));
222 size >>= 4;
223 hdr_base = hdr;
224
225 while (size) {
226 *hdr++ = c | 0x80;
227 c = size & 0x7f;
228 size >>= 7;
229 }
230 *hdr++ = c;
231
232 return hdr - hdr_base;
233 }
234
235 static int get_delta(void **out, git_odb *odb, git_pobject *po)
236 {
237 git_odb_object *src = NULL, *trg = NULL;
238 unsigned long delta_size;
239 void *delta_buf;
240
241 *out = NULL;
242
243 if (git_odb_read(&src, odb, &po->delta->id) < 0 ||
244 git_odb_read(&trg, odb, &po->id) < 0)
245 goto on_error;
246
247 delta_buf = git_delta(git_odb_object_data(src), git_odb_object_size(src),
248 git_odb_object_data(trg), git_odb_object_size(trg),
249 &delta_size, 0);
250
251 if (!delta_buf || delta_size != po->delta_size) {
252 giterr_set(GITERR_INVALID, "Delta size changed");
253 goto on_error;
254 }
255
256 *out = delta_buf;
257
258 git_odb_object_free(src);
259 git_odb_object_free(trg);
260 return 0;
261
262 on_error:
263 git_odb_object_free(src);
264 git_odb_object_free(trg);
265 return -1;
266 }
267
268 static int write_object(git_buf *buf, git_packbuilder *pb, git_pobject *po)
269 {
270 git_odb_object *obj = NULL;
271 git_buf zbuf = GIT_BUF_INIT;
272 git_otype type;
273 unsigned char hdr[10];
274 unsigned int hdr_len;
275 unsigned long size;
276 void *data;
277
278 if (po->delta) {
279 if (po->delta_data)
280 data = po->delta_data;
281 else if (get_delta(&data, pb->odb, po) < 0)
282 goto on_error;
283 size = po->delta_size;
284 type = GIT_OBJ_REF_DELTA;
285 } else {
286 if (git_odb_read(&obj, pb->odb, &po->id))
287 goto on_error;
288
289 data = (void *)git_odb_object_data(obj);
290 size = git_odb_object_size(obj);
291 type = git_odb_object_type(obj);
292 }
293
294 /* Write header */
295 hdr_len = gen_pack_object_header(hdr, size, type);
296
297 if (git_buf_put(buf, (char *)hdr, hdr_len) < 0)
298 goto on_error;
299
300 git_hash_update(pb->ctx, hdr, hdr_len);
301
302 if (type == GIT_OBJ_REF_DELTA) {
303 if (git_buf_put(buf, (char *)po->delta->id.id,
304 GIT_OID_RAWSZ) < 0)
305 goto on_error;
306
307 git_hash_update(pb->ctx, po->delta->id.id, GIT_OID_RAWSZ);
308 }
309
310 /* Write data */
311 if (po->z_delta_size)
312 size = po->z_delta_size;
313 else if (git__compress(&zbuf, data, size) < 0)
314 goto on_error;
315 else {
316 if (po->delta)
317 git__free(data);
318 data = zbuf.ptr;
319 size = zbuf.size;
320 }
321
322 if (git_buf_put(buf, data, size) < 0)
323 goto on_error;
324
325 git_hash_update(pb->ctx, data, size);
326
327 if (po->delta_data)
328 git__free(po->delta_data);
329
330 git_odb_object_free(obj);
331 git_buf_free(&zbuf);
332
333 pb->nr_written++;
334 return 0;
335
336 on_error:
337 git_odb_object_free(obj);
338 git_buf_free(&zbuf);
339 return -1;
340 }
341
342 enum write_one_status {
343 WRITE_ONE_SKIP = -1, /* already written */
344 WRITE_ONE_BREAK = 0, /* writing this will bust the limit; not written */
345 WRITE_ONE_WRITTEN = 1, /* normal */
346 WRITE_ONE_RECURSIVE = 2 /* already scheduled to be written */
347 };
348
349 static int write_one(git_buf *buf, git_packbuilder *pb, git_pobject *po,
350 enum write_one_status *status)
351 {
352 if (po->recursing) {
353 *status = WRITE_ONE_RECURSIVE;
354 return 0;
355 } else if (po->written) {
356 *status = WRITE_ONE_SKIP;
357 return 0;
358 }
359
360 if (po->delta) {
361 po->recursing = 1;
362 if (write_one(buf, pb, po->delta, status) < 0)
363 return -1;
364 switch (*status) {
365 case WRITE_ONE_RECURSIVE:
366 /* we cannot depend on this one */
367 po->delta = NULL;
368 break;
369 default:
370 break;
371 }
372 }
373
374 po->written = 1;
375 po->recursing = 0;
376 return write_object(buf, pb, po);
377 }
378
379 GIT_INLINE(void) add_to_write_order(git_pobject **wo, unsigned int *endp,
380 git_pobject *po)
381 {
382 if (po->filled)
383 return;
384 wo[(*endp)++] = po;
385 po->filled = 1;
386 }
387
388 static void add_descendants_to_write_order(git_pobject **wo, unsigned int *endp,
389 git_pobject *po)
390 {
391 int add_to_order = 1;
392 while (po) {
393 if (add_to_order) {
394 git_pobject *s;
395 /* add this node... */
396 add_to_write_order(wo, endp, po);
397 /* all its siblings... */
398 for (s = po->delta_sibling; s; s = s->delta_sibling) {
399 add_to_write_order(wo, endp, s);
400 }
401 }
402 /* drop down a level to add left subtree nodes if possible */
403 if (po->delta_child) {
404 add_to_order = 1;
405 po = po->delta_child;
406 } else {
407 add_to_order = 0;
408 /* our sibling might have some children, it is next */
409 if (po->delta_sibling) {
410 po = po->delta_sibling;
411 continue;
412 }
413 /* go back to our parent node */
414 po = po->delta;
415 while (po && !po->delta_sibling) {
416 /* we're on the right side of a subtree, keep
417 * going up until we can go right again */
418 po = po->delta;
419 }
420 if (!po) {
421 /* done- we hit our original root node */
422 return;
423 }
424 /* pass it off to sibling at this level */
425 po = po->delta_sibling;
426 }
427 };
428 }
429
430 static void add_family_to_write_order(git_pobject **wo, unsigned int *endp,
431 git_pobject *po)
432 {
433 git_pobject *root;
434
435 for (root = po; root->delta; root = root->delta)
436 ; /* nothing */
437 add_descendants_to_write_order(wo, endp, root);
438 }
439
440 static int cb_tag_foreach(const char *name, git_oid *oid, void *data)
441 {
442 git_packbuilder *pb = data;
443 git_pobject *po;
444 khiter_t pos;
445
446 GIT_UNUSED(name);
447
448 pos = kh_get(oid, pb->object_ix, oid);
449 if (pos == kh_end(pb->object_ix))
450 return 0;
451
452 po = kh_value(pb->object_ix, pos);
453 po->tagged = 1;
454
455 /* TODO: peel objects */
456
457 return 0;
458 }
459
460 static git_pobject **compute_write_order(git_packbuilder *pb)
461 {
462 unsigned int i, wo_end, last_untagged;
463
464 git_pobject **wo = git__malloc(sizeof(*wo) * pb->nr_objects);
465
466 for (i = 0; i < pb->nr_objects; i++) {
467 git_pobject *po = pb->object_list + i;
468 po->tagged = 0;
469 po->filled = 0;
470 po->delta_child = NULL;
471 po->delta_sibling = NULL;
472 }
473
474 /*
475 * Fully connect delta_child/delta_sibling network.
476 * Make sure delta_sibling is sorted in the original
477 * recency order.
478 */
479 for (i = pb->nr_objects; i > 0;) {
480 git_pobject *po = &pb->object_list[--i];
481 if (!po->delta)
482 continue;
483 /* Mark me as the first child */
484 po->delta_sibling = po->delta->delta_child;
485 po->delta->delta_child = po;
486 }
487
488 /*
489 * Mark objects that are at the tip of tags.
490 */
491 if (git_tag_foreach(pb->repo, &cb_tag_foreach, pb) < 0)
492 return NULL;
493
494 /*
495 * Give the objects in the original recency order until
496 * we see a tagged tip.
497 */
498 for (i = wo_end = 0; i < pb->nr_objects; i++) {
499 git_pobject *po = pb->object_list + i;
500 if (po->tagged)
501 break;
502 add_to_write_order(wo, &wo_end, po);
503 }
504 last_untagged = i;
505
506 /*
507 * Then fill all the tagged tips.
508 */
509 for (; i < pb->nr_objects; i++) {
510 git_pobject *po = pb->object_list + i;
511 if (po->tagged)
512 add_to_write_order(wo, &wo_end, po);
513 }
514
515 /*
516 * And then all remaining commits and tags.
517 */
518 for (i = last_untagged; i < pb->nr_objects; i++) {
519 git_pobject *po = pb->object_list + i;
520 if (po->type != GIT_OBJ_COMMIT &&
521 po->type != GIT_OBJ_TAG)
522 continue;
523 add_to_write_order(wo, &wo_end, po);
524 }
525
526 /*
527 * And then all the trees.
528 */
529 for (i = last_untagged; i < pb->nr_objects; i++) {
530 git_pobject *po = pb->object_list + i;
531 if (po->type != GIT_OBJ_TREE)
532 continue;
533 add_to_write_order(wo, &wo_end, po);
534 }
535
536 /*
537 * Finally all the rest in really tight order
538 */
539 for (i = last_untagged; i < pb->nr_objects; i++) {
540 git_pobject *po = pb->object_list + i;
541 if (!po->filled)
542 add_family_to_write_order(wo, &wo_end, po);
543 }
544
545 if (wo_end != pb->nr_objects) {
546 giterr_set(GITERR_INVALID, "invalid write order");
547 return NULL;
548 }
549
550 return wo;
551 }
552
553 static int write_pack(git_packbuilder *pb,
554 int (*cb)(void *buf, size_t size, void *data),
555 void *data)
556 {
557 git_pobject **write_order;
558 git_pobject *po;
559 git_buf buf = GIT_BUF_INIT;
560 enum write_one_status status;
561 struct git_pack_header ph;
562 unsigned int i = 0;
563
564 write_order = compute_write_order(pb);
565 if (write_order == NULL)
566 goto on_error;
567
568 /* Write pack header */
569 ph.hdr_signature = htonl(PACK_SIGNATURE);
570 ph.hdr_version = htonl(PACK_VERSION);
571 ph.hdr_entries = htonl(pb->nr_objects);
572
573 if (cb(&ph, sizeof(ph), data) < 0)
574 goto on_error;
575
576 git_hash_update(pb->ctx, &ph, sizeof(ph));
577
578 pb->nr_remaining = pb->nr_objects;
579 do {
580 pb->nr_written = 0;
581 for ( ; i < pb->nr_objects; ++i) {
582 po = write_order[i];
583 if (write_one(&buf, pb, po, &status) < 0)
584 goto on_error;
585 if (cb(buf.ptr, buf.size, data) < 0)
586 goto on_error;
587 git_buf_clear(&buf);
588 }
589
590 pb->nr_remaining -= pb->nr_written;
591 } while (pb->nr_remaining && i < pb->nr_objects);
592
593 git__free(write_order);
594 git_buf_free(&buf);
595 git_hash_final(&pb->pack_oid, pb->ctx);
596
597 return cb(pb->pack_oid.id, GIT_OID_RAWSZ, data);
598
599 on_error:
600 git__free(write_order);
601 git_buf_free(&buf);
602 return -1;
603 }
604
605 static int send_pack_file(void *buf, size_t size, void *data)
606 {
607 git_transport *t = (git_transport *)data;
608 return gitno_send(t, buf, size, 0);
609 }
610
611 static int write_pack_buf(void *buf, size_t size, void *data)
612 {
613 git_buf *b = (git_buf *)data;
614 return git_buf_put(b, buf, size);
615 }
616
617 static int write_pack_to_file(void *buf, size_t size, void *data)
618 {
619 git_filebuf *file = (git_filebuf *)data;
620 return git_filebuf_write(file, buf, size);
621 }
622
623 static int write_pack_file(git_packbuilder *pb, const char *path)
624 {
625 git_filebuf file = GIT_FILEBUF_INIT;
626
627 if (git_filebuf_open(&file, path, 0) < 0 ||
628 write_pack(pb, &write_pack_to_file, &file) < 0 ||
629 git_filebuf_commit(&file, GIT_PACK_FILE_MODE) < 0) {
630 git_filebuf_cleanup(&file);
631 return -1;
632 }
633
634 return 0;
635 }
636
637 static int type_size_sort(const void *_a, const void *_b)
638 {
639 const git_pobject *a = (git_pobject *)_a;
640 const git_pobject *b = (git_pobject *)_b;
641
642 if (a->type > b->type)
643 return -1;
644 if (a->type < b->type)
645 return 1;
646 if (a->hash > b->hash)
647 return -1;
648 if (a->hash < b->hash)
649 return 1;
650 /*
651 * TODO
652 *
653 if (a->preferred_base > b->preferred_base)
654 return -1;
655 if (a->preferred_base < b->preferred_base)
656 return 1;
657 */
658 if (a->size > b->size)
659 return -1;
660 if (a->size < b->size)
661 return 1;
662 return a < b ? -1 : (a > b); /* newest first */
663 }
664
665 static int delta_cacheable(git_packbuilder *pb, unsigned long src_size,
666 unsigned long trg_size, unsigned long delta_size)
667 {
668 if (pb->max_delta_cache_size &&
669 pb->delta_cache_size + delta_size > pb->max_delta_cache_size)
670 return 0;
671
672 if (delta_size < pb->cache_max_small_delta_size)
673 return 1;
674
675 /* cache delta, if objects are large enough compared to delta size */
676 if ((src_size >> 20) + (trg_size >> 21) > (delta_size >> 10))
677 return 1;
678
679 return 0;
680 }
681
682 static int try_delta(git_packbuilder *pb, struct unpacked *trg,
683 struct unpacked *src, unsigned int max_depth,
684 unsigned long *mem_usage, int *ret)
685 {
686 git_pobject *trg_object = trg->object;
687 git_pobject *src_object = src->object;
688 git_odb_object *obj;
689 unsigned long trg_size, src_size, delta_size,
690 sizediff, max_size, sz;
691 unsigned int ref_depth;
692 void *delta_buf;
693
694 /* Don't bother doing diffs between different types */
695 if (trg_object->type != src_object->type) {
696 *ret = -1;
697 return 0;
698 }
699
700 *ret = 0;
701
702 /* TODO: support reuse-delta */
703
704 /* Let's not bust the allowed depth. */
705 if (src->depth >= max_depth)
706 return 0;
707
708 /* Now some size filtering heuristics. */
709 trg_size = trg_object->size;
710 if (!trg_object->delta) {
711 max_size = trg_size/2 - 20;
712 ref_depth = 1;
713 } else {
714 max_size = trg_object->delta_size;
715 ref_depth = trg->depth;
716 }
717
718 max_size = (uint64_t)max_size * (max_depth - src->depth) /
719 (max_depth - ref_depth + 1);
720 if (max_size == 0)
721 return 0;
722
723 src_size = src_object->size;
724 sizediff = src_size < trg_size ? trg_size - src_size : 0;
725 if (sizediff >= max_size)
726 return 0;
727 if (trg_size < src_size / 32)
728 return 0;
729
730 /* Load data if not already done */
731 if (!trg->data) {
732 if (git_odb_read(&obj, pb->odb, &trg_object->id) < 0)
733 return -1;
734
735 sz = git_odb_object_size(obj);
736 trg->data = git__malloc(sz);
737 GITERR_CHECK_ALLOC(trg->data);
738 memcpy(trg->data, git_odb_object_data(obj), sz);
739
740 git_odb_object_free(obj);
741
742 if (sz != trg_size) {
743 giterr_set(GITERR_INVALID,
744 "Inconsistent target object length");
745 return -1;
746 }
747
748 *mem_usage += sz;
749 }
750 if (!src->data) {
751 if (git_odb_read(&obj, pb->odb, &src_object->id) < 0)
752 return -1;
753
754 sz = git_odb_object_size(obj);
755 src->data = git__malloc(sz);
756 GITERR_CHECK_ALLOC(src->data);
757 memcpy(src->data, git_odb_object_data(obj), sz);
758
759 git_odb_object_free(obj);
760
761 if (sz != src_size) {
762 giterr_set(GITERR_INVALID,
763 "Inconsistent source object length");
764 return -1;
765 }
766
767 *mem_usage += sz;
768 }
769 if (!src->index) {
770 src->index = git_delta_create_index(src->data, src_size);
771 if (!src->index)
772 return 0; /* suboptimal pack - out of memory */
773
774 *mem_usage += git_delta_sizeof_index(src->index);
775 }
776
777 delta_buf = git_delta_create(src->index, trg->data, trg_size,
778 &delta_size, max_size);
779 if (!delta_buf)
780 return 0;
781
782 if (trg_object->delta) {
783 /* Prefer only shallower same-sized deltas. */
784 if (delta_size == trg_object->delta_size &&
785 src->depth + 1 >= trg->depth) {
786 git__free(delta_buf);
787 return 0;
788 }
789 }
790
791 git_packbuilder__cache_lock(pb);
792 if (trg_object->delta_data) {
793 git__free(trg_object->delta_data);
794 pb->delta_cache_size -= trg_object->delta_size;
795 trg_object->delta_data = NULL;
796 }
797 if (delta_cacheable(pb, src_size, trg_size, delta_size)) {
798 pb->delta_cache_size += delta_size;
799 git_packbuilder__cache_unlock(pb);
800
801 trg_object->delta_data = git__realloc(delta_buf, delta_size);
802 GITERR_CHECK_ALLOC(trg_object->delta_data);
803 } else {
804 /* create delta when writing the pack */
805 git_packbuilder__cache_unlock(pb);
806 git__free(delta_buf);
807 }
808
809 trg_object->delta = src_object;
810 trg_object->delta_size = delta_size;
811 trg->depth = src->depth + 1;
812
813 *ret = 1;
814 return 0;
815 }
816
817 static unsigned int check_delta_limit(git_pobject *me, unsigned int n)
818 {
819 git_pobject *child = me->delta_child;
820 unsigned int m = n;
821
822 while (child) {
823 unsigned int c = check_delta_limit(child, n + 1);
824 if (m < c)
825 m = c;
826 child = child->delta_sibling;
827 }
828 return m;
829 }
830
831 static unsigned long free_unpacked(struct unpacked *n)
832 {
833 unsigned long freed_mem = git_delta_sizeof_index(n->index);
834 git_delta_free_index(n->index);
835 n->index = NULL;
836 if (n->data) {
837 freed_mem += n->object->size;
838 git__free(n->data);
839 n->data = NULL;
840 }
841 n->object = NULL;
842 n->depth = 0;
843 return freed_mem;
844 }
845
846 static int find_deltas(git_packbuilder *pb, git_pobject **list,
847 unsigned int *list_size, unsigned int window,
848 unsigned int depth)
849 {
850 git_pobject *po;
851 git_buf zbuf = GIT_BUF_INIT;
852 struct unpacked *array;
853 uint32_t idx = 0, count = 0;
854 unsigned long mem_usage = 0;
855 unsigned int i;
856 int error = -1;
857
858 array = git__calloc(window, sizeof(struct unpacked));
859 GITERR_CHECK_ALLOC(array);
860
861 for (;;) {
862 struct unpacked *n = array + idx;
863 unsigned int max_depth;
864 int j, best_base = -1;
865
866 git_packbuilder__progress_lock(pb);
867 if (!*list_size) {
868 git_packbuilder__progress_unlock(pb);
869 break;
870 }
871
872 po = *list++;
873 (*list_size)--;
874 git_packbuilder__progress_unlock(pb);
875
876 mem_usage -= free_unpacked(n);
877 n->object = po;
878
879 while (pb->window_memory_limit &&
880 mem_usage > pb->window_memory_limit &&
881 count > 1) {
882 uint32_t tail = (idx + window - count) % window;
883 mem_usage -= free_unpacked(array + tail);
884 count--;
885 }
886
887 /*
888 * If the current object is at pack edge, take the depth the
889 * objects that depend on the current object into account
890 * otherwise they would become too deep.
891 */
892 max_depth = depth;
893 if (po->delta_child) {
894 max_depth -= check_delta_limit(po, 0);
895 if (max_depth <= 0)
896 goto next;
897 }
898
899 j = window;
900 while (--j > 0) {
901 int ret;
902 uint32_t other_idx = idx + j;
903 struct unpacked *m;
904
905 if (other_idx >= window)
906 other_idx -= window;
907
908 m = array + other_idx;
909 if (!m->object)
910 break;
911
912 if (try_delta(pb, n, m, max_depth, &mem_usage, &ret) < 0)
913 goto on_error;
914 if (ret < 0)
915 break;
916 else if (ret > 0)
917 best_base = other_idx;
918 }
919
920 /*
921 * If we decided to cache the delta data, then it is best
922 * to compress it right away. First because we have to do
923 * it anyway, and doing it here while we're threaded will
924 * save a lot of time in the non threaded write phase,
925 * as well as allow for caching more deltas within
926 * the same cache size limit.
927 * ...
928 * But only if not writing to stdout, since in that case
929 * the network is most likely throttling writes anyway,
930 * and therefore it is best to go to the write phase ASAP
931 * instead, as we can afford spending more time compressing
932 * between writes at that moment.
933 */
934 if (po->delta_data) {
935 if (git__compress(&zbuf, po->delta_data, po->delta_size) < 0)
936 goto on_error;
937
938 git__free(po->delta_data);
939 po->delta_data = git__malloc(zbuf.size);
940 GITERR_CHECK_ALLOC(po->delta_data);
941
942 memcpy(po->delta_data, zbuf.ptr, zbuf.size);
943 po->z_delta_size = zbuf.size;
944 git_buf_clear(&zbuf);
945
946 git_packbuilder__cache_lock(pb);
947 pb->delta_cache_size -= po->delta_size;
948 pb->delta_cache_size += po->z_delta_size;
949 git_packbuilder__cache_unlock(pb);
950 }
951
952 /*
953 * If we made n a delta, and if n is already at max
954 * depth, leaving it in the window is pointless. we
955 * should evict it first.
956 */
957 if (po->delta && max_depth <= n->depth)
958 continue;
959
960 /*
961 * Move the best delta base up in the window, after the
962 * currently deltified object, to keep it longer. It will
963 * be the first base object to be attempted next.
964 */
965 if (po->delta) {
966 struct unpacked swap = array[best_base];
967 int dist = (window + idx - best_base) % window;
968 int dst = best_base;
969 while (dist--) {
970 int src = (dst + 1) % window;
971 array[dst] = array[src];
972 dst = src;
973 }
974 array[dst] = swap;
975 }
976
977 next:
978 idx++;
979 if (count + 1 < window)
980 count++;
981 if (idx >= window)
982 idx = 0;
983 }
984 error = 0;
985
986 on_error:
987 for (i = 0; i < window; ++i) {
988 git__free(array[i].index);
989 git__free(array[i].data);
990 }
991 git__free(array);
992 git_buf_free(&zbuf);
993
994 return error;
995 }
996
997 #ifdef GIT_THREADS
998
999 struct thread_params {
1000 git_thread thread;
1001 git_packbuilder *pb;
1002
1003 git_pobject **list;
1004
1005 git_cond cond;
1006 git_mutex mutex;
1007
1008 unsigned int list_size;
1009 unsigned int remaining;
1010
1011 int window;
1012 int depth;
1013 int working;
1014 int data_ready;
1015 };
1016
1017 static void *threaded_find_deltas(void *arg)
1018 {
1019 struct thread_params *me = arg;
1020
1021 while (me->remaining) {
1022 if (find_deltas(me->pb, me->list, &me->remaining,
1023 me->window, me->depth) < 0) {
1024 ; /* TODO */
1025 }
1026
1027 git_packbuilder__progress_lock(me->pb);
1028 me->working = 0;
1029 git_cond_signal(&me->pb->progress_cond);
1030 git_packbuilder__progress_unlock(me->pb);
1031
1032 /*
1033 * We must not set ->data_ready before we wait on the
1034 * condition because the main thread may have set it to 1
1035 * before we get here. In order to be sure that new
1036 * work is available if we see 1 in ->data_ready, it
1037 * was initialized to 0 before this thread was spawned
1038 * and we reset it to 0 right away.
1039 */
1040 git_mutex_lock(&me->mutex);
1041 while (!me->data_ready)
1042 git_cond_wait(&me->cond, &me->mutex);
1043 me->data_ready = 0;
1044 git_mutex_unlock(&me->mutex);
1045 }
1046 /* leave ->working 1 so that this doesn't get more work assigned */
1047 return NULL;
1048 }
1049
1050 static int ll_find_deltas(git_packbuilder *pb, git_pobject **list,
1051 unsigned int list_size, unsigned int window,
1052 unsigned int depth)
1053 {
1054 struct thread_params *p;
1055 int i, ret, active_threads = 0;
1056
1057 if (!pb->nr_threads)
1058 pb->nr_threads = git_online_cpus();
1059
1060 if (pb->nr_threads <= 1) {
1061 find_deltas(pb, list, &list_size, window, depth);
1062 return 0;
1063 }
1064
1065 p = git__malloc(pb->nr_threads * sizeof(*p));
1066 GITERR_CHECK_ALLOC(p);
1067
1068 /* Partition the work among the threads */
1069 for (i = 0; i < pb->nr_threads; ++i) {
1070 unsigned sub_size = list_size / (pb->nr_threads - i);
1071
1072 /* don't use too small segments or no deltas will be found */
1073 if (sub_size < 2*window && i+1 < pb->nr_threads)
1074 sub_size = 0;
1075
1076 p[i].pb = pb;
1077 p[i].window = window;
1078 p[i].depth = depth;
1079 p[i].working = 1;
1080 p[i].data_ready = 0;
1081
1082 /* try to split chunks on "path" boundaries */
1083 while (sub_size && sub_size < list_size &&
1084 list[sub_size]->hash &&
1085 list[sub_size]->hash == list[sub_size-1]->hash)
1086 sub_size++;
1087
1088 p[i].list = list;
1089 p[i].list_size = sub_size;
1090 p[i].remaining = sub_size;
1091
1092 list += sub_size;
1093 list_size -= sub_size;
1094 }
1095
1096 /* Start work threads */
1097 for (i = 0; i < pb->nr_threads; ++i) {
1098 if (!p[i].list_size)
1099 continue;
1100
1101 git_mutex_init(&p[i].mutex);
1102 git_cond_init(&p[i].cond);
1103
1104 ret = git_thread_create(&p[i].thread, NULL,
1105 threaded_find_deltas, &p[i]);
1106 if (ret) {
1107 giterr_set(GITERR_THREAD, "unable to create thread");
1108 return -1;
1109 }
1110 active_threads++;
1111 }
1112
1113 /*
1114 * Now let's wait for work completion. Each time a thread is done
1115 * with its work, we steal half of the remaining work from the
1116 * thread with the largest number of unprocessed objects and give
1117 * it to that newly idle thread. This ensure good load balancing
1118 * until the remaining object list segments are simply too short
1119 * to be worth splitting anymore.
1120 */
1121 while (active_threads) {
1122 struct thread_params *target = NULL;
1123 struct thread_params *victim = NULL;
1124 unsigned sub_size = 0;
1125
1126 /* Start by locating a thread that has transitioned its
1127 * 'working' flag from 1 -> 0. This indicates that it is
1128 * ready to receive more work using our work-stealing
1129 * algorithm. */
1130 git_packbuilder__progress_lock(pb);
1131 for (;;) {
1132 for (i = 0; !target && i < pb->nr_threads; i++)
1133 if (!p[i].working)
1134 target = &p[i];
1135 if (target)
1136 break;
1137 git_cond_wait(&pb->progress_cond, &pb->progress_mutex);
1138 }
1139
1140 /* At this point we hold the progress lock and have located
1141 * a thread to receive more work. We still need to locate a
1142 * thread from which to steal work (the victim). */
1143 for (i = 0; i < pb->nr_threads; i++)
1144 if (p[i].remaining > 2*window &&
1145 (!victim || victim->remaining < p[i].remaining))
1146 victim = &p[i];
1147
1148 if (victim) {
1149 sub_size = victim->remaining / 2;
1150 list = victim->list + victim->list_size - sub_size;
1151 while (sub_size && list[0]->hash &&
1152 list[0]->hash == list[-1]->hash) {
1153 list++;
1154 sub_size--;
1155 }
1156 if (!sub_size) {
1157 /*
1158 * It is possible for some "paths" to have
1159 * so many objects that no hash boundary
1160 * might be found. Let's just steal the
1161 * exact half in that case.
1162 */
1163 sub_size = victim->remaining / 2;
1164 list -= sub_size;
1165 }
1166 target->list = list;
1167 victim->list_size -= sub_size;
1168 victim->remaining -= sub_size;
1169 }
1170 target->list_size = sub_size;
1171 target->remaining = sub_size;
1172 target->working = 1;
1173 git_packbuilder__progress_unlock(pb);
1174
1175 git_mutex_lock(&target->mutex);
1176 target->data_ready = 1;
1177 git_cond_signal(&target->cond);
1178 git_mutex_unlock(&target->mutex);
1179
1180 if (!sub_size) {
1181 git_thread_join(target->thread, NULL);
1182 git_cond_free(&target->cond);
1183 git_mutex_free(&target->mutex);
1184 active_threads--;
1185 }
1186 }
1187
1188 git__free(p);
1189 return 0;
1190 }
1191
1192 #else
1193 #define ll_find_deltas(pb, l, ls, w, d) find_deltas(pb, l, &ls, w, d)
1194 #endif
1195
1196 static int prepare_pack(git_packbuilder *pb)
1197 {
1198 git_pobject **delta_list;
1199 unsigned int i, n = 0;
1200
1201 if (pb->nr_objects == 0 || pb->done)
1202 return 0; /* nothing to do */
1203
1204 delta_list = git__malloc(pb->nr_objects * sizeof(*delta_list));
1205 GITERR_CHECK_ALLOC(delta_list);
1206
1207 for (i = 0; i < pb->nr_objects; ++i) {
1208 git_pobject *po = pb->object_list + i;
1209
1210 /* Make sure the item is within our size limits */
1211 if (po->size < 50 || po->size > pb->big_file_threshold)
1212 continue;
1213
1214 delta_list[n++] = po;
1215 }
1216
1217 if (n > 1) {
1218 git__tsort((void **)delta_list, n, type_size_sort);
1219 if (ll_find_deltas(pb, delta_list, n,
1220 GIT_PACK_WINDOW + 1,
1221 GIT_PACK_DEPTH) < 0) {
1222 git__free(delta_list);
1223 return -1;
1224 }
1225 }
1226
1227 pb->done = true;
1228 git__free(delta_list);
1229 return 0;
1230 }
1231
1232 #define PREPARE_PACK if (prepare_pack(pb) < 0) { return -1; }
1233
1234 int git_packbuilder_send(git_packbuilder *pb, git_transport *t)
1235 {
1236 PREPARE_PACK;
1237 return write_pack(pb, &send_pack_file, t);
1238 }
1239
1240 int git_packbuilder_write_buf(git_buf *buf, git_packbuilder *pb)
1241 {
1242 PREPARE_PACK;
1243 return write_pack(pb, &write_pack_buf, buf);
1244 }
1245
1246 int git_packbuilder_write(git_packbuilder *pb, const char *path)
1247 {
1248 PREPARE_PACK;
1249 return write_pack_file(pb, path);
1250 }
1251
1252 #undef PREPARE_PACK
1253
1254 static int cb_tree_walk(const char *root, const git_tree_entry *entry, void *payload)
1255 {
1256 git_packbuilder *pb = payload;
1257 git_buf buf = GIT_BUF_INIT;
1258
1259 git_buf_puts(&buf, root);
1260 git_buf_puts(&buf, git_tree_entry_name(entry));
1261
1262 if (git_packbuilder_insert(pb, git_tree_entry_id(entry),
1263 git_buf_cstr(&buf)) < 0) {
1264 git_buf_free(&buf);
1265 return -1;
1266 }
1267
1268 git_buf_free(&buf);
1269 return 0;
1270 }
1271
1272 int git_packbuilder_insert_tree(git_packbuilder *pb, const git_oid *oid)
1273 {
1274 git_tree *tree;
1275
1276 if (git_tree_lookup(&tree, pb->repo, oid) < 0 ||
1277 git_packbuilder_insert(pb, oid, NULL) < 0)
1278 return -1;
1279
1280 if (git_tree_walk(tree, cb_tree_walk, GIT_TREEWALK_PRE, pb) < 0) {
1281 git_tree_free(tree);
1282 return -1;
1283 }
1284
1285 git_tree_free(tree);
1286 return 0;
1287 }
1288
1289 void git_packbuilder_free(git_packbuilder *pb)
1290 {
1291 if (pb == NULL)
1292 return;
1293
1294 #ifdef GIT_THREADS
1295
1296 git_mutex_free(&pb->cache_mutex);
1297 git_mutex_free(&pb->progress_mutex);
1298 git_cond_free(&pb->progress_cond);
1299
1300 #endif
1301
1302 if (pb->odb)
1303 git_odb_free(pb->odb);
1304
1305 if (pb->ctx)
1306 git_hash_free_ctx(pb->ctx);
1307
1308 if (pb->object_ix)
1309 git_oidmap_free(pb->object_ix);
1310
1311 if (pb->object_list)
1312 git__free(pb->object_list);
1313
1314 git__free(pb);
1315 }