]> git.proxmox.com Git - libgit2.git/blob - src/pack-objects.c
packbuilder: report progress during deltification
[libgit2.git] / src / pack-objects.c
1 /*
2 * Copyright (C) the libgit2 contributors. All rights reserved.
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8 #include "pack-objects.h"
9
10 #include "zstream.h"
11 #include "delta.h"
12 #include "iterator.h"
13 #include "netops.h"
14 #include "pack.h"
15 #include "thread-utils.h"
16 #include "tree.h"
17 #include "util.h"
18 #include "revwalk.h"
19 #include "commit_list.h"
20
21 #include "git2/pack.h"
22 #include "git2/commit.h"
23 #include "git2/tag.h"
24 #include "git2/indexer.h"
25 #include "git2/config.h"
26
27 struct unpacked {
28 git_pobject *object;
29 void *data;
30 struct git_delta_index *index;
31 int depth;
32 };
33
34 struct tree_walk_context {
35 git_packbuilder *pb;
36 git_buf buf;
37 };
38
39 struct pack_write_context {
40 git_indexer *indexer;
41 git_transfer_progress *stats;
42 };
43
44 GIT__USE_OIDMAP;
45
46 #ifdef GIT_THREADS
47
48 #define GIT_PACKBUILDER__MUTEX_OP(pb, mtx, op) do { \
49 int result = git_mutex_##op(&(pb)->mtx); \
50 assert(!result); \
51 GIT_UNUSED(result); \
52 } while (0)
53
54 #else
55
56 #define GIT_PACKBUILDER__MUTEX_OP(pb,mtx,op) GIT_UNUSED(pb)
57
58 #endif /* GIT_THREADS */
59
60 #define git_packbuilder__cache_lock(pb) GIT_PACKBUILDER__MUTEX_OP(pb, cache_mutex, lock)
61 #define git_packbuilder__cache_unlock(pb) GIT_PACKBUILDER__MUTEX_OP(pb, cache_mutex, unlock)
62 #define git_packbuilder__progress_lock(pb) GIT_PACKBUILDER__MUTEX_OP(pb, progress_mutex, lock)
63 #define git_packbuilder__progress_unlock(pb) GIT_PACKBUILDER__MUTEX_OP(pb, progress_mutex, unlock)
64
65 /* The minimal interval between progress updates (in seconds). */
66 #define MIN_PROGRESS_UPDATE_INTERVAL 0.5
67
68 /* Size of the buffer to feed to zlib */
69 #define COMPRESS_BUFLEN (1024 * 1024)
70
71 static unsigned name_hash(const char *name)
72 {
73 unsigned c, hash = 0;
74
75 if (!name)
76 return 0;
77
78 /*
79 * This effectively just creates a sortable number from the
80 * last sixteen non-whitespace characters. Last characters
81 * count "most", so things that end in ".c" sort together.
82 */
83 while ((c = *name++) != 0) {
84 if (git__isspace(c))
85 continue;
86 hash = (hash >> 2) + (c << 24);
87 }
88 return hash;
89 }
90
91 static int packbuilder_config(git_packbuilder *pb)
92 {
93 git_config *config;
94 int ret;
95 int64_t val;
96
97 if ((ret = git_repository_config_snapshot(&config, pb->repo)) < 0)
98 return ret;
99
100 #define config_get(KEY,DST,DFLT) do { \
101 ret = git_config_get_int64(&val, config, KEY); \
102 if (!ret) (DST) = val; \
103 else if (ret == GIT_ENOTFOUND) (DST) = (DFLT); \
104 else if (ret < 0) return -1; } while (0)
105
106 config_get("pack.deltaCacheSize", pb->max_delta_cache_size,
107 GIT_PACK_DELTA_CACHE_SIZE);
108 config_get("pack.deltaCacheLimit", pb->cache_max_small_delta_size,
109 GIT_PACK_DELTA_CACHE_LIMIT);
110 config_get("pack.deltaCacheSize", pb->big_file_threshold,
111 GIT_PACK_BIG_FILE_THRESHOLD);
112 config_get("pack.windowMemory", pb->window_memory_limit, 0);
113
114 #undef config_get
115
116 git_config_free(config);
117
118 return 0;
119 }
120
121 int git_packbuilder_new(git_packbuilder **out, git_repository *repo)
122 {
123 git_packbuilder *pb;
124
125 *out = NULL;
126
127 pb = git__calloc(1, sizeof(*pb));
128 GITERR_CHECK_ALLOC(pb);
129
130 pb->object_ix = git_oidmap_alloc();
131 if (!pb->object_ix)
132 goto on_error;
133
134 pb->walk_objects = git_oidmap_alloc();
135 if (!pb->walk_objects)
136 goto on_error;
137
138 if (git_pool_init(&pb->object_pool, sizeof(git_walk_object), 0) < 0)
139 goto on_error;
140
141 pb->repo = repo;
142 pb->nr_threads = 1; /* do not spawn any thread by default */
143
144 if (git_hash_ctx_init(&pb->ctx) < 0 ||
145 git_zstream_init(&pb->zstream) < 0 ||
146 git_repository_odb(&pb->odb, repo) < 0 ||
147 packbuilder_config(pb) < 0)
148 goto on_error;
149
150 #ifdef GIT_THREADS
151
152 if (git_mutex_init(&pb->cache_mutex) ||
153 git_mutex_init(&pb->progress_mutex) ||
154 git_cond_init(&pb->progress_cond))
155 {
156 giterr_set(GITERR_OS, "Failed to initialize packbuilder mutex");
157 goto on_error;
158 }
159
160 #endif
161
162 *out = pb;
163 return 0;
164
165 on_error:
166 git_packbuilder_free(pb);
167 return -1;
168 }
169
170 unsigned int git_packbuilder_set_threads(git_packbuilder *pb, unsigned int n)
171 {
172 assert(pb);
173
174 #ifdef GIT_THREADS
175 pb->nr_threads = n;
176 #else
177 GIT_UNUSED(n);
178 assert(1 == pb->nr_threads);
179 #endif
180
181 return pb->nr_threads;
182 }
183
184 static void rehash(git_packbuilder *pb)
185 {
186 git_pobject *po;
187 khiter_t pos;
188 unsigned int i;
189 int ret;
190
191 kh_clear(oid, pb->object_ix);
192 for (i = 0, po = pb->object_list; i < pb->nr_objects; i++, po++) {
193 pos = kh_put(oid, pb->object_ix, &po->id, &ret);
194 kh_value(pb->object_ix, pos) = po;
195 }
196 }
197
198 int git_packbuilder_insert(git_packbuilder *pb, const git_oid *oid,
199 const char *name)
200 {
201 git_pobject *po;
202 khiter_t pos;
203 size_t newsize;
204 int ret;
205
206 assert(pb && oid);
207
208 /* If the object already exists in the hash table, then we don't
209 * have any work to do */
210 pos = kh_get(oid, pb->object_ix, oid);
211 if (pos != kh_end(pb->object_ix))
212 return 0;
213
214 if (pb->nr_objects >= pb->nr_alloc) {
215 GITERR_CHECK_ALLOC_ADD(&newsize, pb->nr_alloc, 1024);
216 GITERR_CHECK_ALLOC_MULTIPLY(&newsize, newsize, 3 / 2);
217
218 if (!git__is_uint32(newsize)) {
219 giterr_set(GITERR_NOMEMORY, "Packfile too large to fit in memory.");
220 return -1;
221 }
222
223 pb->nr_alloc = (uint32_t)newsize;
224
225 pb->object_list = git__reallocarray(pb->object_list,
226 pb->nr_alloc, sizeof(*po));
227 GITERR_CHECK_ALLOC(pb->object_list);
228 rehash(pb);
229 }
230
231 po = pb->object_list + pb->nr_objects;
232 memset(po, 0x0, sizeof(*po));
233
234 if ((ret = git_odb_read_header(&po->size, &po->type, pb->odb, oid)) < 0)
235 return ret;
236
237 pb->nr_objects++;
238 git_oid_cpy(&po->id, oid);
239 po->hash = name_hash(name);
240
241 pos = kh_put(oid, pb->object_ix, &po->id, &ret);
242 if (ret < 0) {
243 giterr_set_oom();
244 return ret;
245 }
246 assert(ret != 0);
247 kh_value(pb->object_ix, pos) = po;
248
249 pb->done = false;
250
251 if (pb->progress_cb) {
252 double current_time = git__timer();
253 double elapsed = current_time - pb->last_progress_report_time;
254
255 if (elapsed >= MIN_PROGRESS_UPDATE_INTERVAL) {
256 pb->last_progress_report_time = current_time;
257
258 ret = pb->progress_cb(
259 GIT_PACKBUILDER_ADDING_OBJECTS,
260 pb->nr_objects, 0, pb->progress_cb_payload);
261
262 if (ret)
263 return giterr_set_after_callback(ret);
264 }
265 }
266
267 return 0;
268 }
269
270 static int get_delta(void **out, git_odb *odb, git_pobject *po)
271 {
272 git_odb_object *src = NULL, *trg = NULL;
273 unsigned long delta_size;
274 void *delta_buf;
275
276 *out = NULL;
277
278 if (git_odb_read(&src, odb, &po->delta->id) < 0 ||
279 git_odb_read(&trg, odb, &po->id) < 0)
280 goto on_error;
281
282 delta_buf = git_delta(
283 git_odb_object_data(src), (unsigned long)git_odb_object_size(src),
284 git_odb_object_data(trg), (unsigned long)git_odb_object_size(trg),
285 &delta_size, 0);
286
287 if (!delta_buf || delta_size != po->delta_size) {
288 giterr_set(GITERR_INVALID, "Delta size changed");
289 goto on_error;
290 }
291
292 *out = delta_buf;
293
294 git_odb_object_free(src);
295 git_odb_object_free(trg);
296 return 0;
297
298 on_error:
299 git_odb_object_free(src);
300 git_odb_object_free(trg);
301 return -1;
302 }
303
304 static int write_object(
305 git_packbuilder *pb,
306 git_pobject *po,
307 int (*write_cb)(void *buf, size_t size, void *cb_data),
308 void *cb_data)
309 {
310 git_odb_object *obj = NULL;
311 git_otype type;
312 unsigned char hdr[10], *zbuf = NULL;
313 void *data = NULL;
314 size_t hdr_len, zbuf_len = COMPRESS_BUFLEN, data_len;
315 int error;
316
317 /*
318 * If we have a delta base, let's use the delta to save space.
319 * Otherwise load the whole object. 'data' ends up pointing to
320 * whatever data we want to put into the packfile.
321 */
322 if (po->delta) {
323 if (po->delta_data)
324 data = po->delta_data;
325 else if ((error = get_delta(&data, pb->odb, po)) < 0)
326 goto done;
327
328 data_len = po->delta_size;
329 type = GIT_OBJ_REF_DELTA;
330 } else {
331 if ((error = git_odb_read(&obj, pb->odb, &po->id)) < 0)
332 goto done;
333
334 data = (void *)git_odb_object_data(obj);
335 data_len = git_odb_object_size(obj);
336 type = git_odb_object_type(obj);
337 }
338
339 /* Write header */
340 hdr_len = git_packfile__object_header(hdr, data_len, type);
341
342 if ((error = write_cb(hdr, hdr_len, cb_data)) < 0 ||
343 (error = git_hash_update(&pb->ctx, hdr, hdr_len)) < 0)
344 goto done;
345
346 if (type == GIT_OBJ_REF_DELTA) {
347 if ((error = write_cb(po->delta->id.id, GIT_OID_RAWSZ, cb_data)) < 0 ||
348 (error = git_hash_update(&pb->ctx, po->delta->id.id, GIT_OID_RAWSZ)) < 0)
349 goto done;
350 }
351
352 /* Write data */
353 if (po->z_delta_size) {
354 data_len = po->z_delta_size;
355
356 if ((error = write_cb(data, data_len, cb_data)) < 0 ||
357 (error = git_hash_update(&pb->ctx, data, data_len)) < 0)
358 goto done;
359 } else {
360 zbuf = git__malloc(zbuf_len);
361 GITERR_CHECK_ALLOC(zbuf);
362
363 git_zstream_reset(&pb->zstream);
364 git_zstream_set_input(&pb->zstream, data, data_len);
365
366 while (!git_zstream_done(&pb->zstream)) {
367 if ((error = git_zstream_get_output(zbuf, &zbuf_len, &pb->zstream)) < 0 ||
368 (error = write_cb(zbuf, zbuf_len, cb_data)) < 0 ||
369 (error = git_hash_update(&pb->ctx, zbuf, zbuf_len)) < 0)
370 goto done;
371
372 zbuf_len = COMPRESS_BUFLEN; /* reuse buffer */
373 }
374 }
375
376 /*
377 * If po->delta is true, data is a delta and it is our
378 * responsibility to free it (otherwise it's a git_object's
379 * data). We set po->delta_data to NULL in case we got the
380 * data from there instead of get_delta(). If we didn't,
381 * there's no harm.
382 */
383 if (po->delta) {
384 git__free(data);
385 po->delta_data = NULL;
386 }
387
388 pb->nr_written++;
389
390 done:
391 git__free(zbuf);
392 git_odb_object_free(obj);
393 return error;
394 }
395
396 enum write_one_status {
397 WRITE_ONE_SKIP = -1, /* already written */
398 WRITE_ONE_BREAK = 0, /* writing this will bust the limit; not written */
399 WRITE_ONE_WRITTEN = 1, /* normal */
400 WRITE_ONE_RECURSIVE = 2 /* already scheduled to be written */
401 };
402
403 static int write_one(
404 enum write_one_status *status,
405 git_packbuilder *pb,
406 git_pobject *po,
407 int (*write_cb)(void *buf, size_t size, void *cb_data),
408 void *cb_data)
409 {
410 int error;
411
412 if (po->recursing) {
413 *status = WRITE_ONE_RECURSIVE;
414 return 0;
415 } else if (po->written) {
416 *status = WRITE_ONE_SKIP;
417 return 0;
418 }
419
420 if (po->delta) {
421 po->recursing = 1;
422
423 if ((error = write_one(status, pb, po->delta, write_cb, cb_data)) < 0)
424 return error;
425
426 /* we cannot depend on this one */
427 if (*status == WRITE_ONE_RECURSIVE)
428 po->delta = NULL;
429 }
430
431 *status = WRITE_ONE_WRITTEN;
432 po->written = 1;
433 po->recursing = 0;
434
435 return write_object(pb, po, write_cb, cb_data);
436 }
437
438 GIT_INLINE(void) add_to_write_order(git_pobject **wo, unsigned int *endp,
439 git_pobject *po)
440 {
441 if (po->filled)
442 return;
443 wo[(*endp)++] = po;
444 po->filled = 1;
445 }
446
447 static void add_descendants_to_write_order(git_pobject **wo, unsigned int *endp,
448 git_pobject *po)
449 {
450 int add_to_order = 1;
451 while (po) {
452 if (add_to_order) {
453 git_pobject *s;
454 /* add this node... */
455 add_to_write_order(wo, endp, po);
456 /* all its siblings... */
457 for (s = po->delta_sibling; s; s = s->delta_sibling) {
458 add_to_write_order(wo, endp, s);
459 }
460 }
461 /* drop down a level to add left subtree nodes if possible */
462 if (po->delta_child) {
463 add_to_order = 1;
464 po = po->delta_child;
465 } else {
466 add_to_order = 0;
467 /* our sibling might have some children, it is next */
468 if (po->delta_sibling) {
469 po = po->delta_sibling;
470 continue;
471 }
472 /* go back to our parent node */
473 po = po->delta;
474 while (po && !po->delta_sibling) {
475 /* we're on the right side of a subtree, keep
476 * going up until we can go right again */
477 po = po->delta;
478 }
479 if (!po) {
480 /* done- we hit our original root node */
481 return;
482 }
483 /* pass it off to sibling at this level */
484 po = po->delta_sibling;
485 }
486 };
487 }
488
489 static void add_family_to_write_order(git_pobject **wo, unsigned int *endp,
490 git_pobject *po)
491 {
492 git_pobject *root;
493
494 for (root = po; root->delta; root = root->delta)
495 ; /* nothing */
496 add_descendants_to_write_order(wo, endp, root);
497 }
498
499 static int cb_tag_foreach(const char *name, git_oid *oid, void *data)
500 {
501 git_packbuilder *pb = data;
502 git_pobject *po;
503 khiter_t pos;
504
505 GIT_UNUSED(name);
506
507 pos = kh_get(oid, pb->object_ix, oid);
508 if (pos == kh_end(pb->object_ix))
509 return 0;
510
511 po = kh_value(pb->object_ix, pos);
512 po->tagged = 1;
513
514 /* TODO: peel objects */
515
516 return 0;
517 }
518
519 static git_pobject **compute_write_order(git_packbuilder *pb)
520 {
521 unsigned int i, wo_end, last_untagged;
522 git_pobject **wo;
523
524 if ((wo = git__mallocarray(pb->nr_objects, sizeof(*wo))) == NULL)
525 return NULL;
526
527 for (i = 0; i < pb->nr_objects; i++) {
528 git_pobject *po = pb->object_list + i;
529 po->tagged = 0;
530 po->filled = 0;
531 po->delta_child = NULL;
532 po->delta_sibling = NULL;
533 }
534
535 /*
536 * Fully connect delta_child/delta_sibling network.
537 * Make sure delta_sibling is sorted in the original
538 * recency order.
539 */
540 for (i = pb->nr_objects; i > 0;) {
541 git_pobject *po = &pb->object_list[--i];
542 if (!po->delta)
543 continue;
544 /* Mark me as the first child */
545 po->delta_sibling = po->delta->delta_child;
546 po->delta->delta_child = po;
547 }
548
549 /*
550 * Mark objects that are at the tip of tags.
551 */
552 if (git_tag_foreach(pb->repo, &cb_tag_foreach, pb) < 0) {
553 git__free(wo);
554 return NULL;
555 }
556
557 /*
558 * Give the objects in the original recency order until
559 * we see a tagged tip.
560 */
561 for (i = wo_end = 0; i < pb->nr_objects; i++) {
562 git_pobject *po = pb->object_list + i;
563 if (po->tagged)
564 break;
565 add_to_write_order(wo, &wo_end, po);
566 }
567 last_untagged = i;
568
569 /*
570 * Then fill all the tagged tips.
571 */
572 for (; i < pb->nr_objects; i++) {
573 git_pobject *po = pb->object_list + i;
574 if (po->tagged)
575 add_to_write_order(wo, &wo_end, po);
576 }
577
578 /*
579 * And then all remaining commits and tags.
580 */
581 for (i = last_untagged; i < pb->nr_objects; i++) {
582 git_pobject *po = pb->object_list + i;
583 if (po->type != GIT_OBJ_COMMIT &&
584 po->type != GIT_OBJ_TAG)
585 continue;
586 add_to_write_order(wo, &wo_end, po);
587 }
588
589 /*
590 * And then all the trees.
591 */
592 for (i = last_untagged; i < pb->nr_objects; i++) {
593 git_pobject *po = pb->object_list + i;
594 if (po->type != GIT_OBJ_TREE)
595 continue;
596 add_to_write_order(wo, &wo_end, po);
597 }
598
599 /*
600 * Finally all the rest in really tight order
601 */
602 for (i = last_untagged; i < pb->nr_objects; i++) {
603 git_pobject *po = pb->object_list + i;
604 if (!po->filled)
605 add_family_to_write_order(wo, &wo_end, po);
606 }
607
608 if (wo_end != pb->nr_objects) {
609 giterr_set(GITERR_INVALID, "invalid write order");
610 return NULL;
611 }
612
613 return wo;
614 }
615
616 static int write_pack(git_packbuilder *pb,
617 int (*write_cb)(void *buf, size_t size, void *cb_data),
618 void *cb_data)
619 {
620 git_pobject **write_order;
621 git_pobject *po;
622 enum write_one_status status;
623 struct git_pack_header ph;
624 git_oid entry_oid;
625 unsigned int i = 0;
626 int error = 0;
627
628 write_order = compute_write_order(pb);
629 if (write_order == NULL) {
630 error = -1;
631 goto done;
632 }
633
634 /* Write pack header */
635 ph.hdr_signature = htonl(PACK_SIGNATURE);
636 ph.hdr_version = htonl(PACK_VERSION);
637 ph.hdr_entries = htonl(pb->nr_objects);
638
639 if ((error = write_cb(&ph, sizeof(ph), cb_data)) < 0 ||
640 (error = git_hash_update(&pb->ctx, &ph, sizeof(ph))) < 0)
641 goto done;
642
643 pb->nr_remaining = pb->nr_objects;
644 do {
645 pb->nr_written = 0;
646 for ( ; i < pb->nr_objects; ++i) {
647 po = write_order[i];
648
649 if ((error = write_one(&status, pb, po, write_cb, cb_data)) < 0)
650 goto done;
651 }
652
653 pb->nr_remaining -= pb->nr_written;
654 } while (pb->nr_remaining && i < pb->nr_objects);
655
656 if ((error = git_hash_final(&entry_oid, &pb->ctx)) < 0)
657 goto done;
658
659 error = write_cb(entry_oid.id, GIT_OID_RAWSZ, cb_data);
660
661 done:
662 /* if callback cancelled writing, we must still free delta_data */
663 for ( ; i < pb->nr_objects; ++i) {
664 po = write_order[i];
665 if (po->delta_data) {
666 git__free(po->delta_data);
667 po->delta_data = NULL;
668 }
669 }
670
671 git__free(write_order);
672 return error;
673 }
674
675 static int write_pack_buf(void *buf, size_t size, void *data)
676 {
677 git_buf *b = (git_buf *)data;
678 return git_buf_put(b, buf, size);
679 }
680
681 static int type_size_sort(const void *_a, const void *_b)
682 {
683 const git_pobject *a = (git_pobject *)_a;
684 const git_pobject *b = (git_pobject *)_b;
685
686 if (a->type > b->type)
687 return -1;
688 if (a->type < b->type)
689 return 1;
690 if (a->hash > b->hash)
691 return -1;
692 if (a->hash < b->hash)
693 return 1;
694 /*
695 * TODO
696 *
697 if (a->preferred_base > b->preferred_base)
698 return -1;
699 if (a->preferred_base < b->preferred_base)
700 return 1;
701 */
702 if (a->size > b->size)
703 return -1;
704 if (a->size < b->size)
705 return 1;
706 return a < b ? -1 : (a > b); /* newest first */
707 }
708
709 static int delta_cacheable(git_packbuilder *pb, unsigned long src_size,
710 unsigned long trg_size, unsigned long delta_size)
711 {
712 if (pb->max_delta_cache_size &&
713 pb->delta_cache_size + delta_size > pb->max_delta_cache_size)
714 return 0;
715
716 if (delta_size < pb->cache_max_small_delta_size)
717 return 1;
718
719 /* cache delta, if objects are large enough compared to delta size */
720 if ((src_size >> 20) + (trg_size >> 21) > (delta_size >> 10))
721 return 1;
722
723 return 0;
724 }
725
726 static int try_delta(git_packbuilder *pb, struct unpacked *trg,
727 struct unpacked *src, int max_depth,
728 unsigned long *mem_usage, int *ret)
729 {
730 git_pobject *trg_object = trg->object;
731 git_pobject *src_object = src->object;
732 git_odb_object *obj;
733 unsigned long trg_size, src_size, delta_size,
734 sizediff, max_size, sz;
735 unsigned int ref_depth;
736 void *delta_buf;
737
738 /* Don't bother doing diffs between different types */
739 if (trg_object->type != src_object->type) {
740 *ret = -1;
741 return 0;
742 }
743
744 *ret = 0;
745
746 /* TODO: support reuse-delta */
747
748 /* Let's not bust the allowed depth. */
749 if (src->depth >= max_depth)
750 return 0;
751
752 /* Now some size filtering heuristics. */
753 trg_size = (unsigned long)trg_object->size;
754 if (!trg_object->delta) {
755 max_size = trg_size/2 - 20;
756 ref_depth = 1;
757 } else {
758 max_size = trg_object->delta_size;
759 ref_depth = trg->depth;
760 }
761
762 max_size = (uint64_t)max_size * (max_depth - src->depth) /
763 (max_depth - ref_depth + 1);
764 if (max_size == 0)
765 return 0;
766
767 src_size = (unsigned long)src_object->size;
768 sizediff = src_size < trg_size ? trg_size - src_size : 0;
769 if (sizediff >= max_size)
770 return 0;
771 if (trg_size < src_size / 32)
772 return 0;
773
774 /* Load data if not already done */
775 if (!trg->data) {
776 if (git_odb_read(&obj, pb->odb, &trg_object->id) < 0)
777 return -1;
778
779 sz = (unsigned long)git_odb_object_size(obj);
780 trg->data = git__malloc(sz);
781 GITERR_CHECK_ALLOC(trg->data);
782 memcpy(trg->data, git_odb_object_data(obj), sz);
783
784 git_odb_object_free(obj);
785
786 if (sz != trg_size) {
787 giterr_set(GITERR_INVALID,
788 "Inconsistent target object length");
789 return -1;
790 }
791
792 *mem_usage += sz;
793 }
794 if (!src->data) {
795 size_t obj_sz;
796
797 if (git_odb_read(&obj, pb->odb, &src_object->id) < 0 ||
798 !git__is_ulong(obj_sz = git_odb_object_size(obj)))
799 return -1;
800
801 sz = (unsigned long)obj_sz;
802 src->data = git__malloc(sz);
803 GITERR_CHECK_ALLOC(src->data);
804 memcpy(src->data, git_odb_object_data(obj), sz);
805
806 git_odb_object_free(obj);
807
808 if (sz != src_size) {
809 giterr_set(GITERR_INVALID,
810 "Inconsistent source object length");
811 return -1;
812 }
813
814 *mem_usage += sz;
815 }
816 if (!src->index) {
817 src->index = git_delta_create_index(src->data, src_size);
818 if (!src->index)
819 return 0; /* suboptimal pack - out of memory */
820
821 *mem_usage += git_delta_sizeof_index(src->index);
822 }
823
824 delta_buf = git_delta_create(src->index, trg->data, trg_size,
825 &delta_size, max_size);
826 if (!delta_buf)
827 return 0;
828
829 if (trg_object->delta) {
830 /* Prefer only shallower same-sized deltas. */
831 if (delta_size == trg_object->delta_size &&
832 src->depth + 1 >= trg->depth) {
833 git__free(delta_buf);
834 return 0;
835 }
836 }
837
838 git_packbuilder__cache_lock(pb);
839 if (trg_object->delta_data) {
840 git__free(trg_object->delta_data);
841 pb->delta_cache_size -= trg_object->delta_size;
842 trg_object->delta_data = NULL;
843 }
844 if (delta_cacheable(pb, src_size, trg_size, delta_size)) {
845 bool overflow = git__add_uint64_overflow(
846 &pb->delta_cache_size, pb->delta_cache_size, delta_size);
847
848 git_packbuilder__cache_unlock(pb);
849
850 if (overflow ||
851 !(trg_object->delta_data = git__realloc(delta_buf, delta_size)))
852 return -1;
853 } else {
854 /* create delta when writing the pack */
855 git_packbuilder__cache_unlock(pb);
856 git__free(delta_buf);
857 }
858
859 trg_object->delta = src_object;
860 trg_object->delta_size = delta_size;
861 trg->depth = src->depth + 1;
862
863 *ret = 1;
864 return 0;
865 }
866
867 static unsigned int check_delta_limit(git_pobject *me, unsigned int n)
868 {
869 git_pobject *child = me->delta_child;
870 unsigned int m = n;
871
872 while (child) {
873 unsigned int c = check_delta_limit(child, n + 1);
874 if (m < c)
875 m = c;
876 child = child->delta_sibling;
877 }
878 return m;
879 }
880
881 static unsigned long free_unpacked(struct unpacked *n)
882 {
883 unsigned long freed_mem = git_delta_sizeof_index(n->index);
884 git_delta_free_index(n->index);
885 n->index = NULL;
886 if (n->data) {
887 freed_mem += (unsigned long)n->object->size;
888 git__free(n->data);
889 n->data = NULL;
890 }
891 n->object = NULL;
892 n->depth = 0;
893 return freed_mem;
894 }
895
896 static int report_delta_progress(git_packbuilder *pb, uint32_t count, bool force)
897 {
898 int ret;
899
900 if (pb->progress_cb) {
901 double current_time = git__timer();
902 double elapsed = current_time - pb->last_progress_report_time;
903
904 if (force || elapsed >= MIN_PROGRESS_UPDATE_INTERVAL) {
905 pb->last_progress_report_time = current_time;
906
907 ret = pb->progress_cb(
908 GIT_PACKBUILDER_DELTAFICATION,
909 count, pb->nr_objects, pb->progress_cb_payload);
910
911 if (ret)
912 return giterr_set_after_callback(ret);
913 }
914 }
915
916 return 0;
917 }
918
919 static int find_deltas(git_packbuilder *pb, git_pobject **list,
920 unsigned int *list_size, unsigned int window,
921 int depth)
922 {
923 git_pobject *po;
924 git_buf zbuf = GIT_BUF_INIT;
925 struct unpacked *array;
926 uint32_t idx = 0, count = 0;
927 unsigned long mem_usage = 0;
928 unsigned int i;
929 int error = -1;
930
931 array = git__calloc(window, sizeof(struct unpacked));
932 GITERR_CHECK_ALLOC(array);
933
934 for (;;) {
935 struct unpacked *n = array + idx;
936 int max_depth, j, best_base = -1;
937
938 git_packbuilder__progress_lock(pb);
939 if (!*list_size) {
940 git_packbuilder__progress_unlock(pb);
941 break;
942 }
943
944 pb->nr_deltified += 1;
945 report_delta_progress(pb, pb->nr_deltified, false);
946
947 po = *list++;
948 (*list_size)--;
949 git_packbuilder__progress_unlock(pb);
950
951 mem_usage -= free_unpacked(n);
952 n->object = po;
953
954 while (pb->window_memory_limit &&
955 mem_usage > pb->window_memory_limit &&
956 count > 1) {
957 uint32_t tail = (idx + window - count) % window;
958 mem_usage -= free_unpacked(array + tail);
959 count--;
960 }
961
962 /*
963 * If the current object is at pack edge, take the depth the
964 * objects that depend on the current object into account
965 * otherwise they would become too deep.
966 */
967 max_depth = depth;
968 if (po->delta_child) {
969 max_depth -= check_delta_limit(po, 0);
970 if (max_depth <= 0)
971 goto next;
972 }
973
974 j = window;
975 while (--j > 0) {
976 int ret;
977 uint32_t other_idx = idx + j;
978 struct unpacked *m;
979
980 if (other_idx >= window)
981 other_idx -= window;
982
983 m = array + other_idx;
984 if (!m->object)
985 break;
986
987 if (try_delta(pb, n, m, max_depth, &mem_usage, &ret) < 0)
988 goto on_error;
989 if (ret < 0)
990 break;
991 else if (ret > 0)
992 best_base = other_idx;
993 }
994
995 /*
996 * If we decided to cache the delta data, then it is best
997 * to compress it right away. First because we have to do
998 * it anyway, and doing it here while we're threaded will
999 * save a lot of time in the non threaded write phase,
1000 * as well as allow for caching more deltas within
1001 * the same cache size limit.
1002 * ...
1003 * But only if not writing to stdout, since in that case
1004 * the network is most likely throttling writes anyway,
1005 * and therefore it is best to go to the write phase ASAP
1006 * instead, as we can afford spending more time compressing
1007 * between writes at that moment.
1008 */
1009 if (po->delta_data) {
1010 if (git_zstream_deflatebuf(&zbuf, po->delta_data, po->delta_size) < 0)
1011 goto on_error;
1012
1013 git__free(po->delta_data);
1014 po->delta_data = git__malloc(zbuf.size);
1015 GITERR_CHECK_ALLOC(po->delta_data);
1016
1017 memcpy(po->delta_data, zbuf.ptr, zbuf.size);
1018 po->z_delta_size = (unsigned long)zbuf.size;
1019 git_buf_clear(&zbuf);
1020
1021 git_packbuilder__cache_lock(pb);
1022 pb->delta_cache_size -= po->delta_size;
1023 pb->delta_cache_size += po->z_delta_size;
1024 git_packbuilder__cache_unlock(pb);
1025 }
1026
1027 /*
1028 * If we made n a delta, and if n is already at max
1029 * depth, leaving it in the window is pointless. we
1030 * should evict it first.
1031 */
1032 if (po->delta && max_depth <= n->depth)
1033 continue;
1034
1035 /*
1036 * Move the best delta base up in the window, after the
1037 * currently deltified object, to keep it longer. It will
1038 * be the first base object to be attempted next.
1039 */
1040 if (po->delta) {
1041 struct unpacked swap = array[best_base];
1042 int dist = (window + idx - best_base) % window;
1043 int dst = best_base;
1044 while (dist--) {
1045 int src = (dst + 1) % window;
1046 array[dst] = array[src];
1047 dst = src;
1048 }
1049 array[dst] = swap;
1050 }
1051
1052 next:
1053 idx++;
1054 if (count + 1 < window)
1055 count++;
1056 if (idx >= window)
1057 idx = 0;
1058 }
1059 error = 0;
1060
1061 on_error:
1062 for (i = 0; i < window; ++i) {
1063 git__free(array[i].index);
1064 git__free(array[i].data);
1065 }
1066 git__free(array);
1067 git_buf_free(&zbuf);
1068
1069 return error;
1070 }
1071
1072 #ifdef GIT_THREADS
1073
1074 struct thread_params {
1075 git_thread thread;
1076 git_packbuilder *pb;
1077
1078 git_pobject **list;
1079
1080 git_cond cond;
1081 git_mutex mutex;
1082
1083 unsigned int list_size;
1084 unsigned int remaining;
1085
1086 int window;
1087 int depth;
1088 int working;
1089 int data_ready;
1090 };
1091
1092 static void *threaded_find_deltas(void *arg)
1093 {
1094 struct thread_params *me = arg;
1095
1096 while (me->remaining) {
1097 if (find_deltas(me->pb, me->list, &me->remaining,
1098 me->window, me->depth) < 0) {
1099 ; /* TODO */
1100 }
1101
1102 git_packbuilder__progress_lock(me->pb);
1103 me->working = 0;
1104 git_cond_signal(&me->pb->progress_cond);
1105 git_packbuilder__progress_unlock(me->pb);
1106
1107 if (git_mutex_lock(&me->mutex)) {
1108 giterr_set(GITERR_THREAD, "unable to lock packfile condition mutex");
1109 return NULL;
1110 }
1111
1112 while (!me->data_ready)
1113 git_cond_wait(&me->cond, &me->mutex);
1114
1115 /*
1116 * We must not set ->data_ready before we wait on the
1117 * condition because the main thread may have set it to 1
1118 * before we get here. In order to be sure that new
1119 * work is available if we see 1 in ->data_ready, it
1120 * was initialized to 0 before this thread was spawned
1121 * and we reset it to 0 right away.
1122 */
1123 me->data_ready = 0;
1124 git_mutex_unlock(&me->mutex);
1125 }
1126 /* leave ->working 1 so that this doesn't get more work assigned */
1127 return NULL;
1128 }
1129
1130 static int ll_find_deltas(git_packbuilder *pb, git_pobject **list,
1131 unsigned int list_size, unsigned int window,
1132 int depth)
1133 {
1134 struct thread_params *p;
1135 int i, ret, active_threads = 0;
1136
1137 if (!pb->nr_threads)
1138 pb->nr_threads = git_online_cpus();
1139
1140 if (pb->nr_threads <= 1) {
1141 find_deltas(pb, list, &list_size, window, depth);
1142 return 0;
1143 }
1144
1145 p = git__mallocarray(pb->nr_threads, sizeof(*p));
1146 GITERR_CHECK_ALLOC(p);
1147
1148 /* Partition the work among the threads */
1149 for (i = 0; i < pb->nr_threads; ++i) {
1150 unsigned sub_size = list_size / (pb->nr_threads - i);
1151
1152 /* don't use too small segments or no deltas will be found */
1153 if (sub_size < 2*window && i+1 < pb->nr_threads)
1154 sub_size = 0;
1155
1156 p[i].pb = pb;
1157 p[i].window = window;
1158 p[i].depth = depth;
1159 p[i].working = 1;
1160 p[i].data_ready = 0;
1161
1162 /* try to split chunks on "path" boundaries */
1163 while (sub_size && sub_size < list_size &&
1164 list[sub_size]->hash &&
1165 list[sub_size]->hash == list[sub_size-1]->hash)
1166 sub_size++;
1167
1168 p[i].list = list;
1169 p[i].list_size = sub_size;
1170 p[i].remaining = sub_size;
1171
1172 list += sub_size;
1173 list_size -= sub_size;
1174 }
1175
1176 /* Start work threads */
1177 for (i = 0; i < pb->nr_threads; ++i) {
1178 if (!p[i].list_size)
1179 continue;
1180
1181 git_mutex_init(&p[i].mutex);
1182 git_cond_init(&p[i].cond);
1183
1184 ret = git_thread_create(&p[i].thread, NULL,
1185 threaded_find_deltas, &p[i]);
1186 if (ret) {
1187 giterr_set(GITERR_THREAD, "unable to create thread");
1188 return -1;
1189 }
1190 active_threads++;
1191 }
1192
1193 /*
1194 * Now let's wait for work completion. Each time a thread is done
1195 * with its work, we steal half of the remaining work from the
1196 * thread with the largest number of unprocessed objects and give
1197 * it to that newly idle thread. This ensure good load balancing
1198 * until the remaining object list segments are simply too short
1199 * to be worth splitting anymore.
1200 */
1201 while (active_threads) {
1202 struct thread_params *target = NULL;
1203 struct thread_params *victim = NULL;
1204 unsigned sub_size = 0;
1205
1206 /* Start by locating a thread that has transitioned its
1207 * 'working' flag from 1 -> 0. This indicates that it is
1208 * ready to receive more work using our work-stealing
1209 * algorithm. */
1210 git_packbuilder__progress_lock(pb);
1211 for (;;) {
1212 for (i = 0; !target && i < pb->nr_threads; i++)
1213 if (!p[i].working)
1214 target = &p[i];
1215 if (target)
1216 break;
1217 git_cond_wait(&pb->progress_cond, &pb->progress_mutex);
1218 }
1219
1220 /* At this point we hold the progress lock and have located
1221 * a thread to receive more work. We still need to locate a
1222 * thread from which to steal work (the victim). */
1223 for (i = 0; i < pb->nr_threads; i++)
1224 if (p[i].remaining > 2*window &&
1225 (!victim || victim->remaining < p[i].remaining))
1226 victim = &p[i];
1227
1228 if (victim) {
1229 sub_size = victim->remaining / 2;
1230 list = victim->list + victim->list_size - sub_size;
1231 while (sub_size && list[0]->hash &&
1232 list[0]->hash == list[-1]->hash) {
1233 list++;
1234 sub_size--;
1235 }
1236 if (!sub_size) {
1237 /*
1238 * It is possible for some "paths" to have
1239 * so many objects that no hash boundary
1240 * might be found. Let's just steal the
1241 * exact half in that case.
1242 */
1243 sub_size = victim->remaining / 2;
1244 list -= sub_size;
1245 }
1246 target->list = list;
1247 victim->list_size -= sub_size;
1248 victim->remaining -= sub_size;
1249 }
1250 target->list_size = sub_size;
1251 target->remaining = sub_size;
1252 target->working = 1;
1253 git_packbuilder__progress_unlock(pb);
1254
1255 if (git_mutex_lock(&target->mutex)) {
1256 giterr_set(GITERR_THREAD, "unable to lock packfile condition mutex");
1257 git__free(p);
1258 return -1;
1259 }
1260
1261 target->data_ready = 1;
1262 git_cond_signal(&target->cond);
1263 git_mutex_unlock(&target->mutex);
1264
1265 if (!sub_size) {
1266 git_thread_join(&target->thread, NULL);
1267 git_cond_free(&target->cond);
1268 git_mutex_free(&target->mutex);
1269 active_threads--;
1270 }
1271 }
1272
1273 git__free(p);
1274 return 0;
1275 }
1276
1277 #else
1278 #define ll_find_deltas(pb, l, ls, w, d) find_deltas(pb, l, &ls, w, d)
1279 #endif
1280
1281 static int prepare_pack(git_packbuilder *pb)
1282 {
1283 git_pobject **delta_list;
1284 unsigned int i, n = 0;
1285
1286 if (pb->nr_objects == 0 || pb->done)
1287 return 0; /* nothing to do */
1288
1289 /*
1290 * Although we do not report progress during deltafication, we
1291 * at least report that we are in the deltafication stage
1292 */
1293 if (pb->progress_cb)
1294 pb->progress_cb(GIT_PACKBUILDER_DELTAFICATION, 0, pb->nr_objects, pb->progress_cb_payload);
1295
1296 delta_list = git__mallocarray(pb->nr_objects, sizeof(*delta_list));
1297 GITERR_CHECK_ALLOC(delta_list);
1298
1299 for (i = 0; i < pb->nr_objects; ++i) {
1300 git_pobject *po = pb->object_list + i;
1301
1302 /* Make sure the item is within our size limits */
1303 if (po->size < 50 || po->size > pb->big_file_threshold)
1304 continue;
1305
1306 delta_list[n++] = po;
1307 }
1308
1309 if (n > 1) {
1310 git__tsort((void **)delta_list, n, type_size_sort);
1311 if (ll_find_deltas(pb, delta_list, n,
1312 GIT_PACK_WINDOW + 1,
1313 GIT_PACK_DEPTH) < 0) {
1314 git__free(delta_list);
1315 return -1;
1316 }
1317 }
1318
1319 report_delta_progress(pb, pb->nr_objects, true);
1320
1321 pb->done = true;
1322 git__free(delta_list);
1323 return 0;
1324 }
1325
1326 #define PREPARE_PACK if (prepare_pack(pb) < 0) { return -1; }
1327
1328 int git_packbuilder_foreach(git_packbuilder *pb, int (*cb)(void *buf, size_t size, void *payload), void *payload)
1329 {
1330 PREPARE_PACK;
1331 return write_pack(pb, cb, payload);
1332 }
1333
1334 int git_packbuilder_write_buf(git_buf *buf, git_packbuilder *pb)
1335 {
1336 PREPARE_PACK;
1337 git_buf_sanitize(buf);
1338 return write_pack(pb, &write_pack_buf, buf);
1339 }
1340
1341 static int write_cb(void *buf, size_t len, void *payload)
1342 {
1343 struct pack_write_context *ctx = payload;
1344 return git_indexer_append(ctx->indexer, buf, len, ctx->stats);
1345 }
1346
1347 int git_packbuilder_write(
1348 git_packbuilder *pb,
1349 const char *path,
1350 unsigned int mode,
1351 git_transfer_progress_cb progress_cb,
1352 void *progress_cb_payload)
1353 {
1354 git_indexer *indexer;
1355 git_transfer_progress stats;
1356 struct pack_write_context ctx;
1357
1358 PREPARE_PACK;
1359
1360 if (git_indexer_new(
1361 &indexer, path, mode, pb->odb, progress_cb, progress_cb_payload) < 0)
1362 return -1;
1363
1364 ctx.indexer = indexer;
1365 ctx.stats = &stats;
1366
1367 if (git_packbuilder_foreach(pb, write_cb, &ctx) < 0 ||
1368 git_indexer_commit(indexer, &stats) < 0) {
1369 git_indexer_free(indexer);
1370 return -1;
1371 }
1372
1373 git_oid_cpy(&pb->pack_oid, git_indexer_hash(indexer));
1374
1375 git_indexer_free(indexer);
1376 return 0;
1377 }
1378
1379 #undef PREPARE_PACK
1380
1381 const git_oid *git_packbuilder_hash(git_packbuilder *pb)
1382 {
1383 return &pb->pack_oid;
1384 }
1385
1386
1387 static int cb_tree_walk(
1388 const char *root, const git_tree_entry *entry, void *payload)
1389 {
1390 int error;
1391 struct tree_walk_context *ctx = payload;
1392
1393 /* A commit inside a tree represents a submodule commit and should be skipped. */
1394 if (git_tree_entry_type(entry) == GIT_OBJ_COMMIT)
1395 return 0;
1396
1397 if (!(error = git_buf_sets(&ctx->buf, root)) &&
1398 !(error = git_buf_puts(&ctx->buf, git_tree_entry_name(entry))))
1399 error = git_packbuilder_insert(
1400 ctx->pb, git_tree_entry_id(entry), git_buf_cstr(&ctx->buf));
1401
1402 return error;
1403 }
1404
1405 int git_packbuilder_insert_commit(git_packbuilder *pb, const git_oid *oid)
1406 {
1407 git_commit *commit;
1408
1409 if (git_commit_lookup(&commit, pb->repo, oid) < 0 ||
1410 git_packbuilder_insert(pb, oid, NULL) < 0)
1411 return -1;
1412
1413 if (git_packbuilder_insert_tree(pb, git_commit_tree_id(commit)) < 0)
1414 return -1;
1415
1416 git_commit_free(commit);
1417 return 0;
1418 }
1419
1420 int git_packbuilder_insert_tree(git_packbuilder *pb, const git_oid *oid)
1421 {
1422 int error;
1423 git_tree *tree = NULL;
1424 struct tree_walk_context context = { pb, GIT_BUF_INIT };
1425
1426 if (!(error = git_tree_lookup(&tree, pb->repo, oid)) &&
1427 !(error = git_packbuilder_insert(pb, oid, NULL)))
1428 error = git_tree_walk(tree, GIT_TREEWALK_PRE, cb_tree_walk, &context);
1429
1430 git_tree_free(tree);
1431 git_buf_free(&context.buf);
1432 return error;
1433 }
1434
1435 int git_packbuilder_insert_recur(git_packbuilder *pb, const git_oid *id, const char *name)
1436 {
1437 git_object *obj;
1438 int error;
1439
1440 assert(pb && id);
1441
1442 if ((error = git_object_lookup(&obj, pb->repo, id, GIT_OBJ_ANY)) < 0)
1443 return error;
1444
1445 switch (git_object_type(obj)) {
1446 case GIT_OBJ_BLOB:
1447 error = git_packbuilder_insert(pb, id, name);
1448 break;
1449 case GIT_OBJ_TREE:
1450 error = git_packbuilder_insert_tree(pb, id);
1451 break;
1452 case GIT_OBJ_COMMIT:
1453 error = git_packbuilder_insert_commit(pb, id);
1454 break;
1455 case GIT_OBJ_TAG:
1456 if ((error = git_packbuilder_insert(pb, id, name)) < 0)
1457 goto cleanup;
1458 error = git_packbuilder_insert_recur(pb, git_tag_target_id((git_tag *) obj), NULL);
1459 break;
1460
1461 default:
1462 giterr_set(GITERR_INVALID, "unknown object type");
1463 error = -1;
1464 }
1465
1466 cleanup:
1467 git_object_free(obj);
1468 return error;
1469 }
1470
1471 uint32_t git_packbuilder_object_count(git_packbuilder *pb)
1472 {
1473 return pb->nr_objects;
1474 }
1475
1476 uint32_t git_packbuilder_written(git_packbuilder *pb)
1477 {
1478 return pb->nr_written;
1479 }
1480
1481 int lookup_walk_object(git_walk_object **out, git_packbuilder *pb, const git_oid *id)
1482 {
1483 git_walk_object *obj;
1484
1485 obj = git_pool_mallocz(&pb->object_pool, 1);
1486 if (!obj) {
1487 giterr_set_oom();
1488 return -1;
1489 }
1490
1491 git_oid_cpy(&obj->id, id);
1492
1493 *out = obj;
1494 return 0;
1495 }
1496
1497 static int retrieve_object(git_walk_object **out, git_packbuilder *pb, const git_oid *id)
1498 {
1499 int error;
1500 khiter_t pos;
1501 git_walk_object *obj;
1502
1503 pos = git_oidmap_lookup_index(pb->walk_objects, id);
1504 if (git_oidmap_valid_index(pb->walk_objects, pos)) {
1505 obj = git_oidmap_value_at(pb->walk_objects, pos);
1506 } else {
1507 if ((error = lookup_walk_object(&obj, pb, id)) < 0)
1508 return error;
1509
1510 git_oidmap_insert(pb->walk_objects, &obj->id, obj, error);
1511 }
1512
1513 *out = obj;
1514 return 0;
1515 }
1516
1517 static int mark_blob_uninteresting(git_packbuilder *pb, const git_oid *id)
1518 {
1519 int error;
1520 git_walk_object *obj;
1521
1522 if ((error = retrieve_object(&obj, pb, id)) < 0)
1523 return error;
1524
1525 obj->uninteresting = 1;
1526
1527 return 0;
1528 }
1529
1530 static int mark_tree_uninteresting(git_packbuilder *pb, const git_oid *id)
1531 {
1532 git_walk_object *obj;
1533 git_tree *tree;
1534 int error;
1535 size_t i;
1536
1537 if ((error = retrieve_object(&obj, pb, id)) < 0)
1538 return error;
1539
1540 if (obj->uninteresting)
1541 return 0;
1542
1543 obj->uninteresting = 1;
1544
1545 if ((error = git_tree_lookup(&tree, pb->repo, id)) < 0)
1546 return error;
1547
1548 for (i = 0; i < git_tree_entrycount(tree); i++) {
1549 const git_tree_entry *entry = git_tree_entry_byindex(tree, i);
1550 const git_oid *entry_id = git_tree_entry_id(entry);
1551 switch (git_tree_entry_type(entry)) {
1552 case GIT_OBJ_TREE:
1553 if ((error = mark_tree_uninteresting(pb, entry_id)) < 0)
1554 goto cleanup;
1555 break;
1556 case GIT_OBJ_BLOB:
1557 if ((error = mark_blob_uninteresting(pb, entry_id)) < 0)
1558 goto cleanup;
1559 break;
1560 default:
1561 /* it's a submodule or something unknown, we don't want it */
1562 ;
1563 }
1564 }
1565
1566 cleanup:
1567 git_tree_free(tree);
1568 return error;
1569 }
1570
1571 /*
1572 * Mark the edges of the graph uninteresting. Since we start from a
1573 * git_revwalk, the commits are already uninteresting, but we need to
1574 * mark the trees and blobs.
1575 */
1576 static int mark_edges_uninteresting(git_packbuilder *pb, git_commit_list *commits)
1577 {
1578 int error;
1579 git_commit_list *list;
1580 git_commit *commit;
1581
1582 for (list = commits; list; list = list->next) {
1583 if (!list->item->uninteresting)
1584 continue;
1585
1586 if ((error = git_commit_lookup(&commit, pb->repo, &list->item->oid)) < 0)
1587 return error;
1588
1589 error = mark_tree_uninteresting(pb, git_commit_tree_id(commit));
1590 git_commit_free(commit);
1591
1592 if (error < 0)
1593 return error;
1594 }
1595
1596 return 0;
1597 }
1598
1599 int insert_tree(git_packbuilder *pb, git_tree *tree)
1600 {
1601 size_t i;
1602 int error;
1603 git_tree *subtree;
1604 git_walk_object *obj;
1605 const char *name;
1606
1607 if ((error = retrieve_object(&obj, pb, git_tree_id(tree))) < 0)
1608 return error;
1609
1610 if (obj->seen)
1611 return 0;
1612
1613 obj->seen = 1;
1614
1615 if ((error = git_packbuilder_insert(pb, &obj->id, NULL)))
1616 return error;
1617
1618 for (i = 0; i < git_tree_entrycount(tree); i++) {
1619 const git_tree_entry *entry = git_tree_entry_byindex(tree, i);
1620 const git_oid *entry_id = git_tree_entry_id(entry);
1621 switch (git_tree_entry_type(entry)) {
1622 case GIT_OBJ_TREE:
1623 if ((error = git_tree_lookup(&subtree, pb->repo, entry_id)) < 0)
1624 return error;
1625
1626 error = insert_tree(pb, subtree);
1627 git_tree_free(subtree);
1628
1629 if (error < 0)
1630 return error;
1631
1632 break;
1633 case GIT_OBJ_BLOB:
1634 name = git_tree_entry_name(entry);
1635 if ((error = git_packbuilder_insert(pb, entry_id, name)) < 0)
1636 return error;
1637 break;
1638 default:
1639 /* it's a submodule or something unknown, we don't want it */
1640 ;
1641 }
1642 }
1643
1644
1645 return error;
1646 }
1647
1648 int insert_commit(git_packbuilder *pb, git_walk_object *obj)
1649 {
1650 int error;
1651 git_commit *commit = NULL;
1652 git_tree *tree = NULL;
1653
1654 obj->seen = 1;
1655
1656 if ((error = git_packbuilder_insert(pb, &obj->id, NULL)) < 0)
1657 return error;
1658
1659 if ((error = git_commit_lookup(&commit, pb->repo, &obj->id)) < 0)
1660 return error;
1661
1662 if ((error = git_tree_lookup(&tree, pb->repo, git_commit_tree_id(commit))) < 0)
1663 goto cleanup;
1664
1665 if ((error = insert_tree(pb, tree)) < 0)
1666 goto cleanup;
1667
1668 cleanup:
1669 git_commit_free(commit);
1670 git_tree_free(tree);
1671 return error;
1672 }
1673
1674 int git_packbuilder_insert_walk(git_packbuilder *pb, git_revwalk *walk)
1675 {
1676 int error;
1677 git_oid id;
1678 git_walk_object *obj;
1679
1680 assert(pb && walk);
1681
1682 if ((error = mark_edges_uninteresting(pb, walk->user_input)) < 0)
1683 return error;
1684
1685 /*
1686 * TODO: git marks the parents of the edges
1687 * uninteresting. This may provide a speed advantage, but does
1688 * seem to assume the remote does not have a single-commit
1689 * history on the other end.
1690 */
1691
1692 /* walk down each tree up to the blobs and insert them, stopping when uninteresting */
1693 while ((error = git_revwalk_next(&id, walk)) == 0) {
1694 if ((error = retrieve_object(&obj, pb, &id)) < 0)
1695 return error;
1696
1697 if (obj->seen || obj->uninteresting)
1698 continue;
1699
1700 if ((error = insert_commit(pb, obj)) < 0)
1701 return error;
1702 }
1703
1704 if (error == GIT_ITEROVER)
1705 error = 0;
1706
1707 return 0;
1708 }
1709
1710 int git_packbuilder_set_callbacks(git_packbuilder *pb, git_packbuilder_progress progress_cb, void *progress_cb_payload)
1711 {
1712 if (!pb)
1713 return -1;
1714
1715 pb->progress_cb = progress_cb;
1716 pb->progress_cb_payload = progress_cb_payload;
1717
1718 return 0;
1719 }
1720
1721 void git_packbuilder_free(git_packbuilder *pb)
1722 {
1723 if (pb == NULL)
1724 return;
1725
1726 #ifdef GIT_THREADS
1727
1728 git_mutex_free(&pb->cache_mutex);
1729 git_mutex_free(&pb->progress_mutex);
1730 git_cond_free(&pb->progress_cond);
1731
1732 #endif
1733
1734 if (pb->odb)
1735 git_odb_free(pb->odb);
1736
1737 if (pb->object_ix)
1738 git_oidmap_free(pb->object_ix);
1739
1740 if (pb->object_list)
1741 git__free(pb->object_list);
1742
1743 git_oidmap_free(pb->walk_objects);
1744 git_pool_clear(&pb->object_pool);
1745
1746 git_hash_ctx_cleanup(&pb->ctx);
1747 git_zstream_free(&pb->zstream);
1748
1749 git__free(pb);
1750 }