]> git.proxmox.com Git - libgit2.git/blob - src/pack-objects.c
1aa6731b33ea84abacae5fae50e698d6a4251a84
[libgit2.git] / src / pack-objects.c
1 /*
2 * Copyright (C) the libgit2 contributors. All rights reserved.
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8 #include "pack-objects.h"
9
10 #include "buf.h"
11 #include "zstream.h"
12 #include "delta.h"
13 #include "iterator.h"
14 #include "netops.h"
15 #include "pack.h"
16 #include "thread.h"
17 #include "tree.h"
18 #include "util.h"
19 #include "revwalk.h"
20 #include "commit_list.h"
21
22 #include "git2/pack.h"
23 #include "git2/commit.h"
24 #include "git2/tag.h"
25 #include "git2/indexer.h"
26 #include "git2/config.h"
27
28 struct unpacked {
29 git_pobject *object;
30 void *data;
31 struct git_delta_index *index;
32 size_t depth;
33 };
34
35 struct tree_walk_context {
36 git_packbuilder *pb;
37 git_str buf;
38 };
39
40 struct pack_write_context {
41 git_indexer *indexer;
42 git_indexer_progress *stats;
43 };
44
45 struct walk_object {
46 git_oid id;
47 unsigned int uninteresting:1,
48 seen:1;
49 };
50
51 #ifdef GIT_THREADS
52 # define GIT_PACKBUILDER__MUTEX_OP(pb, mtx, op) git_mutex_##op(&(pb)->mtx)
53 #else
54 # define GIT_PACKBUILDER__MUTEX_OP(pb, mtx, op) git__noop()
55 #endif
56
57 #define git_packbuilder__cache_lock(pb) GIT_PACKBUILDER__MUTEX_OP(pb, cache_mutex, lock)
58 #define git_packbuilder__cache_unlock(pb) GIT_PACKBUILDER__MUTEX_OP(pb, cache_mutex, unlock)
59 #define git_packbuilder__progress_lock(pb) GIT_PACKBUILDER__MUTEX_OP(pb, progress_mutex, lock)
60 #define git_packbuilder__progress_unlock(pb) GIT_PACKBUILDER__MUTEX_OP(pb, progress_mutex, unlock)
61
62 /* The minimal interval between progress updates (in seconds). */
63 #define MIN_PROGRESS_UPDATE_INTERVAL 0.5
64
65 /* Size of the buffer to feed to zlib */
66 #define COMPRESS_BUFLEN (1024 * 1024)
67
68 static unsigned name_hash(const char *name)
69 {
70 unsigned c, hash = 0;
71
72 if (!name)
73 return 0;
74
75 /*
76 * This effectively just creates a sortable number from the
77 * last sixteen non-whitespace characters. Last characters
78 * count "most", so things that end in ".c" sort together.
79 */
80 while ((c = *name++) != 0) {
81 if (git__isspace(c))
82 continue;
83 hash = (hash >> 2) + (c << 24);
84 }
85 return hash;
86 }
87
88 static int packbuilder_config(git_packbuilder *pb)
89 {
90 git_config *config;
91 int ret = 0;
92 int64_t val;
93
94 if ((ret = git_repository_config_snapshot(&config, pb->repo)) < 0)
95 return ret;
96
97 #define config_get(KEY,DST,DFLT) do { \
98 ret = git_config_get_int64(&val, config, KEY); \
99 if (!ret) { \
100 if (!git__is_sizet(val)) { \
101 git_error_set(GIT_ERROR_CONFIG, \
102 "configuration value '%s' is too large", KEY); \
103 ret = -1; \
104 goto out; \
105 } \
106 (DST) = (size_t)val; \
107 } else if (ret == GIT_ENOTFOUND) { \
108 (DST) = (DFLT); \
109 ret = 0; \
110 } else if (ret < 0) goto out; } while (0)
111
112 config_get("pack.deltaCacheSize", pb->max_delta_cache_size,
113 GIT_PACK_DELTA_CACHE_SIZE);
114 config_get("pack.deltaCacheLimit", pb->cache_max_small_delta_size,
115 GIT_PACK_DELTA_CACHE_LIMIT);
116 config_get("pack.deltaCacheSize", pb->big_file_threshold,
117 GIT_PACK_BIG_FILE_THRESHOLD);
118 config_get("pack.windowMemory", pb->window_memory_limit, 0);
119
120 #undef config_get
121
122 out:
123 git_config_free(config);
124
125 return ret;
126 }
127
128 int git_packbuilder_new(git_packbuilder **out, git_repository *repo)
129 {
130 git_packbuilder *pb;
131
132 *out = NULL;
133
134 pb = git__calloc(1, sizeof(*pb));
135 GIT_ERROR_CHECK_ALLOC(pb);
136
137 if (git_oidmap_new(&pb->object_ix) < 0 ||
138 git_oidmap_new(&pb->walk_objects) < 0 ||
139 git_pool_init(&pb->object_pool, sizeof(struct walk_object)) < 0)
140 goto on_error;
141
142 pb->repo = repo;
143 pb->nr_threads = 1; /* do not spawn any thread by default */
144
145 if (git_hash_ctx_init(&pb->ctx, GIT_HASH_ALGORITHM_SHA1) < 0 ||
146 git_zstream_init(&pb->zstream, GIT_ZSTREAM_DEFLATE) < 0 ||
147 git_repository_odb(&pb->odb, repo) < 0 ||
148 packbuilder_config(pb) < 0)
149 goto on_error;
150
151 #ifdef GIT_THREADS
152
153 if (git_mutex_init(&pb->cache_mutex) ||
154 git_mutex_init(&pb->progress_mutex) ||
155 git_cond_init(&pb->progress_cond))
156 {
157 git_error_set(GIT_ERROR_OS, "failed to initialize packbuilder mutex");
158 goto on_error;
159 }
160
161 #endif
162
163 *out = pb;
164 return 0;
165
166 on_error:
167 git_packbuilder_free(pb);
168 return -1;
169 }
170
171 unsigned int git_packbuilder_set_threads(git_packbuilder *pb, unsigned int n)
172 {
173 GIT_ASSERT_ARG(pb);
174
175 #ifdef GIT_THREADS
176 pb->nr_threads = n;
177 #else
178 GIT_UNUSED(n);
179 GIT_ASSERT(pb->nr_threads == 1);
180 #endif
181
182 return pb->nr_threads;
183 }
184
185 static int rehash(git_packbuilder *pb)
186 {
187 git_pobject *po;
188 size_t i;
189
190 git_oidmap_clear(pb->object_ix);
191
192 for (i = 0, po = pb->object_list; i < pb->nr_objects; i++, po++) {
193 if (git_oidmap_set(pb->object_ix, &po->id, po) < 0)
194 return -1;
195 }
196
197 return 0;
198 }
199
200 int git_packbuilder_insert(git_packbuilder *pb, const git_oid *oid,
201 const char *name)
202 {
203 git_pobject *po;
204 size_t newsize;
205 int ret;
206
207 GIT_ASSERT_ARG(pb);
208 GIT_ASSERT_ARG(oid);
209
210 /* If the object already exists in the hash table, then we don't
211 * have any work to do */
212 if (git_oidmap_exists(pb->object_ix, oid))
213 return 0;
214
215 if (pb->nr_objects >= pb->nr_alloc) {
216 GIT_ERROR_CHECK_ALLOC_ADD(&newsize, pb->nr_alloc, 1024);
217 GIT_ERROR_CHECK_ALLOC_MULTIPLY(&newsize, newsize / 2, 3);
218
219 if (!git__is_uint32(newsize)) {
220 git_error_set(GIT_ERROR_NOMEMORY, "packfile too large to fit in memory.");
221 return -1;
222 }
223
224 pb->nr_alloc = newsize;
225
226 pb->object_list = git__reallocarray(pb->object_list,
227 pb->nr_alloc, sizeof(*po));
228 GIT_ERROR_CHECK_ALLOC(pb->object_list);
229
230 if (rehash(pb) < 0)
231 return -1;
232 }
233
234 po = pb->object_list + pb->nr_objects;
235 memset(po, 0x0, sizeof(*po));
236
237 if ((ret = git_odb_read_header(&po->size, &po->type, pb->odb, oid)) < 0)
238 return ret;
239
240 pb->nr_objects++;
241 git_oid_cpy(&po->id, oid);
242 po->hash = name_hash(name);
243
244 if (git_oidmap_set(pb->object_ix, &po->id, po) < 0) {
245 git_error_set_oom();
246 return -1;
247 }
248
249 pb->done = false;
250
251 if (pb->progress_cb) {
252 double current_time = git__timer();
253 double elapsed = current_time - pb->last_progress_report_time;
254
255 if (elapsed < 0 || elapsed >= MIN_PROGRESS_UPDATE_INTERVAL) {
256 pb->last_progress_report_time = current_time;
257
258 ret = pb->progress_cb(
259 GIT_PACKBUILDER_ADDING_OBJECTS,
260 pb->nr_objects, 0, pb->progress_cb_payload);
261
262 if (ret)
263 return git_error_set_after_callback(ret);
264 }
265 }
266
267 return 0;
268 }
269
270 static int get_delta(void **out, git_odb *odb, git_pobject *po)
271 {
272 git_odb_object *src = NULL, *trg = NULL;
273 size_t delta_size;
274 void *delta_buf;
275 int error;
276
277 *out = NULL;
278
279 if (git_odb_read(&src, odb, &po->delta->id) < 0 ||
280 git_odb_read(&trg, odb, &po->id) < 0)
281 goto on_error;
282
283 error = git_delta(&delta_buf, &delta_size,
284 git_odb_object_data(src), git_odb_object_size(src),
285 git_odb_object_data(trg), git_odb_object_size(trg),
286 0);
287
288 if (error < 0 && error != GIT_EBUFS)
289 goto on_error;
290
291 if (error == GIT_EBUFS || delta_size != po->delta_size) {
292 git_error_set(GIT_ERROR_INVALID, "delta size changed");
293 goto on_error;
294 }
295
296 *out = delta_buf;
297
298 git_odb_object_free(src);
299 git_odb_object_free(trg);
300 return 0;
301
302 on_error:
303 git_odb_object_free(src);
304 git_odb_object_free(trg);
305 return -1;
306 }
307
308 static int write_object(
309 git_packbuilder *pb,
310 git_pobject *po,
311 int (*write_cb)(void *buf, size_t size, void *cb_data),
312 void *cb_data)
313 {
314 git_odb_object *obj = NULL;
315 git_object_t type;
316 unsigned char hdr[10], *zbuf = NULL;
317 void *data = NULL;
318 size_t hdr_len, zbuf_len = COMPRESS_BUFLEN, data_len;
319 int error;
320
321 /*
322 * If we have a delta base, let's use the delta to save space.
323 * Otherwise load the whole object. 'data' ends up pointing to
324 * whatever data we want to put into the packfile.
325 */
326 if (po->delta) {
327 if (po->delta_data)
328 data = po->delta_data;
329 else if ((error = get_delta(&data, pb->odb, po)) < 0)
330 goto done;
331
332 data_len = po->delta_size;
333 type = GIT_OBJECT_REF_DELTA;
334 } else {
335 if ((error = git_odb_read(&obj, pb->odb, &po->id)) < 0)
336 goto done;
337
338 data = (void *)git_odb_object_data(obj);
339 data_len = git_odb_object_size(obj);
340 type = git_odb_object_type(obj);
341 }
342
343 /* Write header */
344 if ((error = git_packfile__object_header(&hdr_len, hdr, data_len, type)) < 0 ||
345 (error = write_cb(hdr, hdr_len, cb_data)) < 0 ||
346 (error = git_hash_update(&pb->ctx, hdr, hdr_len)) < 0)
347 goto done;
348
349 if (type == GIT_OBJECT_REF_DELTA) {
350 if ((error = write_cb(po->delta->id.id, GIT_OID_RAWSZ, cb_data)) < 0 ||
351 (error = git_hash_update(&pb->ctx, po->delta->id.id, GIT_OID_RAWSZ)) < 0)
352 goto done;
353 }
354
355 /* Write data */
356 if (po->z_delta_size) {
357 data_len = po->z_delta_size;
358
359 if ((error = write_cb(data, data_len, cb_data)) < 0 ||
360 (error = git_hash_update(&pb->ctx, data, data_len)) < 0)
361 goto done;
362 } else {
363 zbuf = git__malloc(zbuf_len);
364 GIT_ERROR_CHECK_ALLOC(zbuf);
365
366 git_zstream_reset(&pb->zstream);
367
368 if ((error = git_zstream_set_input(&pb->zstream, data, data_len)) < 0)
369 goto done;
370
371 while (!git_zstream_done(&pb->zstream)) {
372 if ((error = git_zstream_get_output(zbuf, &zbuf_len, &pb->zstream)) < 0 ||
373 (error = write_cb(zbuf, zbuf_len, cb_data)) < 0 ||
374 (error = git_hash_update(&pb->ctx, zbuf, zbuf_len)) < 0)
375 goto done;
376
377 zbuf_len = COMPRESS_BUFLEN; /* reuse buffer */
378 }
379 }
380
381 /*
382 * If po->delta is true, data is a delta and it is our
383 * responsibility to free it (otherwise it's a git_object's
384 * data). We set po->delta_data to NULL in case we got the
385 * data from there instead of get_delta(). If we didn't,
386 * there's no harm.
387 */
388 if (po->delta) {
389 git__free(data);
390 po->delta_data = NULL;
391 }
392
393 pb->nr_written++;
394
395 done:
396 git__free(zbuf);
397 git_odb_object_free(obj);
398 return error;
399 }
400
401 enum write_one_status {
402 WRITE_ONE_SKIP = -1, /* already written */
403 WRITE_ONE_BREAK = 0, /* writing this will bust the limit; not written */
404 WRITE_ONE_WRITTEN = 1, /* normal */
405 WRITE_ONE_RECURSIVE = 2 /* already scheduled to be written */
406 };
407
408 static int write_one(
409 enum write_one_status *status,
410 git_packbuilder *pb,
411 git_pobject *po,
412 int (*write_cb)(void *buf, size_t size, void *cb_data),
413 void *cb_data)
414 {
415 int error;
416
417 if (po->recursing) {
418 *status = WRITE_ONE_RECURSIVE;
419 return 0;
420 } else if (po->written) {
421 *status = WRITE_ONE_SKIP;
422 return 0;
423 }
424
425 if (po->delta) {
426 po->recursing = 1;
427
428 if ((error = write_one(status, pb, po->delta, write_cb, cb_data)) < 0)
429 return error;
430
431 /* we cannot depend on this one */
432 if (*status == WRITE_ONE_RECURSIVE)
433 po->delta = NULL;
434 }
435
436 *status = WRITE_ONE_WRITTEN;
437 po->written = 1;
438 po->recursing = 0;
439
440 return write_object(pb, po, write_cb, cb_data);
441 }
442
443 GIT_INLINE(void) add_to_write_order(git_pobject **wo, size_t *endp,
444 git_pobject *po)
445 {
446 if (po->filled)
447 return;
448 wo[(*endp)++] = po;
449 po->filled = 1;
450 }
451
452 static void add_descendants_to_write_order(git_pobject **wo, size_t *endp,
453 git_pobject *po)
454 {
455 int add_to_order = 1;
456 while (po) {
457 if (add_to_order) {
458 git_pobject *s;
459 /* add this node... */
460 add_to_write_order(wo, endp, po);
461 /* all its siblings... */
462 for (s = po->delta_sibling; s; s = s->delta_sibling) {
463 add_to_write_order(wo, endp, s);
464 }
465 }
466 /* drop down a level to add left subtree nodes if possible */
467 if (po->delta_child) {
468 add_to_order = 1;
469 po = po->delta_child;
470 } else {
471 add_to_order = 0;
472 /* our sibling might have some children, it is next */
473 if (po->delta_sibling) {
474 po = po->delta_sibling;
475 continue;
476 }
477 /* go back to our parent node */
478 po = po->delta;
479 while (po && !po->delta_sibling) {
480 /* we're on the right side of a subtree, keep
481 * going up until we can go right again */
482 po = po->delta;
483 }
484 if (!po) {
485 /* done- we hit our original root node */
486 return;
487 }
488 /* pass it off to sibling at this level */
489 po = po->delta_sibling;
490 }
491 };
492 }
493
494 static void add_family_to_write_order(git_pobject **wo, size_t *endp,
495 git_pobject *po)
496 {
497 git_pobject *root;
498
499 for (root = po; root->delta; root = root->delta)
500 ; /* nothing */
501 add_descendants_to_write_order(wo, endp, root);
502 }
503
504 static int cb_tag_foreach(const char *name, git_oid *oid, void *data)
505 {
506 git_packbuilder *pb = data;
507 git_pobject *po;
508
509 GIT_UNUSED(name);
510
511 if ((po = git_oidmap_get(pb->object_ix, oid)) == NULL)
512 return 0;
513
514 po->tagged = 1;
515
516 /* TODO: peel objects */
517
518 return 0;
519 }
520
521 static int compute_write_order(git_pobject ***out, git_packbuilder *pb)
522 {
523 size_t i, wo_end, last_untagged;
524 git_pobject **wo;
525
526 *out = NULL;
527
528 if (!pb->nr_objects)
529 return 0;
530
531 if ((wo = git__mallocarray(pb->nr_objects, sizeof(*wo))) == NULL)
532 return -1;
533
534 for (i = 0; i < pb->nr_objects; i++) {
535 git_pobject *po = pb->object_list + i;
536 po->tagged = 0;
537 po->filled = 0;
538 po->delta_child = NULL;
539 po->delta_sibling = NULL;
540 }
541
542 /*
543 * Fully connect delta_child/delta_sibling network.
544 * Make sure delta_sibling is sorted in the original
545 * recency order.
546 */
547 for (i = pb->nr_objects; i > 0;) {
548 git_pobject *po = &pb->object_list[--i];
549 if (!po->delta)
550 continue;
551 /* Mark me as the first child */
552 po->delta_sibling = po->delta->delta_child;
553 po->delta->delta_child = po;
554 }
555
556 /*
557 * Mark objects that are at the tip of tags.
558 */
559 if (git_tag_foreach(pb->repo, &cb_tag_foreach, pb) < 0) {
560 git__free(wo);
561 return -1;
562 }
563
564 /*
565 * Give the objects in the original recency order until
566 * we see a tagged tip.
567 */
568 for (i = wo_end = 0; i < pb->nr_objects; i++) {
569 git_pobject *po = pb->object_list + i;
570 if (po->tagged)
571 break;
572 add_to_write_order(wo, &wo_end, po);
573 }
574 last_untagged = i;
575
576 /*
577 * Then fill all the tagged tips.
578 */
579 for (; i < pb->nr_objects; i++) {
580 git_pobject *po = pb->object_list + i;
581 if (po->tagged)
582 add_to_write_order(wo, &wo_end, po);
583 }
584
585 /*
586 * And then all remaining commits and tags.
587 */
588 for (i = last_untagged; i < pb->nr_objects; i++) {
589 git_pobject *po = pb->object_list + i;
590 if (po->type != GIT_OBJECT_COMMIT &&
591 po->type != GIT_OBJECT_TAG)
592 continue;
593 add_to_write_order(wo, &wo_end, po);
594 }
595
596 /*
597 * And then all the trees.
598 */
599 for (i = last_untagged; i < pb->nr_objects; i++) {
600 git_pobject *po = pb->object_list + i;
601 if (po->type != GIT_OBJECT_TREE)
602 continue;
603 add_to_write_order(wo, &wo_end, po);
604 }
605
606 /*
607 * Finally all the rest in really tight order
608 */
609 for (i = last_untagged; i < pb->nr_objects; i++) {
610 git_pobject *po = pb->object_list + i;
611 if (!po->filled)
612 add_family_to_write_order(wo, &wo_end, po);
613 }
614
615 if (wo_end != pb->nr_objects) {
616 git__free(wo);
617 git_error_set(GIT_ERROR_INVALID, "invalid write order");
618 return -1;
619 }
620
621 *out = wo;
622 return 0;
623 }
624
625 static int write_pack(git_packbuilder *pb,
626 int (*write_cb)(void *buf, size_t size, void *cb_data),
627 void *cb_data)
628 {
629 git_pobject **write_order;
630 git_pobject *po;
631 enum write_one_status status;
632 struct git_pack_header ph;
633 git_oid entry_oid;
634 size_t i = 0;
635 int error;
636
637 if ((error = compute_write_order(&write_order, pb)) < 0)
638 return error;
639
640 if (!git__is_uint32(pb->nr_objects)) {
641 git_error_set(GIT_ERROR_INVALID, "too many objects");
642 error = -1;
643 goto done;
644 }
645
646 /* Write pack header */
647 ph.hdr_signature = htonl(PACK_SIGNATURE);
648 ph.hdr_version = htonl(PACK_VERSION);
649 ph.hdr_entries = htonl(pb->nr_objects);
650
651 if ((error = write_cb(&ph, sizeof(ph), cb_data)) < 0 ||
652 (error = git_hash_update(&pb->ctx, &ph, sizeof(ph))) < 0)
653 goto done;
654
655 pb->nr_remaining = pb->nr_objects;
656 do {
657 pb->nr_written = 0;
658 for ( ; i < pb->nr_objects; ++i) {
659 po = write_order[i];
660
661 if ((error = write_one(&status, pb, po, write_cb, cb_data)) < 0)
662 goto done;
663 }
664
665 pb->nr_remaining -= pb->nr_written;
666 } while (pb->nr_remaining && i < pb->nr_objects);
667
668 if ((error = git_hash_final(entry_oid.id, &pb->ctx)) < 0)
669 goto done;
670
671 error = write_cb(entry_oid.id, GIT_OID_RAWSZ, cb_data);
672
673 done:
674 /* if callback cancelled writing, we must still free delta_data */
675 for ( ; i < pb->nr_objects; ++i) {
676 po = write_order[i];
677 if (po->delta_data) {
678 git__free(po->delta_data);
679 po->delta_data = NULL;
680 }
681 }
682
683 git__free(write_order);
684 return error;
685 }
686
687 static int write_pack_buf(void *buf, size_t size, void *data)
688 {
689 git_str *b = (git_str *)data;
690 return git_str_put(b, buf, size);
691 }
692
693 static int type_size_sort(const void *_a, const void *_b)
694 {
695 const git_pobject *a = (git_pobject *)_a;
696 const git_pobject *b = (git_pobject *)_b;
697
698 if (a->type > b->type)
699 return -1;
700 if (a->type < b->type)
701 return 1;
702 if (a->hash > b->hash)
703 return -1;
704 if (a->hash < b->hash)
705 return 1;
706 /*
707 * TODO
708 *
709 if (a->preferred_base > b->preferred_base)
710 return -1;
711 if (a->preferred_base < b->preferred_base)
712 return 1;
713 */
714 if (a->size > b->size)
715 return -1;
716 if (a->size < b->size)
717 return 1;
718 return a < b ? -1 : (a > b); /* newest first */
719 }
720
721 static int delta_cacheable(
722 git_packbuilder *pb,
723 size_t src_size,
724 size_t trg_size,
725 size_t delta_size)
726 {
727 size_t new_size;
728
729 if (git__add_sizet_overflow(&new_size, pb->delta_cache_size, delta_size))
730 return 0;
731
732 if (pb->max_delta_cache_size && new_size > pb->max_delta_cache_size)
733 return 0;
734
735 if (delta_size < pb->cache_max_small_delta_size)
736 return 1;
737
738 /* cache delta, if objects are large enough compared to delta size */
739 if ((src_size >> 20) + (trg_size >> 21) > (delta_size >> 10))
740 return 1;
741
742 return 0;
743 }
744
745 static int try_delta(git_packbuilder *pb, struct unpacked *trg,
746 struct unpacked *src, size_t max_depth,
747 size_t *mem_usage, int *ret)
748 {
749 git_pobject *trg_object = trg->object;
750 git_pobject *src_object = src->object;
751 git_odb_object *obj;
752 size_t trg_size, src_size, delta_size, sizediff, max_size, sz;
753 size_t ref_depth;
754 void *delta_buf;
755
756 /* Don't bother doing diffs between different types */
757 if (trg_object->type != src_object->type) {
758 *ret = -1;
759 return 0;
760 }
761
762 *ret = 0;
763
764 /* TODO: support reuse-delta */
765
766 /* Let's not bust the allowed depth. */
767 if (src->depth >= max_depth)
768 return 0;
769
770 /* Now some size filtering heuristics. */
771 trg_size = trg_object->size;
772 if (!trg_object->delta) {
773 max_size = trg_size/2 - 20;
774 ref_depth = 1;
775 } else {
776 max_size = trg_object->delta_size;
777 ref_depth = trg->depth;
778 }
779
780 max_size = (uint64_t)max_size * (max_depth - src->depth) /
781 (max_depth - ref_depth + 1);
782 if (max_size == 0)
783 return 0;
784
785 src_size = src_object->size;
786 sizediff = src_size < trg_size ? trg_size - src_size : 0;
787 if (sizediff >= max_size)
788 return 0;
789 if (trg_size < src_size / 32)
790 return 0;
791
792 /* Load data if not already done */
793 if (!trg->data) {
794 if (git_odb_read(&obj, pb->odb, &trg_object->id) < 0)
795 return -1;
796
797 sz = git_odb_object_size(obj);
798 trg->data = git__malloc(sz);
799 GIT_ERROR_CHECK_ALLOC(trg->data);
800 memcpy(trg->data, git_odb_object_data(obj), sz);
801
802 git_odb_object_free(obj);
803
804 if (sz != trg_size) {
805 git_error_set(GIT_ERROR_INVALID,
806 "inconsistent target object length");
807 return -1;
808 }
809
810 *mem_usage += sz;
811 }
812 if (!src->data) {
813 size_t obj_sz;
814
815 if (git_odb_read(&obj, pb->odb, &src_object->id) < 0 ||
816 !git__is_ulong(obj_sz = git_odb_object_size(obj)))
817 return -1;
818
819 sz = obj_sz;
820 src->data = git__malloc(sz);
821 GIT_ERROR_CHECK_ALLOC(src->data);
822 memcpy(src->data, git_odb_object_data(obj), sz);
823
824 git_odb_object_free(obj);
825
826 if (sz != src_size) {
827 git_error_set(GIT_ERROR_INVALID,
828 "inconsistent source object length");
829 return -1;
830 }
831
832 *mem_usage += sz;
833 }
834 if (!src->index) {
835 if (git_delta_index_init(&src->index, src->data, src_size) < 0)
836 return 0; /* suboptimal pack - out of memory */
837
838 *mem_usage += git_delta_index_size(src->index);
839 }
840
841 if (git_delta_create_from_index(&delta_buf, &delta_size, src->index, trg->data, trg_size,
842 max_size) < 0)
843 return 0;
844
845 if (trg_object->delta) {
846 /* Prefer only shallower same-sized deltas. */
847 if (delta_size == trg_object->delta_size &&
848 src->depth + 1 >= trg->depth) {
849 git__free(delta_buf);
850 return 0;
851 }
852 }
853
854 GIT_ASSERT(git_packbuilder__cache_lock(pb) == 0);
855
856 if (trg_object->delta_data) {
857 git__free(trg_object->delta_data);
858 GIT_ASSERT(pb->delta_cache_size >= trg_object->delta_size);
859 pb->delta_cache_size -= trg_object->delta_size;
860 trg_object->delta_data = NULL;
861 }
862 if (delta_cacheable(pb, src_size, trg_size, delta_size)) {
863 bool overflow = git__add_sizet_overflow(
864 &pb->delta_cache_size, pb->delta_cache_size, delta_size);
865
866 GIT_ASSERT(git_packbuilder__cache_unlock(pb) == 0);
867
868 if (overflow) {
869 git__free(delta_buf);
870 return -1;
871 }
872
873 trg_object->delta_data = git__realloc(delta_buf, delta_size);
874 GIT_ERROR_CHECK_ALLOC(trg_object->delta_data);
875 } else {
876 /* create delta when writing the pack */
877 GIT_ASSERT(git_packbuilder__cache_unlock(pb) == 0);
878 git__free(delta_buf);
879 }
880
881 trg_object->delta = src_object;
882 trg_object->delta_size = delta_size;
883 trg->depth = src->depth + 1;
884
885 *ret = 1;
886 return 0;
887 }
888
889 static size_t check_delta_limit(git_pobject *me, size_t n)
890 {
891 git_pobject *child = me->delta_child;
892 size_t m = n;
893
894 while (child) {
895 size_t c = check_delta_limit(child, n + 1);
896 if (m < c)
897 m = c;
898 child = child->delta_sibling;
899 }
900 return m;
901 }
902
903 static size_t free_unpacked(struct unpacked *n)
904 {
905 size_t freed_mem = 0;
906
907 if (n->index) {
908 freed_mem += git_delta_index_size(n->index);
909 git_delta_index_free(n->index);
910 }
911 n->index = NULL;
912
913 if (n->data) {
914 freed_mem += n->object->size;
915 git__free(n->data);
916 n->data = NULL;
917 }
918 n->object = NULL;
919 n->depth = 0;
920 return freed_mem;
921 }
922
923 static int report_delta_progress(
924 git_packbuilder *pb, uint32_t count, bool force)
925 {
926 int ret;
927
928 if (pb->progress_cb) {
929 double current_time = git__timer();
930 double elapsed = current_time - pb->last_progress_report_time;
931
932 if (force || elapsed < 0 || elapsed >= MIN_PROGRESS_UPDATE_INTERVAL) {
933 pb->last_progress_report_time = current_time;
934
935 ret = pb->progress_cb(
936 GIT_PACKBUILDER_DELTAFICATION,
937 count, pb->nr_objects, pb->progress_cb_payload);
938
939 if (ret)
940 return git_error_set_after_callback(ret);
941 }
942 }
943
944 return 0;
945 }
946
947 static int find_deltas(git_packbuilder *pb, git_pobject **list,
948 size_t *list_size, size_t window, size_t depth)
949 {
950 git_pobject *po;
951 git_str zbuf = GIT_STR_INIT;
952 struct unpacked *array;
953 size_t idx = 0, count = 0;
954 size_t mem_usage = 0;
955 size_t i;
956 int error = -1;
957
958 array = git__calloc(window, sizeof(struct unpacked));
959 GIT_ERROR_CHECK_ALLOC(array);
960
961 for (;;) {
962 struct unpacked *n = array + idx;
963 size_t max_depth, j, best_base = SIZE_MAX;
964
965 GIT_ASSERT(git_packbuilder__progress_lock(pb) == 0);
966 if (!*list_size) {
967 GIT_ASSERT(git_packbuilder__progress_unlock(pb) == 0);
968 break;
969 }
970
971 pb->nr_deltified += 1;
972 report_delta_progress(pb, pb->nr_deltified, false);
973
974 po = *list++;
975 (*list_size)--;
976 GIT_ASSERT(git_packbuilder__progress_unlock(pb) == 0);
977
978 mem_usage -= free_unpacked(n);
979 n->object = po;
980
981 while (pb->window_memory_limit &&
982 mem_usage > pb->window_memory_limit &&
983 count > 1) {
984 size_t tail = (idx + window - count) % window;
985 mem_usage -= free_unpacked(array + tail);
986 count--;
987 }
988
989 /*
990 * If the current object is at pack edge, take the depth the
991 * objects that depend on the current object into account
992 * otherwise they would become too deep.
993 */
994 max_depth = depth;
995 if (po->delta_child) {
996 size_t delta_limit = check_delta_limit(po, 0);
997
998 if (delta_limit > max_depth)
999 goto next;
1000
1001 max_depth -= delta_limit;
1002 }
1003
1004 j = window;
1005 while (--j > 0) {
1006 int ret;
1007 size_t other_idx = idx + j;
1008 struct unpacked *m;
1009
1010 if (other_idx >= window)
1011 other_idx -= window;
1012
1013 m = array + other_idx;
1014 if (!m->object)
1015 break;
1016
1017 if (try_delta(pb, n, m, max_depth, &mem_usage, &ret) < 0)
1018 goto on_error;
1019 if (ret < 0)
1020 break;
1021 else if (ret > 0)
1022 best_base = other_idx;
1023 }
1024
1025 /*
1026 * If we decided to cache the delta data, then it is best
1027 * to compress it right away. First because we have to do
1028 * it anyway, and doing it here while we're threaded will
1029 * save a lot of time in the non threaded write phase,
1030 * as well as allow for caching more deltas within
1031 * the same cache size limit.
1032 * ...
1033 * But only if not writing to stdout, since in that case
1034 * the network is most likely throttling writes anyway,
1035 * and therefore it is best to go to the write phase ASAP
1036 * instead, as we can afford spending more time compressing
1037 * between writes at that moment.
1038 */
1039 if (po->delta_data) {
1040 if (git_zstream_deflatebuf(&zbuf, po->delta_data, po->delta_size) < 0)
1041 goto on_error;
1042
1043 git__free(po->delta_data);
1044 po->delta_data = git__malloc(zbuf.size);
1045 GIT_ERROR_CHECK_ALLOC(po->delta_data);
1046
1047 memcpy(po->delta_data, zbuf.ptr, zbuf.size);
1048 po->z_delta_size = zbuf.size;
1049 git_str_clear(&zbuf);
1050
1051 GIT_ASSERT(git_packbuilder__cache_lock(pb) == 0);
1052 pb->delta_cache_size -= po->delta_size;
1053 pb->delta_cache_size += po->z_delta_size;
1054 GIT_ASSERT(git_packbuilder__cache_unlock(pb) == 0);
1055 }
1056
1057 /*
1058 * If we made n a delta, and if n is already at max
1059 * depth, leaving it in the window is pointless. we
1060 * should evict it first.
1061 */
1062 if (po->delta && max_depth <= n->depth)
1063 continue;
1064
1065 /*
1066 * Move the best delta base up in the window, after the
1067 * currently deltified object, to keep it longer. It will
1068 * be the first base object to be attempted next.
1069 */
1070 if (po->delta) {
1071 struct unpacked swap = array[best_base];
1072 size_t dist = (window + idx - best_base) % window;
1073 size_t dst = best_base;
1074 while (dist--) {
1075 size_t src = (dst + 1) % window;
1076 array[dst] = array[src];
1077 dst = src;
1078 }
1079 array[dst] = swap;
1080 }
1081
1082 next:
1083 idx++;
1084 if (count + 1 < window)
1085 count++;
1086 if (idx >= window)
1087 idx = 0;
1088 }
1089 error = 0;
1090
1091 on_error:
1092 for (i = 0; i < window; ++i) {
1093 git__free(array[i].index);
1094 git__free(array[i].data);
1095 }
1096 git__free(array);
1097 git_str_dispose(&zbuf);
1098
1099 return error;
1100 }
1101
1102 #ifdef GIT_THREADS
1103
1104 struct thread_params {
1105 git_thread thread;
1106 git_packbuilder *pb;
1107
1108 git_pobject **list;
1109
1110 git_cond cond;
1111 git_mutex mutex;
1112
1113 size_t list_size;
1114 size_t remaining;
1115
1116 size_t window;
1117 size_t depth;
1118 size_t working;
1119 size_t data_ready;
1120 };
1121
1122 static void *threaded_find_deltas(void *arg)
1123 {
1124 struct thread_params *me = arg;
1125
1126 while (me->remaining) {
1127 if (find_deltas(me->pb, me->list, &me->remaining,
1128 me->window, me->depth) < 0) {
1129 ; /* TODO */
1130 }
1131
1132 GIT_ASSERT_WITH_RETVAL(git_packbuilder__progress_lock(me->pb) == 0, NULL);
1133 me->working = 0;
1134 git_cond_signal(&me->pb->progress_cond);
1135 GIT_ASSERT_WITH_RETVAL(git_packbuilder__progress_unlock(me->pb) == 0, NULL);
1136
1137 if (git_mutex_lock(&me->mutex)) {
1138 git_error_set(GIT_ERROR_THREAD, "unable to lock packfile condition mutex");
1139 return NULL;
1140 }
1141
1142 while (!me->data_ready)
1143 git_cond_wait(&me->cond, &me->mutex);
1144
1145 /*
1146 * We must not set ->data_ready before we wait on the
1147 * condition because the main thread may have set it to 1
1148 * before we get here. In order to be sure that new
1149 * work is available if we see 1 in ->data_ready, it
1150 * was initialized to 0 before this thread was spawned
1151 * and we reset it to 0 right away.
1152 */
1153 me->data_ready = 0;
1154 git_mutex_unlock(&me->mutex);
1155 }
1156 /* leave ->working 1 so that this doesn't get more work assigned */
1157 return NULL;
1158 }
1159
1160 static int ll_find_deltas(git_packbuilder *pb, git_pobject **list,
1161 size_t list_size, size_t window, size_t depth)
1162 {
1163 struct thread_params *p;
1164 size_t i;
1165 int ret, active_threads = 0;
1166
1167 if (!pb->nr_threads)
1168 pb->nr_threads = git__online_cpus();
1169
1170 if (pb->nr_threads <= 1) {
1171 find_deltas(pb, list, &list_size, window, depth);
1172 return 0;
1173 }
1174
1175 p = git__mallocarray(pb->nr_threads, sizeof(*p));
1176 GIT_ERROR_CHECK_ALLOC(p);
1177
1178 /* Partition the work among the threads */
1179 for (i = 0; i < pb->nr_threads; ++i) {
1180 size_t sub_size = list_size / (pb->nr_threads - i);
1181
1182 /* don't use too small segments or no deltas will be found */
1183 if (sub_size < 2*window && i+1 < pb->nr_threads)
1184 sub_size = 0;
1185
1186 p[i].pb = pb;
1187 p[i].window = window;
1188 p[i].depth = depth;
1189 p[i].working = 1;
1190 p[i].data_ready = 0;
1191
1192 /* try to split chunks on "path" boundaries */
1193 while (sub_size && sub_size < list_size &&
1194 list[sub_size]->hash &&
1195 list[sub_size]->hash == list[sub_size-1]->hash)
1196 sub_size++;
1197
1198 p[i].list = list;
1199 p[i].list_size = sub_size;
1200 p[i].remaining = sub_size;
1201
1202 list += sub_size;
1203 list_size -= sub_size;
1204 }
1205
1206 /* Start work threads */
1207 for (i = 0; i < pb->nr_threads; ++i) {
1208 if (!p[i].list_size)
1209 continue;
1210
1211 git_mutex_init(&p[i].mutex);
1212 git_cond_init(&p[i].cond);
1213
1214 ret = git_thread_create(&p[i].thread,
1215 threaded_find_deltas, &p[i]);
1216 if (ret) {
1217 git_error_set(GIT_ERROR_THREAD, "unable to create thread");
1218 return -1;
1219 }
1220 active_threads++;
1221 }
1222
1223 /*
1224 * Now let's wait for work completion. Each time a thread is done
1225 * with its work, we steal half of the remaining work from the
1226 * thread with the largest number of unprocessed objects and give
1227 * it to that newly idle thread. This ensure good load balancing
1228 * until the remaining object list segments are simply too short
1229 * to be worth splitting anymore.
1230 */
1231 while (active_threads) {
1232 struct thread_params *target = NULL;
1233 struct thread_params *victim = NULL;
1234 size_t sub_size = 0;
1235
1236 /* Start by locating a thread that has transitioned its
1237 * 'working' flag from 1 -> 0. This indicates that it is
1238 * ready to receive more work using our work-stealing
1239 * algorithm. */
1240 GIT_ASSERT(git_packbuilder__progress_lock(pb) == 0);
1241 for (;;) {
1242 for (i = 0; !target && i < pb->nr_threads; i++)
1243 if (!p[i].working)
1244 target = &p[i];
1245 if (target)
1246 break;
1247 git_cond_wait(&pb->progress_cond, &pb->progress_mutex);
1248 }
1249
1250 /* At this point we hold the progress lock and have located
1251 * a thread to receive more work. We still need to locate a
1252 * thread from which to steal work (the victim). */
1253 for (i = 0; i < pb->nr_threads; i++)
1254 if (p[i].remaining > 2*window &&
1255 (!victim || victim->remaining < p[i].remaining))
1256 victim = &p[i];
1257
1258 if (victim) {
1259 sub_size = victim->remaining / 2;
1260 list = victim->list + victim->list_size - sub_size;
1261 while (sub_size && list[0]->hash &&
1262 list[0]->hash == list[-1]->hash) {
1263 list++;
1264 sub_size--;
1265 }
1266 if (!sub_size) {
1267 /*
1268 * It is possible for some "paths" to have
1269 * so many objects that no hash boundary
1270 * might be found. Let's just steal the
1271 * exact half in that case.
1272 */
1273 sub_size = victim->remaining / 2;
1274 list -= sub_size;
1275 }
1276 target->list = list;
1277 victim->list_size -= sub_size;
1278 victim->remaining -= sub_size;
1279 }
1280 target->list_size = sub_size;
1281 target->remaining = sub_size;
1282 target->working = 1;
1283 GIT_ASSERT(git_packbuilder__progress_unlock(pb) == 0);
1284
1285 if (git_mutex_lock(&target->mutex)) {
1286 git_error_set(GIT_ERROR_THREAD, "unable to lock packfile condition mutex");
1287 git__free(p);
1288 return -1;
1289 }
1290
1291 target->data_ready = 1;
1292 git_cond_signal(&target->cond);
1293 git_mutex_unlock(&target->mutex);
1294
1295 if (!sub_size) {
1296 git_thread_join(&target->thread, NULL);
1297 git_cond_free(&target->cond);
1298 git_mutex_free(&target->mutex);
1299 active_threads--;
1300 }
1301 }
1302
1303 git__free(p);
1304 return 0;
1305 }
1306
1307 #else
1308 #define ll_find_deltas(pb, l, ls, w, d) find_deltas(pb, l, &ls, w, d)
1309 #endif
1310
1311 int git_packbuilder__prepare(git_packbuilder *pb)
1312 {
1313 git_pobject **delta_list;
1314 size_t i, n = 0;
1315
1316 if (pb->nr_objects == 0 || pb->done)
1317 return 0; /* nothing to do */
1318
1319 /*
1320 * Although we do not report progress during deltafication, we
1321 * at least report that we are in the deltafication stage
1322 */
1323 if (pb->progress_cb)
1324 pb->progress_cb(GIT_PACKBUILDER_DELTAFICATION, 0, pb->nr_objects, pb->progress_cb_payload);
1325
1326 delta_list = git__mallocarray(pb->nr_objects, sizeof(*delta_list));
1327 GIT_ERROR_CHECK_ALLOC(delta_list);
1328
1329 for (i = 0; i < pb->nr_objects; ++i) {
1330 git_pobject *po = pb->object_list + i;
1331
1332 /* Make sure the item is within our size limits */
1333 if (po->size < 50 || po->size > pb->big_file_threshold)
1334 continue;
1335
1336 delta_list[n++] = po;
1337 }
1338
1339 if (n > 1) {
1340 git__tsort((void **)delta_list, n, type_size_sort);
1341 if (ll_find_deltas(pb, delta_list, n,
1342 GIT_PACK_WINDOW + 1,
1343 GIT_PACK_DEPTH) < 0) {
1344 git__free(delta_list);
1345 return -1;
1346 }
1347 }
1348
1349 report_delta_progress(pb, pb->nr_objects, true);
1350
1351 pb->done = true;
1352 git__free(delta_list);
1353 return 0;
1354 }
1355
1356 #define PREPARE_PACK if (git_packbuilder__prepare(pb) < 0) { return -1; }
1357
1358 int git_packbuilder_foreach(git_packbuilder *pb, int (*cb)(void *buf, size_t size, void *payload), void *payload)
1359 {
1360 PREPARE_PACK;
1361 return write_pack(pb, cb, payload);
1362 }
1363
1364 int git_packbuilder__write_buf(git_str *buf, git_packbuilder *pb)
1365 {
1366 PREPARE_PACK;
1367
1368 return write_pack(pb, &write_pack_buf, buf);
1369 }
1370
1371 int git_packbuilder_write_buf(git_buf *buf, git_packbuilder *pb)
1372 {
1373 GIT_BUF_WRAP_PRIVATE(buf, git_packbuilder__write_buf, pb);
1374 }
1375
1376 static int write_cb(void *buf, size_t len, void *payload)
1377 {
1378 struct pack_write_context *ctx = payload;
1379 return git_indexer_append(ctx->indexer, buf, len, ctx->stats);
1380 }
1381
1382 int git_packbuilder_write(
1383 git_packbuilder *pb,
1384 const char *path,
1385 unsigned int mode,
1386 git_indexer_progress_cb progress_cb,
1387 void *progress_cb_payload)
1388 {
1389 int error = -1;
1390 git_str object_path = GIT_STR_INIT;
1391 git_indexer_options opts = GIT_INDEXER_OPTIONS_INIT;
1392 git_indexer *indexer = NULL;
1393 git_indexer_progress stats;
1394 struct pack_write_context ctx;
1395 int t;
1396
1397 PREPARE_PACK;
1398
1399 if (path == NULL) {
1400 if ((error = git_repository__item_path(&object_path, pb->repo, GIT_REPOSITORY_ITEM_OBJECTS)) < 0)
1401 goto cleanup;
1402 if ((error = git_str_joinpath(&object_path, git_str_cstr(&object_path), "pack")) < 0)
1403 goto cleanup;
1404 path = git_str_cstr(&object_path);
1405 }
1406
1407 opts.progress_cb = progress_cb;
1408 opts.progress_cb_payload = progress_cb_payload;
1409
1410 if ((error = git_indexer_new(&indexer, path, mode, pb->odb, &opts)) < 0)
1411 goto cleanup;
1412
1413 if (!git_repository__configmap_lookup(&t, pb->repo, GIT_CONFIGMAP_FSYNCOBJECTFILES) && t)
1414 git_indexer__set_fsync(indexer, 1);
1415
1416 ctx.indexer = indexer;
1417 ctx.stats = &stats;
1418
1419 if ((error = git_packbuilder_foreach(pb, write_cb, &ctx)) < 0)
1420 goto cleanup;
1421
1422 if ((error = git_indexer_commit(indexer, &stats)) < 0)
1423 goto cleanup;
1424
1425 #ifndef GIT_DEPRECATE_HARD
1426 git_oid_cpy(&pb->pack_oid, git_indexer_hash(indexer));
1427 #endif
1428
1429 pb->pack_name = git__strdup(git_indexer_name(indexer));
1430 GIT_ERROR_CHECK_ALLOC(pb->pack_name);
1431
1432 cleanup:
1433 git_indexer_free(indexer);
1434 git_str_dispose(&object_path);
1435 return error;
1436 }
1437
1438 #undef PREPARE_PACK
1439
1440 #ifndef GIT_DEPRECATE_HARD
1441 const git_oid *git_packbuilder_hash(git_packbuilder *pb)
1442 {
1443 return &pb->pack_oid;
1444 }
1445 #endif
1446
1447 const char *git_packbuilder_name(git_packbuilder *pb)
1448 {
1449 return pb->pack_name;
1450 }
1451
1452
1453 static int cb_tree_walk(
1454 const char *root, const git_tree_entry *entry, void *payload)
1455 {
1456 int error;
1457 struct tree_walk_context *ctx = payload;
1458
1459 /* A commit inside a tree represents a submodule commit and should be skipped. */
1460 if (git_tree_entry_type(entry) == GIT_OBJECT_COMMIT)
1461 return 0;
1462
1463 if (!(error = git_str_sets(&ctx->buf, root)) &&
1464 !(error = git_str_puts(&ctx->buf, git_tree_entry_name(entry))))
1465 error = git_packbuilder_insert(
1466 ctx->pb, git_tree_entry_id(entry), git_str_cstr(&ctx->buf));
1467
1468 return error;
1469 }
1470
1471 int git_packbuilder_insert_commit(git_packbuilder *pb, const git_oid *oid)
1472 {
1473 git_commit *commit;
1474
1475 if (git_commit_lookup(&commit, pb->repo, oid) < 0 ||
1476 git_packbuilder_insert(pb, oid, NULL) < 0)
1477 return -1;
1478
1479 if (git_packbuilder_insert_tree(pb, git_commit_tree_id(commit)) < 0)
1480 return -1;
1481
1482 git_commit_free(commit);
1483 return 0;
1484 }
1485
1486 int git_packbuilder_insert_tree(git_packbuilder *pb, const git_oid *oid)
1487 {
1488 int error;
1489 git_tree *tree = NULL;
1490 struct tree_walk_context context = { pb, GIT_STR_INIT };
1491
1492 if (!(error = git_tree_lookup(&tree, pb->repo, oid)) &&
1493 !(error = git_packbuilder_insert(pb, oid, NULL)))
1494 error = git_tree_walk(tree, GIT_TREEWALK_PRE, cb_tree_walk, &context);
1495
1496 git_tree_free(tree);
1497 git_str_dispose(&context.buf);
1498 return error;
1499 }
1500
1501 int git_packbuilder_insert_recur(git_packbuilder *pb, const git_oid *id, const char *name)
1502 {
1503 git_object *obj;
1504 int error;
1505
1506 GIT_ASSERT_ARG(pb);
1507 GIT_ASSERT_ARG(id);
1508
1509 if ((error = git_object_lookup(&obj, pb->repo, id, GIT_OBJECT_ANY)) < 0)
1510 return error;
1511
1512 switch (git_object_type(obj)) {
1513 case GIT_OBJECT_BLOB:
1514 error = git_packbuilder_insert(pb, id, name);
1515 break;
1516 case GIT_OBJECT_TREE:
1517 error = git_packbuilder_insert_tree(pb, id);
1518 break;
1519 case GIT_OBJECT_COMMIT:
1520 error = git_packbuilder_insert_commit(pb, id);
1521 break;
1522 case GIT_OBJECT_TAG:
1523 if ((error = git_packbuilder_insert(pb, id, name)) < 0)
1524 goto cleanup;
1525 error = git_packbuilder_insert_recur(pb, git_tag_target_id((git_tag *) obj), NULL);
1526 break;
1527
1528 default:
1529 git_error_set(GIT_ERROR_INVALID, "unknown object type");
1530 error = -1;
1531 }
1532
1533 cleanup:
1534 git_object_free(obj);
1535 return error;
1536 }
1537
1538 size_t git_packbuilder_object_count(git_packbuilder *pb)
1539 {
1540 return pb->nr_objects;
1541 }
1542
1543 size_t git_packbuilder_written(git_packbuilder *pb)
1544 {
1545 return pb->nr_written;
1546 }
1547
1548 static int lookup_walk_object(struct walk_object **out, git_packbuilder *pb, const git_oid *id)
1549 {
1550 struct walk_object *obj;
1551
1552 obj = git_pool_mallocz(&pb->object_pool, 1);
1553 if (!obj) {
1554 git_error_set_oom();
1555 return -1;
1556 }
1557
1558 git_oid_cpy(&obj->id, id);
1559
1560 *out = obj;
1561 return 0;
1562 }
1563
1564 static int retrieve_object(struct walk_object **out, git_packbuilder *pb, const git_oid *id)
1565 {
1566 struct walk_object *obj;
1567 int error;
1568
1569 if ((obj = git_oidmap_get(pb->walk_objects, id)) == NULL) {
1570 if ((error = lookup_walk_object(&obj, pb, id)) < 0)
1571 return error;
1572
1573 if ((error = git_oidmap_set(pb->walk_objects, &obj->id, obj)) < 0)
1574 return error;
1575 }
1576
1577 *out = obj;
1578 return 0;
1579 }
1580
1581 static int mark_blob_uninteresting(git_packbuilder *pb, const git_oid *id)
1582 {
1583 int error;
1584 struct walk_object *obj;
1585
1586 if ((error = retrieve_object(&obj, pb, id)) < 0)
1587 return error;
1588
1589 obj->uninteresting = 1;
1590
1591 return 0;
1592 }
1593
1594 static int mark_tree_uninteresting(git_packbuilder *pb, const git_oid *id)
1595 {
1596 struct walk_object *obj;
1597 git_tree *tree;
1598 int error;
1599 size_t i;
1600
1601 if ((error = retrieve_object(&obj, pb, id)) < 0)
1602 return error;
1603
1604 if (obj->uninteresting)
1605 return 0;
1606
1607 obj->uninteresting = 1;
1608
1609 if ((error = git_tree_lookup(&tree, pb->repo, id)) < 0)
1610 return error;
1611
1612 for (i = 0; i < git_tree_entrycount(tree); i++) {
1613 const git_tree_entry *entry = git_tree_entry_byindex(tree, i);
1614 const git_oid *entry_id = git_tree_entry_id(entry);
1615 switch (git_tree_entry_type(entry)) {
1616 case GIT_OBJECT_TREE:
1617 if ((error = mark_tree_uninteresting(pb, entry_id)) < 0)
1618 goto cleanup;
1619 break;
1620 case GIT_OBJECT_BLOB:
1621 if ((error = mark_blob_uninteresting(pb, entry_id)) < 0)
1622 goto cleanup;
1623 break;
1624 default:
1625 /* it's a submodule or something unknown, we don't want it */
1626 ;
1627 }
1628 }
1629
1630 cleanup:
1631 git_tree_free(tree);
1632 return error;
1633 }
1634
1635 /*
1636 * Mark the edges of the graph uninteresting. Since we start from a
1637 * git_revwalk, the commits are already uninteresting, but we need to
1638 * mark the trees and blobs.
1639 */
1640 static int mark_edges_uninteresting(git_packbuilder *pb, git_commit_list *commits)
1641 {
1642 int error;
1643 git_commit_list *list;
1644 git_commit *commit;
1645
1646 for (list = commits; list; list = list->next) {
1647 if (!list->item->uninteresting)
1648 continue;
1649
1650 if ((error = git_commit_lookup(&commit, pb->repo, &list->item->oid)) < 0)
1651 return error;
1652
1653 error = mark_tree_uninteresting(pb, git_commit_tree_id(commit));
1654 git_commit_free(commit);
1655
1656 if (error < 0)
1657 return error;
1658 }
1659
1660 return 0;
1661 }
1662
1663 static int pack_objects_insert_tree(git_packbuilder *pb, git_tree *tree)
1664 {
1665 size_t i;
1666 int error;
1667 git_tree *subtree;
1668 struct walk_object *obj;
1669 const char *name;
1670
1671 if ((error = retrieve_object(&obj, pb, git_tree_id(tree))) < 0)
1672 return error;
1673
1674 if (obj->seen || obj->uninteresting)
1675 return 0;
1676
1677 obj->seen = 1;
1678
1679 if ((error = git_packbuilder_insert(pb, &obj->id, NULL)))
1680 return error;
1681
1682 for (i = 0; i < git_tree_entrycount(tree); i++) {
1683 const git_tree_entry *entry = git_tree_entry_byindex(tree, i);
1684 const git_oid *entry_id = git_tree_entry_id(entry);
1685 switch (git_tree_entry_type(entry)) {
1686 case GIT_OBJECT_TREE:
1687 if ((error = git_tree_lookup(&subtree, pb->repo, entry_id)) < 0)
1688 return error;
1689
1690 error = pack_objects_insert_tree(pb, subtree);
1691 git_tree_free(subtree);
1692
1693 if (error < 0)
1694 return error;
1695
1696 break;
1697 case GIT_OBJECT_BLOB:
1698 if ((error = retrieve_object(&obj, pb, entry_id)) < 0)
1699 return error;
1700 if (obj->uninteresting)
1701 continue;
1702 name = git_tree_entry_name(entry);
1703 if ((error = git_packbuilder_insert(pb, entry_id, name)) < 0)
1704 return error;
1705 break;
1706 default:
1707 /* it's a submodule or something unknown, we don't want it */
1708 ;
1709 }
1710 }
1711
1712
1713 return error;
1714 }
1715
1716 static int pack_objects_insert_commit(git_packbuilder *pb, struct walk_object *obj)
1717 {
1718 int error;
1719 git_commit *commit = NULL;
1720 git_tree *tree = NULL;
1721
1722 obj->seen = 1;
1723
1724 if ((error = git_packbuilder_insert(pb, &obj->id, NULL)) < 0)
1725 return error;
1726
1727 if ((error = git_commit_lookup(&commit, pb->repo, &obj->id)) < 0)
1728 return error;
1729
1730 if ((error = git_tree_lookup(&tree, pb->repo, git_commit_tree_id(commit))) < 0)
1731 goto cleanup;
1732
1733 if ((error = pack_objects_insert_tree(pb, tree)) < 0)
1734 goto cleanup;
1735
1736 cleanup:
1737 git_commit_free(commit);
1738 git_tree_free(tree);
1739 return error;
1740 }
1741
1742 int git_packbuilder_insert_walk(git_packbuilder *pb, git_revwalk *walk)
1743 {
1744 int error;
1745 git_oid id;
1746 struct walk_object *obj;
1747
1748 GIT_ASSERT_ARG(pb);
1749 GIT_ASSERT_ARG(walk);
1750
1751 if ((error = mark_edges_uninteresting(pb, walk->user_input)) < 0)
1752 return error;
1753
1754 /*
1755 * TODO: git marks the parents of the edges
1756 * uninteresting. This may provide a speed advantage, but does
1757 * seem to assume the remote does not have a single-commit
1758 * history on the other end.
1759 */
1760
1761 /* walk down each tree up to the blobs and insert them, stopping when uninteresting */
1762 while ((error = git_revwalk_next(&id, walk)) == 0) {
1763 if ((error = retrieve_object(&obj, pb, &id)) < 0)
1764 return error;
1765
1766 if (obj->seen || obj->uninteresting)
1767 continue;
1768
1769 if ((error = pack_objects_insert_commit(pb, obj)) < 0)
1770 return error;
1771 }
1772
1773 if (error == GIT_ITEROVER)
1774 error = 0;
1775
1776 return error;
1777 }
1778
1779 int git_packbuilder_set_callbacks(git_packbuilder *pb, git_packbuilder_progress progress_cb, void *progress_cb_payload)
1780 {
1781 if (!pb)
1782 return -1;
1783
1784 pb->progress_cb = progress_cb;
1785 pb->progress_cb_payload = progress_cb_payload;
1786
1787 return 0;
1788 }
1789
1790 void git_packbuilder_free(git_packbuilder *pb)
1791 {
1792 if (pb == NULL)
1793 return;
1794
1795 #ifdef GIT_THREADS
1796
1797 git_mutex_free(&pb->cache_mutex);
1798 git_mutex_free(&pb->progress_mutex);
1799 git_cond_free(&pb->progress_cond);
1800
1801 #endif
1802
1803 if (pb->odb)
1804 git_odb_free(pb->odb);
1805
1806 if (pb->object_ix)
1807 git_oidmap_free(pb->object_ix);
1808
1809 if (pb->object_list)
1810 git__free(pb->object_list);
1811
1812 git_oidmap_free(pb->walk_objects);
1813 git_pool_clear(&pb->object_pool);
1814
1815 git_hash_ctx_cleanup(&pb->ctx);
1816 git_zstream_free(&pb->zstream);
1817
1818 git__free(pb->pack_name);
1819
1820 git__free(pb);
1821 }