]> git.proxmox.com Git - libgit2.git/blob - src/pack-objects.c
Add BD on ca-certificates
[libgit2.git] / src / pack-objects.c
1 /*
2 * Copyright (C) the libgit2 contributors. All rights reserved.
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8 #include "pack-objects.h"
9
10 #include "zstream.h"
11 #include "delta.h"
12 #include "iterator.h"
13 #include "netops.h"
14 #include "pack.h"
15 #include "thread-utils.h"
16 #include "tree.h"
17 #include "util.h"
18 #include "revwalk.h"
19 #include "commit_list.h"
20
21 #include "git2/pack.h"
22 #include "git2/commit.h"
23 #include "git2/tag.h"
24 #include "git2/indexer.h"
25 #include "git2/config.h"
26
27 struct unpacked {
28 git_pobject *object;
29 void *data;
30 struct git_delta_index *index;
31 size_t depth;
32 };
33
34 struct tree_walk_context {
35 git_packbuilder *pb;
36 git_buf buf;
37 };
38
39 struct pack_write_context {
40 git_indexer *indexer;
41 git_transfer_progress *stats;
42 };
43
44 struct walk_object {
45 git_oid id;
46 unsigned int uninteresting:1,
47 seen:1;
48 };
49
50 #ifdef GIT_THREADS
51
52 #define GIT_PACKBUILDER__MUTEX_OP(pb, mtx, op) do { \
53 int result = git_mutex_##op(&(pb)->mtx); \
54 assert(!result); \
55 GIT_UNUSED(result); \
56 } while (0)
57
58 #else
59
60 #define GIT_PACKBUILDER__MUTEX_OP(pb,mtx,op) GIT_UNUSED(pb)
61
62 #endif /* GIT_THREADS */
63
64 #define git_packbuilder__cache_lock(pb) GIT_PACKBUILDER__MUTEX_OP(pb, cache_mutex, lock)
65 #define git_packbuilder__cache_unlock(pb) GIT_PACKBUILDER__MUTEX_OP(pb, cache_mutex, unlock)
66 #define git_packbuilder__progress_lock(pb) GIT_PACKBUILDER__MUTEX_OP(pb, progress_mutex, lock)
67 #define git_packbuilder__progress_unlock(pb) GIT_PACKBUILDER__MUTEX_OP(pb, progress_mutex, unlock)
68
69 /* The minimal interval between progress updates (in seconds). */
70 #define MIN_PROGRESS_UPDATE_INTERVAL 0.5
71
72 /* Size of the buffer to feed to zlib */
73 #define COMPRESS_BUFLEN (1024 * 1024)
74
75 static unsigned name_hash(const char *name)
76 {
77 unsigned c, hash = 0;
78
79 if (!name)
80 return 0;
81
82 /*
83 * This effectively just creates a sortable number from the
84 * last sixteen non-whitespace characters. Last characters
85 * count "most", so things that end in ".c" sort together.
86 */
87 while ((c = *name++) != 0) {
88 if (git__isspace(c))
89 continue;
90 hash = (hash >> 2) + (c << 24);
91 }
92 return hash;
93 }
94
95 static int packbuilder_config(git_packbuilder *pb)
96 {
97 git_config *config;
98 int ret = 0;
99 int64_t val;
100
101 if ((ret = git_repository_config_snapshot(&config, pb->repo)) < 0)
102 return ret;
103
104 #define config_get(KEY,DST,DFLT) do { \
105 ret = git_config_get_int64(&val, config, KEY); \
106 if (!ret) { \
107 if (!git__is_sizet(val)) { \
108 git_error_set(GIT_ERROR_CONFIG, \
109 "configuration value '%s' is too large", KEY); \
110 ret = -1; \
111 goto out; \
112 } \
113 (DST) = (size_t)val; \
114 } else if (ret == GIT_ENOTFOUND) { \
115 (DST) = (DFLT); \
116 ret = 0; \
117 } else if (ret < 0) goto out; } while (0)
118
119 config_get("pack.deltaCacheSize", pb->max_delta_cache_size,
120 GIT_PACK_DELTA_CACHE_SIZE);
121 config_get("pack.deltaCacheLimit", pb->cache_max_small_delta_size,
122 GIT_PACK_DELTA_CACHE_LIMIT);
123 config_get("pack.deltaCacheSize", pb->big_file_threshold,
124 GIT_PACK_BIG_FILE_THRESHOLD);
125 config_get("pack.windowMemory", pb->window_memory_limit, 0);
126
127 #undef config_get
128
129 out:
130 git_config_free(config);
131
132 return ret;
133 }
134
135 int git_packbuilder_new(git_packbuilder **out, git_repository *repo)
136 {
137 git_packbuilder *pb;
138
139 *out = NULL;
140
141 pb = git__calloc(1, sizeof(*pb));
142 GIT_ERROR_CHECK_ALLOC(pb);
143
144 pb->object_ix = git_oidmap_alloc();
145 if (!pb->object_ix)
146 goto on_error;
147
148 pb->walk_objects = git_oidmap_alloc();
149 if (!pb->walk_objects)
150 goto on_error;
151
152 git_pool_init(&pb->object_pool, sizeof(struct walk_object));
153
154 pb->repo = repo;
155 pb->nr_threads = 1; /* do not spawn any thread by default */
156
157 if (git_hash_ctx_init(&pb->ctx) < 0 ||
158 git_zstream_init(&pb->zstream, GIT_ZSTREAM_DEFLATE) < 0 ||
159 git_repository_odb(&pb->odb, repo) < 0 ||
160 packbuilder_config(pb) < 0)
161 goto on_error;
162
163 #ifdef GIT_THREADS
164
165 if (git_mutex_init(&pb->cache_mutex) ||
166 git_mutex_init(&pb->progress_mutex) ||
167 git_cond_init(&pb->progress_cond))
168 {
169 git_error_set(GIT_ERROR_OS, "failed to initialize packbuilder mutex");
170 goto on_error;
171 }
172
173 #endif
174
175 *out = pb;
176 return 0;
177
178 on_error:
179 git_packbuilder_free(pb);
180 return -1;
181 }
182
183 unsigned int git_packbuilder_set_threads(git_packbuilder *pb, unsigned int n)
184 {
185 assert(pb);
186
187 #ifdef GIT_THREADS
188 pb->nr_threads = n;
189 #else
190 GIT_UNUSED(n);
191 assert(1 == pb->nr_threads);
192 #endif
193
194 return pb->nr_threads;
195 }
196
197 static void rehash(git_packbuilder *pb)
198 {
199 git_pobject *po;
200 size_t pos, i;
201 int ret;
202
203 git_oidmap_clear(pb->object_ix);
204 for (i = 0, po = pb->object_list; i < pb->nr_objects; i++, po++) {
205 pos = git_oidmap_put(pb->object_ix, &po->id, &ret);
206 git_oidmap_set_value_at(pb->object_ix, pos, po);
207 }
208 }
209
210 int git_packbuilder_insert(git_packbuilder *pb, const git_oid *oid,
211 const char *name)
212 {
213 git_pobject *po;
214 size_t newsize, pos;
215 int ret;
216
217 assert(pb && oid);
218
219 /* If the object already exists in the hash table, then we don't
220 * have any work to do */
221 if (git_oidmap_exists(pb->object_ix, oid))
222 return 0;
223
224 if (pb->nr_objects >= pb->nr_alloc) {
225 GIT_ERROR_CHECK_ALLOC_ADD(&newsize, pb->nr_alloc, 1024);
226 GIT_ERROR_CHECK_ALLOC_MULTIPLY(&newsize, newsize, 3 / 2);
227
228 if (!git__is_uint32(newsize)) {
229 git_error_set(GIT_ERROR_NOMEMORY, "packfile too large to fit in memory.");
230 return -1;
231 }
232
233 pb->nr_alloc = newsize;
234
235 pb->object_list = git__reallocarray(pb->object_list,
236 pb->nr_alloc, sizeof(*po));
237 GIT_ERROR_CHECK_ALLOC(pb->object_list);
238 rehash(pb);
239 }
240
241 po = pb->object_list + pb->nr_objects;
242 memset(po, 0x0, sizeof(*po));
243
244 if ((ret = git_odb_read_header(&po->size, &po->type, pb->odb, oid)) < 0)
245 return ret;
246
247 pb->nr_objects++;
248 git_oid_cpy(&po->id, oid);
249 po->hash = name_hash(name);
250
251 pos = git_oidmap_put(pb->object_ix, &po->id, &ret);
252 if (ret < 0) {
253 git_error_set_oom();
254 return ret;
255 }
256 assert(ret != 0);
257 git_oidmap_set_value_at(pb->object_ix, pos, po);
258
259 pb->done = false;
260
261 if (pb->progress_cb) {
262 double current_time = git__timer();
263 double elapsed = current_time - pb->last_progress_report_time;
264
265 if (elapsed >= MIN_PROGRESS_UPDATE_INTERVAL) {
266 pb->last_progress_report_time = current_time;
267
268 ret = pb->progress_cb(
269 GIT_PACKBUILDER_ADDING_OBJECTS,
270 pb->nr_objects, 0, pb->progress_cb_payload);
271
272 if (ret)
273 return git_error_set_after_callback(ret);
274 }
275 }
276
277 return 0;
278 }
279
280 static int get_delta(void **out, git_odb *odb, git_pobject *po)
281 {
282 git_odb_object *src = NULL, *trg = NULL;
283 size_t delta_size;
284 void *delta_buf;
285 int error;
286
287 *out = NULL;
288
289 if (git_odb_read(&src, odb, &po->delta->id) < 0 ||
290 git_odb_read(&trg, odb, &po->id) < 0)
291 goto on_error;
292
293 error = git_delta(&delta_buf, &delta_size,
294 git_odb_object_data(src), git_odb_object_size(src),
295 git_odb_object_data(trg), git_odb_object_size(trg),
296 0);
297
298 if (error < 0 && error != GIT_EBUFS)
299 goto on_error;
300
301 if (error == GIT_EBUFS || delta_size != po->delta_size) {
302 git_error_set(GIT_ERROR_INVALID, "delta size changed");
303 goto on_error;
304 }
305
306 *out = delta_buf;
307
308 git_odb_object_free(src);
309 git_odb_object_free(trg);
310 return 0;
311
312 on_error:
313 git_odb_object_free(src);
314 git_odb_object_free(trg);
315 return -1;
316 }
317
318 static int write_object(
319 git_packbuilder *pb,
320 git_pobject *po,
321 int (*write_cb)(void *buf, size_t size, void *cb_data),
322 void *cb_data)
323 {
324 git_odb_object *obj = NULL;
325 git_object_t type;
326 unsigned char hdr[10], *zbuf = NULL;
327 void *data = NULL;
328 size_t hdr_len, zbuf_len = COMPRESS_BUFLEN, data_len;
329 int error;
330
331 /*
332 * If we have a delta base, let's use the delta to save space.
333 * Otherwise load the whole object. 'data' ends up pointing to
334 * whatever data we want to put into the packfile.
335 */
336 if (po->delta) {
337 if (po->delta_data)
338 data = po->delta_data;
339 else if ((error = get_delta(&data, pb->odb, po)) < 0)
340 goto done;
341
342 data_len = po->delta_size;
343 type = GIT_OBJECT_REF_DELTA;
344 } else {
345 if ((error = git_odb_read(&obj, pb->odb, &po->id)) < 0)
346 goto done;
347
348 data = (void *)git_odb_object_data(obj);
349 data_len = git_odb_object_size(obj);
350 type = git_odb_object_type(obj);
351 }
352
353 /* Write header */
354 hdr_len = git_packfile__object_header(hdr, data_len, type);
355
356 if ((error = write_cb(hdr, hdr_len, cb_data)) < 0 ||
357 (error = git_hash_update(&pb->ctx, hdr, hdr_len)) < 0)
358 goto done;
359
360 if (type == GIT_OBJECT_REF_DELTA) {
361 if ((error = write_cb(po->delta->id.id, GIT_OID_RAWSZ, cb_data)) < 0 ||
362 (error = git_hash_update(&pb->ctx, po->delta->id.id, GIT_OID_RAWSZ)) < 0)
363 goto done;
364 }
365
366 /* Write data */
367 if (po->z_delta_size) {
368 data_len = po->z_delta_size;
369
370 if ((error = write_cb(data, data_len, cb_data)) < 0 ||
371 (error = git_hash_update(&pb->ctx, data, data_len)) < 0)
372 goto done;
373 } else {
374 zbuf = git__malloc(zbuf_len);
375 GIT_ERROR_CHECK_ALLOC(zbuf);
376
377 git_zstream_reset(&pb->zstream);
378 git_zstream_set_input(&pb->zstream, data, data_len);
379
380 while (!git_zstream_done(&pb->zstream)) {
381 if ((error = git_zstream_get_output(zbuf, &zbuf_len, &pb->zstream)) < 0 ||
382 (error = write_cb(zbuf, zbuf_len, cb_data)) < 0 ||
383 (error = git_hash_update(&pb->ctx, zbuf, zbuf_len)) < 0)
384 goto done;
385
386 zbuf_len = COMPRESS_BUFLEN; /* reuse buffer */
387 }
388 }
389
390 /*
391 * If po->delta is true, data is a delta and it is our
392 * responsibility to free it (otherwise it's a git_object's
393 * data). We set po->delta_data to NULL in case we got the
394 * data from there instead of get_delta(). If we didn't,
395 * there's no harm.
396 */
397 if (po->delta) {
398 git__free(data);
399 po->delta_data = NULL;
400 }
401
402 pb->nr_written++;
403
404 done:
405 git__free(zbuf);
406 git_odb_object_free(obj);
407 return error;
408 }
409
410 enum write_one_status {
411 WRITE_ONE_SKIP = -1, /* already written */
412 WRITE_ONE_BREAK = 0, /* writing this will bust the limit; not written */
413 WRITE_ONE_WRITTEN = 1, /* normal */
414 WRITE_ONE_RECURSIVE = 2 /* already scheduled to be written */
415 };
416
417 static int write_one(
418 enum write_one_status *status,
419 git_packbuilder *pb,
420 git_pobject *po,
421 int (*write_cb)(void *buf, size_t size, void *cb_data),
422 void *cb_data)
423 {
424 int error;
425
426 if (po->recursing) {
427 *status = WRITE_ONE_RECURSIVE;
428 return 0;
429 } else if (po->written) {
430 *status = WRITE_ONE_SKIP;
431 return 0;
432 }
433
434 if (po->delta) {
435 po->recursing = 1;
436
437 if ((error = write_one(status, pb, po->delta, write_cb, cb_data)) < 0)
438 return error;
439
440 /* we cannot depend on this one */
441 if (*status == WRITE_ONE_RECURSIVE)
442 po->delta = NULL;
443 }
444
445 *status = WRITE_ONE_WRITTEN;
446 po->written = 1;
447 po->recursing = 0;
448
449 return write_object(pb, po, write_cb, cb_data);
450 }
451
452 GIT_INLINE(void) add_to_write_order(git_pobject **wo, size_t *endp,
453 git_pobject *po)
454 {
455 if (po->filled)
456 return;
457 wo[(*endp)++] = po;
458 po->filled = 1;
459 }
460
461 static void add_descendants_to_write_order(git_pobject **wo, size_t *endp,
462 git_pobject *po)
463 {
464 int add_to_order = 1;
465 while (po) {
466 if (add_to_order) {
467 git_pobject *s;
468 /* add this node... */
469 add_to_write_order(wo, endp, po);
470 /* all its siblings... */
471 for (s = po->delta_sibling; s; s = s->delta_sibling) {
472 add_to_write_order(wo, endp, s);
473 }
474 }
475 /* drop down a level to add left subtree nodes if possible */
476 if (po->delta_child) {
477 add_to_order = 1;
478 po = po->delta_child;
479 } else {
480 add_to_order = 0;
481 /* our sibling might have some children, it is next */
482 if (po->delta_sibling) {
483 po = po->delta_sibling;
484 continue;
485 }
486 /* go back to our parent node */
487 po = po->delta;
488 while (po && !po->delta_sibling) {
489 /* we're on the right side of a subtree, keep
490 * going up until we can go right again */
491 po = po->delta;
492 }
493 if (!po) {
494 /* done- we hit our original root node */
495 return;
496 }
497 /* pass it off to sibling at this level */
498 po = po->delta_sibling;
499 }
500 };
501 }
502
503 static void add_family_to_write_order(git_pobject **wo, size_t *endp,
504 git_pobject *po)
505 {
506 git_pobject *root;
507
508 for (root = po; root->delta; root = root->delta)
509 ; /* nothing */
510 add_descendants_to_write_order(wo, endp, root);
511 }
512
513 static int cb_tag_foreach(const char *name, git_oid *oid, void *data)
514 {
515 git_packbuilder *pb = data;
516 git_pobject *po;
517 size_t pos;
518
519 GIT_UNUSED(name);
520
521 pos = git_oidmap_lookup_index(pb->object_ix, oid);
522 if (!git_oidmap_valid_index(pb->object_ix, pos))
523 return 0;
524
525 po = git_oidmap_value_at(pb->object_ix, pos);
526 po->tagged = 1;
527
528 /* TODO: peel objects */
529
530 return 0;
531 }
532
533 static git_pobject **compute_write_order(git_packbuilder *pb)
534 {
535 size_t i, wo_end, last_untagged;
536 git_pobject **wo;
537
538 if ((wo = git__mallocarray(pb->nr_objects, sizeof(*wo))) == NULL)
539 return NULL;
540
541 for (i = 0; i < pb->nr_objects; i++) {
542 git_pobject *po = pb->object_list + i;
543 po->tagged = 0;
544 po->filled = 0;
545 po->delta_child = NULL;
546 po->delta_sibling = NULL;
547 }
548
549 /*
550 * Fully connect delta_child/delta_sibling network.
551 * Make sure delta_sibling is sorted in the original
552 * recency order.
553 */
554 for (i = pb->nr_objects; i > 0;) {
555 git_pobject *po = &pb->object_list[--i];
556 if (!po->delta)
557 continue;
558 /* Mark me as the first child */
559 po->delta_sibling = po->delta->delta_child;
560 po->delta->delta_child = po;
561 }
562
563 /*
564 * Mark objects that are at the tip of tags.
565 */
566 if (git_tag_foreach(pb->repo, &cb_tag_foreach, pb) < 0) {
567 git__free(wo);
568 return NULL;
569 }
570
571 /*
572 * Give the objects in the original recency order until
573 * we see a tagged tip.
574 */
575 for (i = wo_end = 0; i < pb->nr_objects; i++) {
576 git_pobject *po = pb->object_list + i;
577 if (po->tagged)
578 break;
579 add_to_write_order(wo, &wo_end, po);
580 }
581 last_untagged = i;
582
583 /*
584 * Then fill all the tagged tips.
585 */
586 for (; i < pb->nr_objects; i++) {
587 git_pobject *po = pb->object_list + i;
588 if (po->tagged)
589 add_to_write_order(wo, &wo_end, po);
590 }
591
592 /*
593 * And then all remaining commits and tags.
594 */
595 for (i = last_untagged; i < pb->nr_objects; i++) {
596 git_pobject *po = pb->object_list + i;
597 if (po->type != GIT_OBJECT_COMMIT &&
598 po->type != GIT_OBJECT_TAG)
599 continue;
600 add_to_write_order(wo, &wo_end, po);
601 }
602
603 /*
604 * And then all the trees.
605 */
606 for (i = last_untagged; i < pb->nr_objects; i++) {
607 git_pobject *po = pb->object_list + i;
608 if (po->type != GIT_OBJECT_TREE)
609 continue;
610 add_to_write_order(wo, &wo_end, po);
611 }
612
613 /*
614 * Finally all the rest in really tight order
615 */
616 for (i = last_untagged; i < pb->nr_objects; i++) {
617 git_pobject *po = pb->object_list + i;
618 if (!po->filled)
619 add_family_to_write_order(wo, &wo_end, po);
620 }
621
622 if (wo_end != pb->nr_objects) {
623 git__free(wo);
624 git_error_set(GIT_ERROR_INVALID, "invalid write order");
625 return NULL;
626 }
627
628 return wo;
629 }
630
631 static int write_pack(git_packbuilder *pb,
632 int (*write_cb)(void *buf, size_t size, void *cb_data),
633 void *cb_data)
634 {
635 git_pobject **write_order;
636 git_pobject *po;
637 enum write_one_status status;
638 struct git_pack_header ph;
639 git_oid entry_oid;
640 size_t i = 0;
641 int error = 0;
642
643 write_order = compute_write_order(pb);
644 if (write_order == NULL)
645 return -1;
646
647 if (!git__is_uint32(pb->nr_objects)) {
648 git_error_set(GIT_ERROR_INVALID, "too many objects");
649 return -1;
650 }
651
652 /* Write pack header */
653 ph.hdr_signature = htonl(PACK_SIGNATURE);
654 ph.hdr_version = htonl(PACK_VERSION);
655 ph.hdr_entries = htonl(pb->nr_objects);
656
657 if ((error = write_cb(&ph, sizeof(ph), cb_data)) < 0 ||
658 (error = git_hash_update(&pb->ctx, &ph, sizeof(ph))) < 0)
659 goto done;
660
661 pb->nr_remaining = pb->nr_objects;
662 do {
663 pb->nr_written = 0;
664 for ( ; i < pb->nr_objects; ++i) {
665 po = write_order[i];
666
667 if ((error = write_one(&status, pb, po, write_cb, cb_data)) < 0)
668 goto done;
669 }
670
671 pb->nr_remaining -= pb->nr_written;
672 } while (pb->nr_remaining && i < pb->nr_objects);
673
674 if ((error = git_hash_final(&entry_oid, &pb->ctx)) < 0)
675 goto done;
676
677 error = write_cb(entry_oid.id, GIT_OID_RAWSZ, cb_data);
678
679 done:
680 /* if callback cancelled writing, we must still free delta_data */
681 for ( ; i < pb->nr_objects; ++i) {
682 po = write_order[i];
683 if (po->delta_data) {
684 git__free(po->delta_data);
685 po->delta_data = NULL;
686 }
687 }
688
689 git__free(write_order);
690 return error;
691 }
692
693 static int write_pack_buf(void *buf, size_t size, void *data)
694 {
695 git_buf *b = (git_buf *)data;
696 return git_buf_put(b, buf, size);
697 }
698
699 static int type_size_sort(const void *_a, const void *_b)
700 {
701 const git_pobject *a = (git_pobject *)_a;
702 const git_pobject *b = (git_pobject *)_b;
703
704 if (a->type > b->type)
705 return -1;
706 if (a->type < b->type)
707 return 1;
708 if (a->hash > b->hash)
709 return -1;
710 if (a->hash < b->hash)
711 return 1;
712 /*
713 * TODO
714 *
715 if (a->preferred_base > b->preferred_base)
716 return -1;
717 if (a->preferred_base < b->preferred_base)
718 return 1;
719 */
720 if (a->size > b->size)
721 return -1;
722 if (a->size < b->size)
723 return 1;
724 return a < b ? -1 : (a > b); /* newest first */
725 }
726
727 static int delta_cacheable(
728 git_packbuilder *pb,
729 size_t src_size,
730 size_t trg_size,
731 size_t delta_size)
732 {
733 size_t new_size;
734
735 if (git__add_sizet_overflow(&new_size, pb->delta_cache_size, delta_size))
736 return 0;
737
738 if (pb->max_delta_cache_size && new_size > pb->max_delta_cache_size)
739 return 0;
740
741 if (delta_size < pb->cache_max_small_delta_size)
742 return 1;
743
744 /* cache delta, if objects are large enough compared to delta size */
745 if ((src_size >> 20) + (trg_size >> 21) > (delta_size >> 10))
746 return 1;
747
748 return 0;
749 }
750
751 static int try_delta(git_packbuilder *pb, struct unpacked *trg,
752 struct unpacked *src, size_t max_depth,
753 size_t *mem_usage, int *ret)
754 {
755 git_pobject *trg_object = trg->object;
756 git_pobject *src_object = src->object;
757 git_odb_object *obj;
758 size_t trg_size, src_size, delta_size, sizediff, max_size, sz;
759 size_t ref_depth;
760 void *delta_buf;
761
762 /* Don't bother doing diffs between different types */
763 if (trg_object->type != src_object->type) {
764 *ret = -1;
765 return 0;
766 }
767
768 *ret = 0;
769
770 /* TODO: support reuse-delta */
771
772 /* Let's not bust the allowed depth. */
773 if (src->depth >= max_depth)
774 return 0;
775
776 /* Now some size filtering heuristics. */
777 trg_size = trg_object->size;
778 if (!trg_object->delta) {
779 max_size = trg_size/2 - 20;
780 ref_depth = 1;
781 } else {
782 max_size = trg_object->delta_size;
783 ref_depth = trg->depth;
784 }
785
786 max_size = (uint64_t)max_size * (max_depth - src->depth) /
787 (max_depth - ref_depth + 1);
788 if (max_size == 0)
789 return 0;
790
791 src_size = src_object->size;
792 sizediff = src_size < trg_size ? trg_size - src_size : 0;
793 if (sizediff >= max_size)
794 return 0;
795 if (trg_size < src_size / 32)
796 return 0;
797
798 /* Load data if not already done */
799 if (!trg->data) {
800 if (git_odb_read(&obj, pb->odb, &trg_object->id) < 0)
801 return -1;
802
803 sz = git_odb_object_size(obj);
804 trg->data = git__malloc(sz);
805 GIT_ERROR_CHECK_ALLOC(trg->data);
806 memcpy(trg->data, git_odb_object_data(obj), sz);
807
808 git_odb_object_free(obj);
809
810 if (sz != trg_size) {
811 git_error_set(GIT_ERROR_INVALID,
812 "inconsistent target object length");
813 return -1;
814 }
815
816 *mem_usage += sz;
817 }
818 if (!src->data) {
819 size_t obj_sz;
820
821 if (git_odb_read(&obj, pb->odb, &src_object->id) < 0 ||
822 !git__is_ulong(obj_sz = git_odb_object_size(obj)))
823 return -1;
824
825 sz = obj_sz;
826 src->data = git__malloc(sz);
827 GIT_ERROR_CHECK_ALLOC(src->data);
828 memcpy(src->data, git_odb_object_data(obj), sz);
829
830 git_odb_object_free(obj);
831
832 if (sz != src_size) {
833 git_error_set(GIT_ERROR_INVALID,
834 "inconsistent source object length");
835 return -1;
836 }
837
838 *mem_usage += sz;
839 }
840 if (!src->index) {
841 if (git_delta_index_init(&src->index, src->data, src_size) < 0)
842 return 0; /* suboptimal pack - out of memory */
843
844 *mem_usage += git_delta_index_size(src->index);
845 }
846
847 if (git_delta_create_from_index(&delta_buf, &delta_size, src->index, trg->data, trg_size,
848 max_size) < 0)
849 return 0;
850
851 if (trg_object->delta) {
852 /* Prefer only shallower same-sized deltas. */
853 if (delta_size == trg_object->delta_size &&
854 src->depth + 1 >= trg->depth) {
855 git__free(delta_buf);
856 return 0;
857 }
858 }
859
860 git_packbuilder__cache_lock(pb);
861 if (trg_object->delta_data) {
862 git__free(trg_object->delta_data);
863 assert(pb->delta_cache_size >= trg_object->delta_size);
864 pb->delta_cache_size -= trg_object->delta_size;
865 trg_object->delta_data = NULL;
866 }
867 if (delta_cacheable(pb, src_size, trg_size, delta_size)) {
868 bool overflow = git__add_sizet_overflow(
869 &pb->delta_cache_size, pb->delta_cache_size, delta_size);
870
871 git_packbuilder__cache_unlock(pb);
872
873 if (overflow) {
874 git__free(delta_buf);
875 return -1;
876 }
877
878 trg_object->delta_data = git__realloc(delta_buf, delta_size);
879 GIT_ERROR_CHECK_ALLOC(trg_object->delta_data);
880 } else {
881 /* create delta when writing the pack */
882 git_packbuilder__cache_unlock(pb);
883 git__free(delta_buf);
884 }
885
886 trg_object->delta = src_object;
887 trg_object->delta_size = delta_size;
888 trg->depth = src->depth + 1;
889
890 *ret = 1;
891 return 0;
892 }
893
894 static size_t check_delta_limit(git_pobject *me, size_t n)
895 {
896 git_pobject *child = me->delta_child;
897 size_t m = n;
898
899 while (child) {
900 size_t c = check_delta_limit(child, n + 1);
901 if (m < c)
902 m = c;
903 child = child->delta_sibling;
904 }
905 return m;
906 }
907
908 static size_t free_unpacked(struct unpacked *n)
909 {
910 size_t freed_mem = 0;
911
912 if (n->index) {
913 freed_mem += git_delta_index_size(n->index);
914 git_delta_index_free(n->index);
915 }
916 n->index = NULL;
917
918 if (n->data) {
919 freed_mem += n->object->size;
920 git__free(n->data);
921 n->data = NULL;
922 }
923 n->object = NULL;
924 n->depth = 0;
925 return freed_mem;
926 }
927
928 static int report_delta_progress(
929 git_packbuilder *pb, uint32_t count, bool force)
930 {
931 int ret;
932
933 if (pb->progress_cb) {
934 double current_time = git__timer();
935 double elapsed = current_time - pb->last_progress_report_time;
936
937 if (force || elapsed >= MIN_PROGRESS_UPDATE_INTERVAL) {
938 pb->last_progress_report_time = current_time;
939
940 ret = pb->progress_cb(
941 GIT_PACKBUILDER_DELTAFICATION,
942 count, pb->nr_objects, pb->progress_cb_payload);
943
944 if (ret)
945 return git_error_set_after_callback(ret);
946 }
947 }
948
949 return 0;
950 }
951
952 static int find_deltas(git_packbuilder *pb, git_pobject **list,
953 size_t *list_size, size_t window, size_t depth)
954 {
955 git_pobject *po;
956 git_buf zbuf = GIT_BUF_INIT;
957 struct unpacked *array;
958 size_t idx = 0, count = 0;
959 size_t mem_usage = 0;
960 size_t i;
961 int error = -1;
962
963 array = git__calloc(window, sizeof(struct unpacked));
964 GIT_ERROR_CHECK_ALLOC(array);
965
966 for (;;) {
967 struct unpacked *n = array + idx;
968 size_t max_depth, j, best_base = SIZE_MAX;
969
970 git_packbuilder__progress_lock(pb);
971 if (!*list_size) {
972 git_packbuilder__progress_unlock(pb);
973 break;
974 }
975
976 pb->nr_deltified += 1;
977 report_delta_progress(pb, pb->nr_deltified, false);
978
979 po = *list++;
980 (*list_size)--;
981 git_packbuilder__progress_unlock(pb);
982
983 mem_usage -= free_unpacked(n);
984 n->object = po;
985
986 while (pb->window_memory_limit &&
987 mem_usage > pb->window_memory_limit &&
988 count > 1) {
989 size_t tail = (idx + window - count) % window;
990 mem_usage -= free_unpacked(array + tail);
991 count--;
992 }
993
994 /*
995 * If the current object is at pack edge, take the depth the
996 * objects that depend on the current object into account
997 * otherwise they would become too deep.
998 */
999 max_depth = depth;
1000 if (po->delta_child) {
1001 size_t delta_limit = check_delta_limit(po, 0);
1002
1003 if (delta_limit > max_depth)
1004 goto next;
1005
1006 max_depth -= delta_limit;
1007 }
1008
1009 j = window;
1010 while (--j > 0) {
1011 int ret;
1012 size_t other_idx = idx + j;
1013 struct unpacked *m;
1014
1015 if (other_idx >= window)
1016 other_idx -= window;
1017
1018 m = array + other_idx;
1019 if (!m->object)
1020 break;
1021
1022 if (try_delta(pb, n, m, max_depth, &mem_usage, &ret) < 0)
1023 goto on_error;
1024 if (ret < 0)
1025 break;
1026 else if (ret > 0)
1027 best_base = other_idx;
1028 }
1029
1030 /*
1031 * If we decided to cache the delta data, then it is best
1032 * to compress it right away. First because we have to do
1033 * it anyway, and doing it here while we're threaded will
1034 * save a lot of time in the non threaded write phase,
1035 * as well as allow for caching more deltas within
1036 * the same cache size limit.
1037 * ...
1038 * But only if not writing to stdout, since in that case
1039 * the network is most likely throttling writes anyway,
1040 * and therefore it is best to go to the write phase ASAP
1041 * instead, as we can afford spending more time compressing
1042 * between writes at that moment.
1043 */
1044 if (po->delta_data) {
1045 if (git_zstream_deflatebuf(&zbuf, po->delta_data, po->delta_size) < 0)
1046 goto on_error;
1047
1048 git__free(po->delta_data);
1049 po->delta_data = git__malloc(zbuf.size);
1050 GIT_ERROR_CHECK_ALLOC(po->delta_data);
1051
1052 memcpy(po->delta_data, zbuf.ptr, zbuf.size);
1053 po->z_delta_size = zbuf.size;
1054 git_buf_clear(&zbuf);
1055
1056 git_packbuilder__cache_lock(pb);
1057 pb->delta_cache_size -= po->delta_size;
1058 pb->delta_cache_size += po->z_delta_size;
1059 git_packbuilder__cache_unlock(pb);
1060 }
1061
1062 /*
1063 * If we made n a delta, and if n is already at max
1064 * depth, leaving it in the window is pointless. we
1065 * should evict it first.
1066 */
1067 if (po->delta && max_depth <= n->depth)
1068 continue;
1069
1070 /*
1071 * Move the best delta base up in the window, after the
1072 * currently deltified object, to keep it longer. It will
1073 * be the first base object to be attempted next.
1074 */
1075 if (po->delta) {
1076 struct unpacked swap = array[best_base];
1077 size_t dist = (window + idx - best_base) % window;
1078 size_t dst = best_base;
1079 while (dist--) {
1080 size_t src = (dst + 1) % window;
1081 array[dst] = array[src];
1082 dst = src;
1083 }
1084 array[dst] = swap;
1085 }
1086
1087 next:
1088 idx++;
1089 if (count + 1 < window)
1090 count++;
1091 if (idx >= window)
1092 idx = 0;
1093 }
1094 error = 0;
1095
1096 on_error:
1097 for (i = 0; i < window; ++i) {
1098 git__free(array[i].index);
1099 git__free(array[i].data);
1100 }
1101 git__free(array);
1102 git_buf_dispose(&zbuf);
1103
1104 return error;
1105 }
1106
1107 #ifdef GIT_THREADS
1108
1109 struct thread_params {
1110 git_thread thread;
1111 git_packbuilder *pb;
1112
1113 git_pobject **list;
1114
1115 git_cond cond;
1116 git_mutex mutex;
1117
1118 size_t list_size;
1119 size_t remaining;
1120
1121 size_t window;
1122 size_t depth;
1123 size_t working;
1124 size_t data_ready;
1125 };
1126
1127 static void *threaded_find_deltas(void *arg)
1128 {
1129 struct thread_params *me = arg;
1130
1131 while (me->remaining) {
1132 if (find_deltas(me->pb, me->list, &me->remaining,
1133 me->window, me->depth) < 0) {
1134 ; /* TODO */
1135 }
1136
1137 git_packbuilder__progress_lock(me->pb);
1138 me->working = 0;
1139 git_cond_signal(&me->pb->progress_cond);
1140 git_packbuilder__progress_unlock(me->pb);
1141
1142 if (git_mutex_lock(&me->mutex)) {
1143 git_error_set(GIT_ERROR_THREAD, "unable to lock packfile condition mutex");
1144 return NULL;
1145 }
1146
1147 while (!me->data_ready)
1148 git_cond_wait(&me->cond, &me->mutex);
1149
1150 /*
1151 * We must not set ->data_ready before we wait on the
1152 * condition because the main thread may have set it to 1
1153 * before we get here. In order to be sure that new
1154 * work is available if we see 1 in ->data_ready, it
1155 * was initialized to 0 before this thread was spawned
1156 * and we reset it to 0 right away.
1157 */
1158 me->data_ready = 0;
1159 git_mutex_unlock(&me->mutex);
1160 }
1161 /* leave ->working 1 so that this doesn't get more work assigned */
1162 return NULL;
1163 }
1164
1165 static int ll_find_deltas(git_packbuilder *pb, git_pobject **list,
1166 size_t list_size, size_t window, size_t depth)
1167 {
1168 struct thread_params *p;
1169 size_t i;
1170 int ret, active_threads = 0;
1171
1172 if (!pb->nr_threads)
1173 pb->nr_threads = git_online_cpus();
1174
1175 if (pb->nr_threads <= 1) {
1176 find_deltas(pb, list, &list_size, window, depth);
1177 return 0;
1178 }
1179
1180 p = git__mallocarray(pb->nr_threads, sizeof(*p));
1181 GIT_ERROR_CHECK_ALLOC(p);
1182
1183 /* Partition the work among the threads */
1184 for (i = 0; i < pb->nr_threads; ++i) {
1185 size_t sub_size = list_size / (pb->nr_threads - i);
1186
1187 /* don't use too small segments or no deltas will be found */
1188 if (sub_size < 2*window && i+1 < pb->nr_threads)
1189 sub_size = 0;
1190
1191 p[i].pb = pb;
1192 p[i].window = window;
1193 p[i].depth = depth;
1194 p[i].working = 1;
1195 p[i].data_ready = 0;
1196
1197 /* try to split chunks on "path" boundaries */
1198 while (sub_size && sub_size < list_size &&
1199 list[sub_size]->hash &&
1200 list[sub_size]->hash == list[sub_size-1]->hash)
1201 sub_size++;
1202
1203 p[i].list = list;
1204 p[i].list_size = sub_size;
1205 p[i].remaining = sub_size;
1206
1207 list += sub_size;
1208 list_size -= sub_size;
1209 }
1210
1211 /* Start work threads */
1212 for (i = 0; i < pb->nr_threads; ++i) {
1213 if (!p[i].list_size)
1214 continue;
1215
1216 git_mutex_init(&p[i].mutex);
1217 git_cond_init(&p[i].cond);
1218
1219 ret = git_thread_create(&p[i].thread,
1220 threaded_find_deltas, &p[i]);
1221 if (ret) {
1222 git_error_set(GIT_ERROR_THREAD, "unable to create thread");
1223 return -1;
1224 }
1225 active_threads++;
1226 }
1227
1228 /*
1229 * Now let's wait for work completion. Each time a thread is done
1230 * with its work, we steal half of the remaining work from the
1231 * thread with the largest number of unprocessed objects and give
1232 * it to that newly idle thread. This ensure good load balancing
1233 * until the remaining object list segments are simply too short
1234 * to be worth splitting anymore.
1235 */
1236 while (active_threads) {
1237 struct thread_params *target = NULL;
1238 struct thread_params *victim = NULL;
1239 size_t sub_size = 0;
1240
1241 /* Start by locating a thread that has transitioned its
1242 * 'working' flag from 1 -> 0. This indicates that it is
1243 * ready to receive more work using our work-stealing
1244 * algorithm. */
1245 git_packbuilder__progress_lock(pb);
1246 for (;;) {
1247 for (i = 0; !target && i < pb->nr_threads; i++)
1248 if (!p[i].working)
1249 target = &p[i];
1250 if (target)
1251 break;
1252 git_cond_wait(&pb->progress_cond, &pb->progress_mutex);
1253 }
1254
1255 /* At this point we hold the progress lock and have located
1256 * a thread to receive more work. We still need to locate a
1257 * thread from which to steal work (the victim). */
1258 for (i = 0; i < pb->nr_threads; i++)
1259 if (p[i].remaining > 2*window &&
1260 (!victim || victim->remaining < p[i].remaining))
1261 victim = &p[i];
1262
1263 if (victim) {
1264 sub_size = victim->remaining / 2;
1265 list = victim->list + victim->list_size - sub_size;
1266 while (sub_size && list[0]->hash &&
1267 list[0]->hash == list[-1]->hash) {
1268 list++;
1269 sub_size--;
1270 }
1271 if (!sub_size) {
1272 /*
1273 * It is possible for some "paths" to have
1274 * so many objects that no hash boundary
1275 * might be found. Let's just steal the
1276 * exact half in that case.
1277 */
1278 sub_size = victim->remaining / 2;
1279 list -= sub_size;
1280 }
1281 target->list = list;
1282 victim->list_size -= sub_size;
1283 victim->remaining -= sub_size;
1284 }
1285 target->list_size = sub_size;
1286 target->remaining = sub_size;
1287 target->working = 1;
1288 git_packbuilder__progress_unlock(pb);
1289
1290 if (git_mutex_lock(&target->mutex)) {
1291 git_error_set(GIT_ERROR_THREAD, "unable to lock packfile condition mutex");
1292 git__free(p);
1293 return -1;
1294 }
1295
1296 target->data_ready = 1;
1297 git_cond_signal(&target->cond);
1298 git_mutex_unlock(&target->mutex);
1299
1300 if (!sub_size) {
1301 git_thread_join(&target->thread, NULL);
1302 git_cond_free(&target->cond);
1303 git_mutex_free(&target->mutex);
1304 active_threads--;
1305 }
1306 }
1307
1308 git__free(p);
1309 return 0;
1310 }
1311
1312 #else
1313 #define ll_find_deltas(pb, l, ls, w, d) find_deltas(pb, l, &ls, w, d)
1314 #endif
1315
1316 static int prepare_pack(git_packbuilder *pb)
1317 {
1318 git_pobject **delta_list;
1319 size_t i, n = 0;
1320
1321 if (pb->nr_objects == 0 || pb->done)
1322 return 0; /* nothing to do */
1323
1324 /*
1325 * Although we do not report progress during deltafication, we
1326 * at least report that we are in the deltafication stage
1327 */
1328 if (pb->progress_cb)
1329 pb->progress_cb(GIT_PACKBUILDER_DELTAFICATION, 0, pb->nr_objects, pb->progress_cb_payload);
1330
1331 delta_list = git__mallocarray(pb->nr_objects, sizeof(*delta_list));
1332 GIT_ERROR_CHECK_ALLOC(delta_list);
1333
1334 for (i = 0; i < pb->nr_objects; ++i) {
1335 git_pobject *po = pb->object_list + i;
1336
1337 /* Make sure the item is within our size limits */
1338 if (po->size < 50 || po->size > pb->big_file_threshold)
1339 continue;
1340
1341 delta_list[n++] = po;
1342 }
1343
1344 if (n > 1) {
1345 git__tsort((void **)delta_list, n, type_size_sort);
1346 if (ll_find_deltas(pb, delta_list, n,
1347 GIT_PACK_WINDOW + 1,
1348 GIT_PACK_DEPTH) < 0) {
1349 git__free(delta_list);
1350 return -1;
1351 }
1352 }
1353
1354 report_delta_progress(pb, pb->nr_objects, true);
1355
1356 pb->done = true;
1357 git__free(delta_list);
1358 return 0;
1359 }
1360
1361 #define PREPARE_PACK if (prepare_pack(pb) < 0) { return -1; }
1362
1363 int git_packbuilder_foreach(git_packbuilder *pb, int (*cb)(void *buf, size_t size, void *payload), void *payload)
1364 {
1365 PREPARE_PACK;
1366 return write_pack(pb, cb, payload);
1367 }
1368
1369 int git_packbuilder_write_buf(git_buf *buf, git_packbuilder *pb)
1370 {
1371 PREPARE_PACK;
1372 git_buf_sanitize(buf);
1373 return write_pack(pb, &write_pack_buf, buf);
1374 }
1375
1376 static int write_cb(void *buf, size_t len, void *payload)
1377 {
1378 struct pack_write_context *ctx = payload;
1379 return git_indexer_append(ctx->indexer, buf, len, ctx->stats);
1380 }
1381
1382 int git_packbuilder_write(
1383 git_packbuilder *pb,
1384 const char *path,
1385 unsigned int mode,
1386 git_transfer_progress_cb progress_cb,
1387 void *progress_cb_payload)
1388 {
1389 git_indexer_options opts = GIT_INDEXER_OPTIONS_INIT;
1390 git_indexer *indexer;
1391 git_transfer_progress stats;
1392 struct pack_write_context ctx;
1393 int t;
1394
1395 PREPARE_PACK;
1396
1397 opts.progress_cb = progress_cb;
1398 opts.progress_cb_payload = progress_cb_payload;
1399
1400 if (git_indexer_new(
1401 &indexer, path, mode, pb->odb, &opts) < 0)
1402 return -1;
1403
1404 if (!git_repository__cvar(&t, pb->repo, GIT_CVAR_FSYNCOBJECTFILES) && t)
1405 git_indexer__set_fsync(indexer, 1);
1406
1407 ctx.indexer = indexer;
1408 ctx.stats = &stats;
1409
1410 if (git_packbuilder_foreach(pb, write_cb, &ctx) < 0 ||
1411 git_indexer_commit(indexer, &stats) < 0) {
1412 git_indexer_free(indexer);
1413 return -1;
1414 }
1415
1416 git_oid_cpy(&pb->pack_oid, git_indexer_hash(indexer));
1417
1418 git_indexer_free(indexer);
1419 return 0;
1420 }
1421
1422 #undef PREPARE_PACK
1423
1424 const git_oid *git_packbuilder_hash(git_packbuilder *pb)
1425 {
1426 return &pb->pack_oid;
1427 }
1428
1429
1430 static int cb_tree_walk(
1431 const char *root, const git_tree_entry *entry, void *payload)
1432 {
1433 int error;
1434 struct tree_walk_context *ctx = payload;
1435
1436 /* A commit inside a tree represents a submodule commit and should be skipped. */
1437 if (git_tree_entry_type(entry) == GIT_OBJECT_COMMIT)
1438 return 0;
1439
1440 if (!(error = git_buf_sets(&ctx->buf, root)) &&
1441 !(error = git_buf_puts(&ctx->buf, git_tree_entry_name(entry))))
1442 error = git_packbuilder_insert(
1443 ctx->pb, git_tree_entry_id(entry), git_buf_cstr(&ctx->buf));
1444
1445 return error;
1446 }
1447
1448 int git_packbuilder_insert_commit(git_packbuilder *pb, const git_oid *oid)
1449 {
1450 git_commit *commit;
1451
1452 if (git_commit_lookup(&commit, pb->repo, oid) < 0 ||
1453 git_packbuilder_insert(pb, oid, NULL) < 0)
1454 return -1;
1455
1456 if (git_packbuilder_insert_tree(pb, git_commit_tree_id(commit)) < 0)
1457 return -1;
1458
1459 git_commit_free(commit);
1460 return 0;
1461 }
1462
1463 int git_packbuilder_insert_tree(git_packbuilder *pb, const git_oid *oid)
1464 {
1465 int error;
1466 git_tree *tree = NULL;
1467 struct tree_walk_context context = { pb, GIT_BUF_INIT };
1468
1469 if (!(error = git_tree_lookup(&tree, pb->repo, oid)) &&
1470 !(error = git_packbuilder_insert(pb, oid, NULL)))
1471 error = git_tree_walk(tree, GIT_TREEWALK_PRE, cb_tree_walk, &context);
1472
1473 git_tree_free(tree);
1474 git_buf_dispose(&context.buf);
1475 return error;
1476 }
1477
1478 int git_packbuilder_insert_recur(git_packbuilder *pb, const git_oid *id, const char *name)
1479 {
1480 git_object *obj;
1481 int error;
1482
1483 assert(pb && id);
1484
1485 if ((error = git_object_lookup(&obj, pb->repo, id, GIT_OBJECT_ANY)) < 0)
1486 return error;
1487
1488 switch (git_object_type(obj)) {
1489 case GIT_OBJECT_BLOB:
1490 error = git_packbuilder_insert(pb, id, name);
1491 break;
1492 case GIT_OBJECT_TREE:
1493 error = git_packbuilder_insert_tree(pb, id);
1494 break;
1495 case GIT_OBJECT_COMMIT:
1496 error = git_packbuilder_insert_commit(pb, id);
1497 break;
1498 case GIT_OBJECT_TAG:
1499 if ((error = git_packbuilder_insert(pb, id, name)) < 0)
1500 goto cleanup;
1501 error = git_packbuilder_insert_recur(pb, git_tag_target_id((git_tag *) obj), NULL);
1502 break;
1503
1504 default:
1505 git_error_set(GIT_ERROR_INVALID, "unknown object type");
1506 error = -1;
1507 }
1508
1509 cleanup:
1510 git_object_free(obj);
1511 return error;
1512 }
1513
1514 size_t git_packbuilder_object_count(git_packbuilder *pb)
1515 {
1516 return pb->nr_objects;
1517 }
1518
1519 size_t git_packbuilder_written(git_packbuilder *pb)
1520 {
1521 return pb->nr_written;
1522 }
1523
1524 static int lookup_walk_object(struct walk_object **out, git_packbuilder *pb, const git_oid *id)
1525 {
1526 struct walk_object *obj;
1527
1528 obj = git_pool_mallocz(&pb->object_pool, 1);
1529 if (!obj) {
1530 git_error_set_oom();
1531 return -1;
1532 }
1533
1534 git_oid_cpy(&obj->id, id);
1535
1536 *out = obj;
1537 return 0;
1538 }
1539
1540 static int retrieve_object(struct walk_object **out, git_packbuilder *pb, const git_oid *id)
1541 {
1542 int error;
1543 size_t pos;
1544 struct walk_object *obj;
1545
1546 pos = git_oidmap_lookup_index(pb->walk_objects, id);
1547 if (git_oidmap_valid_index(pb->walk_objects, pos)) {
1548 obj = git_oidmap_value_at(pb->walk_objects, pos);
1549 } else {
1550 if ((error = lookup_walk_object(&obj, pb, id)) < 0)
1551 return error;
1552
1553 git_oidmap_insert(pb->walk_objects, &obj->id, obj, &error);
1554 }
1555
1556 *out = obj;
1557 return 0;
1558 }
1559
1560 static int mark_blob_uninteresting(git_packbuilder *pb, const git_oid *id)
1561 {
1562 int error;
1563 struct walk_object *obj;
1564
1565 if ((error = retrieve_object(&obj, pb, id)) < 0)
1566 return error;
1567
1568 obj->uninteresting = 1;
1569
1570 return 0;
1571 }
1572
1573 static int mark_tree_uninteresting(git_packbuilder *pb, const git_oid *id)
1574 {
1575 struct walk_object *obj;
1576 git_tree *tree;
1577 int error;
1578 size_t i;
1579
1580 if ((error = retrieve_object(&obj, pb, id)) < 0)
1581 return error;
1582
1583 if (obj->uninteresting)
1584 return 0;
1585
1586 obj->uninteresting = 1;
1587
1588 if ((error = git_tree_lookup(&tree, pb->repo, id)) < 0)
1589 return error;
1590
1591 for (i = 0; i < git_tree_entrycount(tree); i++) {
1592 const git_tree_entry *entry = git_tree_entry_byindex(tree, i);
1593 const git_oid *entry_id = git_tree_entry_id(entry);
1594 switch (git_tree_entry_type(entry)) {
1595 case GIT_OBJECT_TREE:
1596 if ((error = mark_tree_uninteresting(pb, entry_id)) < 0)
1597 goto cleanup;
1598 break;
1599 case GIT_OBJECT_BLOB:
1600 if ((error = mark_blob_uninteresting(pb, entry_id)) < 0)
1601 goto cleanup;
1602 break;
1603 default:
1604 /* it's a submodule or something unknown, we don't want it */
1605 ;
1606 }
1607 }
1608
1609 cleanup:
1610 git_tree_free(tree);
1611 return error;
1612 }
1613
1614 /*
1615 * Mark the edges of the graph uninteresting. Since we start from a
1616 * git_revwalk, the commits are already uninteresting, but we need to
1617 * mark the trees and blobs.
1618 */
1619 static int mark_edges_uninteresting(git_packbuilder *pb, git_commit_list *commits)
1620 {
1621 int error;
1622 git_commit_list *list;
1623 git_commit *commit;
1624
1625 for (list = commits; list; list = list->next) {
1626 if (!list->item->uninteresting)
1627 continue;
1628
1629 if ((error = git_commit_lookup(&commit, pb->repo, &list->item->oid)) < 0)
1630 return error;
1631
1632 error = mark_tree_uninteresting(pb, git_commit_tree_id(commit));
1633 git_commit_free(commit);
1634
1635 if (error < 0)
1636 return error;
1637 }
1638
1639 return 0;
1640 }
1641
1642 int insert_tree(git_packbuilder *pb, git_tree *tree)
1643 {
1644 size_t i;
1645 int error;
1646 git_tree *subtree;
1647 struct walk_object *obj;
1648 const char *name;
1649
1650 if ((error = retrieve_object(&obj, pb, git_tree_id(tree))) < 0)
1651 return error;
1652
1653 if (obj->seen || obj->uninteresting)
1654 return 0;
1655
1656 obj->seen = 1;
1657
1658 if ((error = git_packbuilder_insert(pb, &obj->id, NULL)))
1659 return error;
1660
1661 for (i = 0; i < git_tree_entrycount(tree); i++) {
1662 const git_tree_entry *entry = git_tree_entry_byindex(tree, i);
1663 const git_oid *entry_id = git_tree_entry_id(entry);
1664 switch (git_tree_entry_type(entry)) {
1665 case GIT_OBJECT_TREE:
1666 if ((error = git_tree_lookup(&subtree, pb->repo, entry_id)) < 0)
1667 return error;
1668
1669 error = insert_tree(pb, subtree);
1670 git_tree_free(subtree);
1671
1672 if (error < 0)
1673 return error;
1674
1675 break;
1676 case GIT_OBJECT_BLOB:
1677 if ((error = retrieve_object(&obj, pb, entry_id)) < 0)
1678 return error;
1679 if (obj->uninteresting)
1680 continue;
1681 name = git_tree_entry_name(entry);
1682 if ((error = git_packbuilder_insert(pb, entry_id, name)) < 0)
1683 return error;
1684 break;
1685 default:
1686 /* it's a submodule or something unknown, we don't want it */
1687 ;
1688 }
1689 }
1690
1691
1692 return error;
1693 }
1694
1695 int insert_commit(git_packbuilder *pb, struct walk_object *obj)
1696 {
1697 int error;
1698 git_commit *commit = NULL;
1699 git_tree *tree = NULL;
1700
1701 obj->seen = 1;
1702
1703 if ((error = git_packbuilder_insert(pb, &obj->id, NULL)) < 0)
1704 return error;
1705
1706 if ((error = git_commit_lookup(&commit, pb->repo, &obj->id)) < 0)
1707 return error;
1708
1709 if ((error = git_tree_lookup(&tree, pb->repo, git_commit_tree_id(commit))) < 0)
1710 goto cleanup;
1711
1712 if ((error = insert_tree(pb, tree)) < 0)
1713 goto cleanup;
1714
1715 cleanup:
1716 git_commit_free(commit);
1717 git_tree_free(tree);
1718 return error;
1719 }
1720
1721 int git_packbuilder_insert_walk(git_packbuilder *pb, git_revwalk *walk)
1722 {
1723 int error;
1724 git_oid id;
1725 struct walk_object *obj;
1726
1727 assert(pb && walk);
1728
1729 if ((error = mark_edges_uninteresting(pb, walk->user_input)) < 0)
1730 return error;
1731
1732 /*
1733 * TODO: git marks the parents of the edges
1734 * uninteresting. This may provide a speed advantage, but does
1735 * seem to assume the remote does not have a single-commit
1736 * history on the other end.
1737 */
1738
1739 /* walk down each tree up to the blobs and insert them, stopping when uninteresting */
1740 while ((error = git_revwalk_next(&id, walk)) == 0) {
1741 if ((error = retrieve_object(&obj, pb, &id)) < 0)
1742 return error;
1743
1744 if (obj->seen || obj->uninteresting)
1745 continue;
1746
1747 if ((error = insert_commit(pb, obj)) < 0)
1748 return error;
1749 }
1750
1751 if (error == GIT_ITEROVER)
1752 error = 0;
1753
1754 return error;
1755 }
1756
1757 int git_packbuilder_set_callbacks(git_packbuilder *pb, git_packbuilder_progress progress_cb, void *progress_cb_payload)
1758 {
1759 if (!pb)
1760 return -1;
1761
1762 pb->progress_cb = progress_cb;
1763 pb->progress_cb_payload = progress_cb_payload;
1764
1765 return 0;
1766 }
1767
1768 void git_packbuilder_free(git_packbuilder *pb)
1769 {
1770 if (pb == NULL)
1771 return;
1772
1773 #ifdef GIT_THREADS
1774
1775 git_mutex_free(&pb->cache_mutex);
1776 git_mutex_free(&pb->progress_mutex);
1777 git_cond_free(&pb->progress_cond);
1778
1779 #endif
1780
1781 if (pb->odb)
1782 git_odb_free(pb->odb);
1783
1784 if (pb->object_ix)
1785 git_oidmap_free(pb->object_ix);
1786
1787 if (pb->object_list)
1788 git__free(pb->object_list);
1789
1790 git_oidmap_free(pb->walk_objects);
1791 git_pool_clear(&pb->object_pool);
1792
1793 git_hash_ctx_cleanup(&pb->ctx);
1794 git_zstream_free(&pb->zstream);
1795
1796 git__free(pb);
1797 }