2 * Copyright (C) the libgit2 contributors. All rights reserved.
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
8 #include "pack-objects.h"
15 #include "thread-utils.h"
19 #include "commit_list.h"
21 #include "git2/pack.h"
22 #include "git2/commit.h"
24 #include "git2/indexer.h"
25 #include "git2/config.h"
30 struct git_delta_index
*index
;
34 struct tree_walk_context
{
39 struct pack_write_context
{
41 git_transfer_progress
*stats
;
48 #define GIT_PACKBUILDER__MUTEX_OP(pb, mtx, op) do { \
49 int result = git_mutex_##op(&(pb)->mtx); \
56 #define GIT_PACKBUILDER__MUTEX_OP(pb,mtx,op) GIT_UNUSED(pb)
58 #endif /* GIT_THREADS */
60 #define git_packbuilder__cache_lock(pb) GIT_PACKBUILDER__MUTEX_OP(pb, cache_mutex, lock)
61 #define git_packbuilder__cache_unlock(pb) GIT_PACKBUILDER__MUTEX_OP(pb, cache_mutex, unlock)
62 #define git_packbuilder__progress_lock(pb) GIT_PACKBUILDER__MUTEX_OP(pb, progress_mutex, lock)
63 #define git_packbuilder__progress_unlock(pb) GIT_PACKBUILDER__MUTEX_OP(pb, progress_mutex, unlock)
65 /* The minimal interval between progress updates (in seconds). */
66 #define MIN_PROGRESS_UPDATE_INTERVAL 0.5
68 /* Size of the buffer to feed to zlib */
69 #define COMPRESS_BUFLEN (1024 * 1024)
71 static unsigned name_hash(const char *name
)
79 * This effectively just creates a sortable number from the
80 * last sixteen non-whitespace characters. Last characters
81 * count "most", so things that end in ".c" sort together.
83 while ((c
= *name
++) != 0) {
86 hash
= (hash
>> 2) + (c
<< 24);
91 static int packbuilder_config(git_packbuilder
*pb
)
97 if ((ret
= git_repository_config_snapshot(&config
, pb
->repo
)) < 0)
100 #define config_get(KEY,DST,DFLT) do { \
101 ret = git_config_get_int64(&val, config, KEY); \
102 if (!ret) (DST) = val; \
103 else if (ret == GIT_ENOTFOUND) (DST) = (DFLT); \
104 else if (ret < 0) return -1; } while (0)
106 config_get("pack.deltaCacheSize", pb
->max_delta_cache_size
,
107 GIT_PACK_DELTA_CACHE_SIZE
);
108 config_get("pack.deltaCacheLimit", pb
->cache_max_small_delta_size
,
109 GIT_PACK_DELTA_CACHE_LIMIT
);
110 config_get("pack.deltaCacheSize", pb
->big_file_threshold
,
111 GIT_PACK_BIG_FILE_THRESHOLD
);
112 config_get("pack.windowMemory", pb
->window_memory_limit
, 0);
116 git_config_free(config
);
121 int git_packbuilder_new(git_packbuilder
**out
, git_repository
*repo
)
127 pb
= git__calloc(1, sizeof(*pb
));
128 GITERR_CHECK_ALLOC(pb
);
130 pb
->object_ix
= git_oidmap_alloc();
134 pb
->walk_objects
= git_oidmap_alloc();
135 if (!pb
->walk_objects
)
138 if (git_pool_init(&pb
->object_pool
, sizeof(git_walk_object
), 0) < 0)
142 pb
->nr_threads
= 1; /* do not spawn any thread by default */
144 if (git_hash_ctx_init(&pb
->ctx
) < 0 ||
145 git_zstream_init(&pb
->zstream
) < 0 ||
146 git_repository_odb(&pb
->odb
, repo
) < 0 ||
147 packbuilder_config(pb
) < 0)
152 if (git_mutex_init(&pb
->cache_mutex
) ||
153 git_mutex_init(&pb
->progress_mutex
) ||
154 git_cond_init(&pb
->progress_cond
))
156 giterr_set(GITERR_OS
, "Failed to initialize packbuilder mutex");
166 git_packbuilder_free(pb
);
170 unsigned int git_packbuilder_set_threads(git_packbuilder
*pb
, unsigned int n
)
178 assert(1 == pb
->nr_threads
);
181 return pb
->nr_threads
;
184 static void rehash(git_packbuilder
*pb
)
191 kh_clear(oid
, pb
->object_ix
);
192 for (i
= 0, po
= pb
->object_list
; i
< pb
->nr_objects
; i
++, po
++) {
193 pos
= kh_put(oid
, pb
->object_ix
, &po
->id
, &ret
);
194 kh_value(pb
->object_ix
, pos
) = po
;
198 int git_packbuilder_insert(git_packbuilder
*pb
, const git_oid
*oid
,
208 /* If the object already exists in the hash table, then we don't
209 * have any work to do */
210 pos
= kh_get(oid
, pb
->object_ix
, oid
);
211 if (pos
!= kh_end(pb
->object_ix
))
214 if (pb
->nr_objects
>= pb
->nr_alloc
) {
215 GITERR_CHECK_ALLOC_ADD(&newsize
, pb
->nr_alloc
, 1024);
216 GITERR_CHECK_ALLOC_MULTIPLY(&newsize
, newsize
, 3 / 2);
218 if (!git__is_uint32(newsize
)) {
219 giterr_set(GITERR_NOMEMORY
, "Packfile too large to fit in memory.");
223 pb
->nr_alloc
= (uint32_t)newsize
;
225 pb
->object_list
= git__reallocarray(pb
->object_list
,
226 pb
->nr_alloc
, sizeof(*po
));
227 GITERR_CHECK_ALLOC(pb
->object_list
);
231 po
= pb
->object_list
+ pb
->nr_objects
;
232 memset(po
, 0x0, sizeof(*po
));
234 if ((ret
= git_odb_read_header(&po
->size
, &po
->type
, pb
->odb
, oid
)) < 0)
238 git_oid_cpy(&po
->id
, oid
);
239 po
->hash
= name_hash(name
);
241 pos
= kh_put(oid
, pb
->object_ix
, &po
->id
, &ret
);
247 kh_value(pb
->object_ix
, pos
) = po
;
251 if (pb
->progress_cb
) {
252 double current_time
= git__timer();
253 double elapsed
= current_time
- pb
->last_progress_report_time
;
255 if (elapsed
>= MIN_PROGRESS_UPDATE_INTERVAL
) {
256 pb
->last_progress_report_time
= current_time
;
258 ret
= pb
->progress_cb(
259 GIT_PACKBUILDER_ADDING_OBJECTS
,
260 pb
->nr_objects
, 0, pb
->progress_cb_payload
);
263 return giterr_set_after_callback(ret
);
270 static int get_delta(void **out
, git_odb
*odb
, git_pobject
*po
)
272 git_odb_object
*src
= NULL
, *trg
= NULL
;
273 unsigned long delta_size
;
278 if (git_odb_read(&src
, odb
, &po
->delta
->id
) < 0 ||
279 git_odb_read(&trg
, odb
, &po
->id
) < 0)
282 delta_buf
= git_delta(
283 git_odb_object_data(src
), (unsigned long)git_odb_object_size(src
),
284 git_odb_object_data(trg
), (unsigned long)git_odb_object_size(trg
),
287 if (!delta_buf
|| delta_size
!= po
->delta_size
) {
288 giterr_set(GITERR_INVALID
, "Delta size changed");
294 git_odb_object_free(src
);
295 git_odb_object_free(trg
);
299 git_odb_object_free(src
);
300 git_odb_object_free(trg
);
304 static int write_object(
307 int (*write_cb
)(void *buf
, size_t size
, void *cb_data
),
310 git_odb_object
*obj
= NULL
;
312 unsigned char hdr
[10], *zbuf
= NULL
;
314 size_t hdr_len
, zbuf_len
= COMPRESS_BUFLEN
, data_len
;
318 * If we have a delta base, let's use the delta to save space.
319 * Otherwise load the whole object. 'data' ends up pointing to
320 * whatever data we want to put into the packfile.
324 data
= po
->delta_data
;
325 else if ((error
= get_delta(&data
, pb
->odb
, po
)) < 0)
328 data_len
= po
->delta_size
;
329 type
= GIT_OBJ_REF_DELTA
;
331 if ((error
= git_odb_read(&obj
, pb
->odb
, &po
->id
)) < 0)
334 data
= (void *)git_odb_object_data(obj
);
335 data_len
= git_odb_object_size(obj
);
336 type
= git_odb_object_type(obj
);
340 hdr_len
= git_packfile__object_header(hdr
, data_len
, type
);
342 if ((error
= write_cb(hdr
, hdr_len
, cb_data
)) < 0 ||
343 (error
= git_hash_update(&pb
->ctx
, hdr
, hdr_len
)) < 0)
346 if (type
== GIT_OBJ_REF_DELTA
) {
347 if ((error
= write_cb(po
->delta
->id
.id
, GIT_OID_RAWSZ
, cb_data
)) < 0 ||
348 (error
= git_hash_update(&pb
->ctx
, po
->delta
->id
.id
, GIT_OID_RAWSZ
)) < 0)
353 if (po
->z_delta_size
) {
354 data_len
= po
->z_delta_size
;
356 if ((error
= write_cb(data
, data_len
, cb_data
)) < 0 ||
357 (error
= git_hash_update(&pb
->ctx
, data
, data_len
)) < 0)
360 zbuf
= git__malloc(zbuf_len
);
361 GITERR_CHECK_ALLOC(zbuf
);
363 git_zstream_reset(&pb
->zstream
);
364 git_zstream_set_input(&pb
->zstream
, data
, data_len
);
366 while (!git_zstream_done(&pb
->zstream
)) {
367 if ((error
= git_zstream_get_output(zbuf
, &zbuf_len
, &pb
->zstream
)) < 0 ||
368 (error
= write_cb(zbuf
, zbuf_len
, cb_data
)) < 0 ||
369 (error
= git_hash_update(&pb
->ctx
, zbuf
, zbuf_len
)) < 0)
372 zbuf_len
= COMPRESS_BUFLEN
; /* reuse buffer */
377 * If po->delta is true, data is a delta and it is our
378 * responsibility to free it (otherwise it's a git_object's
379 * data). We set po->delta_data to NULL in case we got the
380 * data from there instead of get_delta(). If we didn't,
385 po
->delta_data
= NULL
;
392 git_odb_object_free(obj
);
396 enum write_one_status
{
397 WRITE_ONE_SKIP
= -1, /* already written */
398 WRITE_ONE_BREAK
= 0, /* writing this will bust the limit; not written */
399 WRITE_ONE_WRITTEN
= 1, /* normal */
400 WRITE_ONE_RECURSIVE
= 2 /* already scheduled to be written */
403 static int write_one(
404 enum write_one_status
*status
,
407 int (*write_cb
)(void *buf
, size_t size
, void *cb_data
),
413 *status
= WRITE_ONE_RECURSIVE
;
415 } else if (po
->written
) {
416 *status
= WRITE_ONE_SKIP
;
423 if ((error
= write_one(status
, pb
, po
->delta
, write_cb
, cb_data
)) < 0)
426 /* we cannot depend on this one */
427 if (*status
== WRITE_ONE_RECURSIVE
)
431 *status
= WRITE_ONE_WRITTEN
;
435 return write_object(pb
, po
, write_cb
, cb_data
);
438 GIT_INLINE(void) add_to_write_order(git_pobject
**wo
, unsigned int *endp
,
447 static void add_descendants_to_write_order(git_pobject
**wo
, unsigned int *endp
,
450 int add_to_order
= 1;
454 /* add this node... */
455 add_to_write_order(wo
, endp
, po
);
456 /* all its siblings... */
457 for (s
= po
->delta_sibling
; s
; s
= s
->delta_sibling
) {
458 add_to_write_order(wo
, endp
, s
);
461 /* drop down a level to add left subtree nodes if possible */
462 if (po
->delta_child
) {
464 po
= po
->delta_child
;
467 /* our sibling might have some children, it is next */
468 if (po
->delta_sibling
) {
469 po
= po
->delta_sibling
;
472 /* go back to our parent node */
474 while (po
&& !po
->delta_sibling
) {
475 /* we're on the right side of a subtree, keep
476 * going up until we can go right again */
480 /* done- we hit our original root node */
483 /* pass it off to sibling at this level */
484 po
= po
->delta_sibling
;
489 static void add_family_to_write_order(git_pobject
**wo
, unsigned int *endp
,
494 for (root
= po
; root
->delta
; root
= root
->delta
)
496 add_descendants_to_write_order(wo
, endp
, root
);
499 static int cb_tag_foreach(const char *name
, git_oid
*oid
, void *data
)
501 git_packbuilder
*pb
= data
;
507 pos
= kh_get(oid
, pb
->object_ix
, oid
);
508 if (pos
== kh_end(pb
->object_ix
))
511 po
= kh_value(pb
->object_ix
, pos
);
514 /* TODO: peel objects */
519 static git_pobject
**compute_write_order(git_packbuilder
*pb
)
521 unsigned int i
, wo_end
, last_untagged
;
524 if ((wo
= git__mallocarray(pb
->nr_objects
, sizeof(*wo
))) == NULL
)
527 for (i
= 0; i
< pb
->nr_objects
; i
++) {
528 git_pobject
*po
= pb
->object_list
+ i
;
531 po
->delta_child
= NULL
;
532 po
->delta_sibling
= NULL
;
536 * Fully connect delta_child/delta_sibling network.
537 * Make sure delta_sibling is sorted in the original
540 for (i
= pb
->nr_objects
; i
> 0;) {
541 git_pobject
*po
= &pb
->object_list
[--i
];
544 /* Mark me as the first child */
545 po
->delta_sibling
= po
->delta
->delta_child
;
546 po
->delta
->delta_child
= po
;
550 * Mark objects that are at the tip of tags.
552 if (git_tag_foreach(pb
->repo
, &cb_tag_foreach
, pb
) < 0) {
558 * Give the objects in the original recency order until
559 * we see a tagged tip.
561 for (i
= wo_end
= 0; i
< pb
->nr_objects
; i
++) {
562 git_pobject
*po
= pb
->object_list
+ i
;
565 add_to_write_order(wo
, &wo_end
, po
);
570 * Then fill all the tagged tips.
572 for (; i
< pb
->nr_objects
; i
++) {
573 git_pobject
*po
= pb
->object_list
+ i
;
575 add_to_write_order(wo
, &wo_end
, po
);
579 * And then all remaining commits and tags.
581 for (i
= last_untagged
; i
< pb
->nr_objects
; i
++) {
582 git_pobject
*po
= pb
->object_list
+ i
;
583 if (po
->type
!= GIT_OBJ_COMMIT
&&
584 po
->type
!= GIT_OBJ_TAG
)
586 add_to_write_order(wo
, &wo_end
, po
);
590 * And then all the trees.
592 for (i
= last_untagged
; i
< pb
->nr_objects
; i
++) {
593 git_pobject
*po
= pb
->object_list
+ i
;
594 if (po
->type
!= GIT_OBJ_TREE
)
596 add_to_write_order(wo
, &wo_end
, po
);
600 * Finally all the rest in really tight order
602 for (i
= last_untagged
; i
< pb
->nr_objects
; i
++) {
603 git_pobject
*po
= pb
->object_list
+ i
;
605 add_family_to_write_order(wo
, &wo_end
, po
);
608 if (wo_end
!= pb
->nr_objects
) {
609 giterr_set(GITERR_INVALID
, "invalid write order");
616 static int write_pack(git_packbuilder
*pb
,
617 int (*write_cb
)(void *buf
, size_t size
, void *cb_data
),
620 git_pobject
**write_order
;
622 enum write_one_status status
;
623 struct git_pack_header ph
;
628 write_order
= compute_write_order(pb
);
629 if (write_order
== NULL
) {
634 /* Write pack header */
635 ph
.hdr_signature
= htonl(PACK_SIGNATURE
);
636 ph
.hdr_version
= htonl(PACK_VERSION
);
637 ph
.hdr_entries
= htonl(pb
->nr_objects
);
639 if ((error
= write_cb(&ph
, sizeof(ph
), cb_data
)) < 0 ||
640 (error
= git_hash_update(&pb
->ctx
, &ph
, sizeof(ph
))) < 0)
643 pb
->nr_remaining
= pb
->nr_objects
;
646 for ( ; i
< pb
->nr_objects
; ++i
) {
649 if ((error
= write_one(&status
, pb
, po
, write_cb
, cb_data
)) < 0)
653 pb
->nr_remaining
-= pb
->nr_written
;
654 } while (pb
->nr_remaining
&& i
< pb
->nr_objects
);
656 if ((error
= git_hash_final(&entry_oid
, &pb
->ctx
)) < 0)
659 error
= write_cb(entry_oid
.id
, GIT_OID_RAWSZ
, cb_data
);
662 /* if callback cancelled writing, we must still free delta_data */
663 for ( ; i
< pb
->nr_objects
; ++i
) {
665 if (po
->delta_data
) {
666 git__free(po
->delta_data
);
667 po
->delta_data
= NULL
;
671 git__free(write_order
);
675 static int write_pack_buf(void *buf
, size_t size
, void *data
)
677 git_buf
*b
= (git_buf
*)data
;
678 return git_buf_put(b
, buf
, size
);
681 static int type_size_sort(const void *_a
, const void *_b
)
683 const git_pobject
*a
= (git_pobject
*)_a
;
684 const git_pobject
*b
= (git_pobject
*)_b
;
686 if (a
->type
> b
->type
)
688 if (a
->type
< b
->type
)
690 if (a
->hash
> b
->hash
)
692 if (a
->hash
< b
->hash
)
697 if (a->preferred_base > b->preferred_base)
699 if (a->preferred_base < b->preferred_base)
702 if (a
->size
> b
->size
)
704 if (a
->size
< b
->size
)
706 return a
< b
? -1 : (a
> b
); /* newest first */
709 static int delta_cacheable(git_packbuilder
*pb
, unsigned long src_size
,
710 unsigned long trg_size
, unsigned long delta_size
)
712 if (pb
->max_delta_cache_size
&&
713 pb
->delta_cache_size
+ delta_size
> pb
->max_delta_cache_size
)
716 if (delta_size
< pb
->cache_max_small_delta_size
)
719 /* cache delta, if objects are large enough compared to delta size */
720 if ((src_size
>> 20) + (trg_size
>> 21) > (delta_size
>> 10))
726 static int try_delta(git_packbuilder
*pb
, struct unpacked
*trg
,
727 struct unpacked
*src
, int max_depth
,
728 unsigned long *mem_usage
, int *ret
)
730 git_pobject
*trg_object
= trg
->object
;
731 git_pobject
*src_object
= src
->object
;
733 unsigned long trg_size
, src_size
, delta_size
,
734 sizediff
, max_size
, sz
;
735 unsigned int ref_depth
;
738 /* Don't bother doing diffs between different types */
739 if (trg_object
->type
!= src_object
->type
) {
746 /* TODO: support reuse-delta */
748 /* Let's not bust the allowed depth. */
749 if (src
->depth
>= max_depth
)
752 /* Now some size filtering heuristics. */
753 trg_size
= (unsigned long)trg_object
->size
;
754 if (!trg_object
->delta
) {
755 max_size
= trg_size
/2 - 20;
758 max_size
= trg_object
->delta_size
;
759 ref_depth
= trg
->depth
;
762 max_size
= (uint64_t)max_size
* (max_depth
- src
->depth
) /
763 (max_depth
- ref_depth
+ 1);
767 src_size
= (unsigned long)src_object
->size
;
768 sizediff
= src_size
< trg_size
? trg_size
- src_size
: 0;
769 if (sizediff
>= max_size
)
771 if (trg_size
< src_size
/ 32)
774 /* Load data if not already done */
776 if (git_odb_read(&obj
, pb
->odb
, &trg_object
->id
) < 0)
779 sz
= (unsigned long)git_odb_object_size(obj
);
780 trg
->data
= git__malloc(sz
);
781 GITERR_CHECK_ALLOC(trg
->data
);
782 memcpy(trg
->data
, git_odb_object_data(obj
), sz
);
784 git_odb_object_free(obj
);
786 if (sz
!= trg_size
) {
787 giterr_set(GITERR_INVALID
,
788 "Inconsistent target object length");
797 if (git_odb_read(&obj
, pb
->odb
, &src_object
->id
) < 0 ||
798 !git__is_ulong(obj_sz
= git_odb_object_size(obj
)))
801 sz
= (unsigned long)obj_sz
;
802 src
->data
= git__malloc(sz
);
803 GITERR_CHECK_ALLOC(src
->data
);
804 memcpy(src
->data
, git_odb_object_data(obj
), sz
);
806 git_odb_object_free(obj
);
808 if (sz
!= src_size
) {
809 giterr_set(GITERR_INVALID
,
810 "Inconsistent source object length");
817 src
->index
= git_delta_create_index(src
->data
, src_size
);
819 return 0; /* suboptimal pack - out of memory */
821 *mem_usage
+= git_delta_sizeof_index(src
->index
);
824 delta_buf
= git_delta_create(src
->index
, trg
->data
, trg_size
,
825 &delta_size
, max_size
);
829 if (trg_object
->delta
) {
830 /* Prefer only shallower same-sized deltas. */
831 if (delta_size
== trg_object
->delta_size
&&
832 src
->depth
+ 1 >= trg
->depth
) {
833 git__free(delta_buf
);
838 git_packbuilder__cache_lock(pb
);
839 if (trg_object
->delta_data
) {
840 git__free(trg_object
->delta_data
);
841 pb
->delta_cache_size
-= trg_object
->delta_size
;
842 trg_object
->delta_data
= NULL
;
844 if (delta_cacheable(pb
, src_size
, trg_size
, delta_size
)) {
845 bool overflow
= git__add_uint64_overflow(
846 &pb
->delta_cache_size
, pb
->delta_cache_size
, delta_size
);
848 git_packbuilder__cache_unlock(pb
);
851 !(trg_object
->delta_data
= git__realloc(delta_buf
, delta_size
)))
854 /* create delta when writing the pack */
855 git_packbuilder__cache_unlock(pb
);
856 git__free(delta_buf
);
859 trg_object
->delta
= src_object
;
860 trg_object
->delta_size
= delta_size
;
861 trg
->depth
= src
->depth
+ 1;
867 static unsigned int check_delta_limit(git_pobject
*me
, unsigned int n
)
869 git_pobject
*child
= me
->delta_child
;
873 unsigned int c
= check_delta_limit(child
, n
+ 1);
876 child
= child
->delta_sibling
;
881 static unsigned long free_unpacked(struct unpacked
*n
)
883 unsigned long freed_mem
= git_delta_sizeof_index(n
->index
);
884 git_delta_free_index(n
->index
);
887 freed_mem
+= (unsigned long)n
->object
->size
;
896 static int report_delta_progress(git_packbuilder
*pb
, uint32_t count
, bool force
)
900 if (pb
->progress_cb
) {
901 double current_time
= git__timer();
902 double elapsed
= current_time
- pb
->last_progress_report_time
;
904 if (force
|| elapsed
>= MIN_PROGRESS_UPDATE_INTERVAL
) {
905 pb
->last_progress_report_time
= current_time
;
907 ret
= pb
->progress_cb(
908 GIT_PACKBUILDER_DELTAFICATION
,
909 count
, pb
->nr_objects
, pb
->progress_cb_payload
);
912 return giterr_set_after_callback(ret
);
919 static int find_deltas(git_packbuilder
*pb
, git_pobject
**list
,
920 unsigned int *list_size
, unsigned int window
,
924 git_buf zbuf
= GIT_BUF_INIT
;
925 struct unpacked
*array
;
926 uint32_t idx
= 0, count
= 0;
927 unsigned long mem_usage
= 0;
931 array
= git__calloc(window
, sizeof(struct unpacked
));
932 GITERR_CHECK_ALLOC(array
);
935 struct unpacked
*n
= array
+ idx
;
936 int max_depth
, j
, best_base
= -1;
938 git_packbuilder__progress_lock(pb
);
940 git_packbuilder__progress_unlock(pb
);
944 pb
->nr_deltified
+= 1;
945 report_delta_progress(pb
, pb
->nr_deltified
, false);
949 git_packbuilder__progress_unlock(pb
);
951 mem_usage
-= free_unpacked(n
);
954 while (pb
->window_memory_limit
&&
955 mem_usage
> pb
->window_memory_limit
&&
957 uint32_t tail
= (idx
+ window
- count
) % window
;
958 mem_usage
-= free_unpacked(array
+ tail
);
963 * If the current object is at pack edge, take the depth the
964 * objects that depend on the current object into account
965 * otherwise they would become too deep.
968 if (po
->delta_child
) {
969 max_depth
-= check_delta_limit(po
, 0);
977 uint32_t other_idx
= idx
+ j
;
980 if (other_idx
>= window
)
983 m
= array
+ other_idx
;
987 if (try_delta(pb
, n
, m
, max_depth
, &mem_usage
, &ret
) < 0)
992 best_base
= other_idx
;
996 * If we decided to cache the delta data, then it is best
997 * to compress it right away. First because we have to do
998 * it anyway, and doing it here while we're threaded will
999 * save a lot of time in the non threaded write phase,
1000 * as well as allow for caching more deltas within
1001 * the same cache size limit.
1003 * But only if not writing to stdout, since in that case
1004 * the network is most likely throttling writes anyway,
1005 * and therefore it is best to go to the write phase ASAP
1006 * instead, as we can afford spending more time compressing
1007 * between writes at that moment.
1009 if (po
->delta_data
) {
1010 if (git_zstream_deflatebuf(&zbuf
, po
->delta_data
, po
->delta_size
) < 0)
1013 git__free(po
->delta_data
);
1014 po
->delta_data
= git__malloc(zbuf
.size
);
1015 GITERR_CHECK_ALLOC(po
->delta_data
);
1017 memcpy(po
->delta_data
, zbuf
.ptr
, zbuf
.size
);
1018 po
->z_delta_size
= (unsigned long)zbuf
.size
;
1019 git_buf_clear(&zbuf
);
1021 git_packbuilder__cache_lock(pb
);
1022 pb
->delta_cache_size
-= po
->delta_size
;
1023 pb
->delta_cache_size
+= po
->z_delta_size
;
1024 git_packbuilder__cache_unlock(pb
);
1028 * If we made n a delta, and if n is already at max
1029 * depth, leaving it in the window is pointless. we
1030 * should evict it first.
1032 if (po
->delta
&& max_depth
<= n
->depth
)
1036 * Move the best delta base up in the window, after the
1037 * currently deltified object, to keep it longer. It will
1038 * be the first base object to be attempted next.
1041 struct unpacked swap
= array
[best_base
];
1042 int dist
= (window
+ idx
- best_base
) % window
;
1043 int dst
= best_base
;
1045 int src
= (dst
+ 1) % window
;
1046 array
[dst
] = array
[src
];
1054 if (count
+ 1 < window
)
1062 for (i
= 0; i
< window
; ++i
) {
1063 git__free(array
[i
].index
);
1064 git__free(array
[i
].data
);
1067 git_buf_free(&zbuf
);
1074 struct thread_params
{
1076 git_packbuilder
*pb
;
1083 unsigned int list_size
;
1084 unsigned int remaining
;
1092 static void *threaded_find_deltas(void *arg
)
1094 struct thread_params
*me
= arg
;
1096 while (me
->remaining
) {
1097 if (find_deltas(me
->pb
, me
->list
, &me
->remaining
,
1098 me
->window
, me
->depth
) < 0) {
1102 git_packbuilder__progress_lock(me
->pb
);
1104 git_cond_signal(&me
->pb
->progress_cond
);
1105 git_packbuilder__progress_unlock(me
->pb
);
1107 if (git_mutex_lock(&me
->mutex
)) {
1108 giterr_set(GITERR_THREAD
, "unable to lock packfile condition mutex");
1112 while (!me
->data_ready
)
1113 git_cond_wait(&me
->cond
, &me
->mutex
);
1116 * We must not set ->data_ready before we wait on the
1117 * condition because the main thread may have set it to 1
1118 * before we get here. In order to be sure that new
1119 * work is available if we see 1 in ->data_ready, it
1120 * was initialized to 0 before this thread was spawned
1121 * and we reset it to 0 right away.
1124 git_mutex_unlock(&me
->mutex
);
1126 /* leave ->working 1 so that this doesn't get more work assigned */
1130 static int ll_find_deltas(git_packbuilder
*pb
, git_pobject
**list
,
1131 unsigned int list_size
, unsigned int window
,
1134 struct thread_params
*p
;
1135 int i
, ret
, active_threads
= 0;
1137 if (!pb
->nr_threads
)
1138 pb
->nr_threads
= git_online_cpus();
1140 if (pb
->nr_threads
<= 1) {
1141 find_deltas(pb
, list
, &list_size
, window
, depth
);
1145 p
= git__mallocarray(pb
->nr_threads
, sizeof(*p
));
1146 GITERR_CHECK_ALLOC(p
);
1148 /* Partition the work among the threads */
1149 for (i
= 0; i
< pb
->nr_threads
; ++i
) {
1150 unsigned sub_size
= list_size
/ (pb
->nr_threads
- i
);
1152 /* don't use too small segments or no deltas will be found */
1153 if (sub_size
< 2*window
&& i
+1 < pb
->nr_threads
)
1157 p
[i
].window
= window
;
1160 p
[i
].data_ready
= 0;
1162 /* try to split chunks on "path" boundaries */
1163 while (sub_size
&& sub_size
< list_size
&&
1164 list
[sub_size
]->hash
&&
1165 list
[sub_size
]->hash
== list
[sub_size
-1]->hash
)
1169 p
[i
].list_size
= sub_size
;
1170 p
[i
].remaining
= sub_size
;
1173 list_size
-= sub_size
;
1176 /* Start work threads */
1177 for (i
= 0; i
< pb
->nr_threads
; ++i
) {
1178 if (!p
[i
].list_size
)
1181 git_mutex_init(&p
[i
].mutex
);
1182 git_cond_init(&p
[i
].cond
);
1184 ret
= git_thread_create(&p
[i
].thread
, NULL
,
1185 threaded_find_deltas
, &p
[i
]);
1187 giterr_set(GITERR_THREAD
, "unable to create thread");
1194 * Now let's wait for work completion. Each time a thread is done
1195 * with its work, we steal half of the remaining work from the
1196 * thread with the largest number of unprocessed objects and give
1197 * it to that newly idle thread. This ensure good load balancing
1198 * until the remaining object list segments are simply too short
1199 * to be worth splitting anymore.
1201 while (active_threads
) {
1202 struct thread_params
*target
= NULL
;
1203 struct thread_params
*victim
= NULL
;
1204 unsigned sub_size
= 0;
1206 /* Start by locating a thread that has transitioned its
1207 * 'working' flag from 1 -> 0. This indicates that it is
1208 * ready to receive more work using our work-stealing
1210 git_packbuilder__progress_lock(pb
);
1212 for (i
= 0; !target
&& i
< pb
->nr_threads
; i
++)
1217 git_cond_wait(&pb
->progress_cond
, &pb
->progress_mutex
);
1220 /* At this point we hold the progress lock and have located
1221 * a thread to receive more work. We still need to locate a
1222 * thread from which to steal work (the victim). */
1223 for (i
= 0; i
< pb
->nr_threads
; i
++)
1224 if (p
[i
].remaining
> 2*window
&&
1225 (!victim
|| victim
->remaining
< p
[i
].remaining
))
1229 sub_size
= victim
->remaining
/ 2;
1230 list
= victim
->list
+ victim
->list_size
- sub_size
;
1231 while (sub_size
&& list
[0]->hash
&&
1232 list
[0]->hash
== list
[-1]->hash
) {
1238 * It is possible for some "paths" to have
1239 * so many objects that no hash boundary
1240 * might be found. Let's just steal the
1241 * exact half in that case.
1243 sub_size
= victim
->remaining
/ 2;
1246 target
->list
= list
;
1247 victim
->list_size
-= sub_size
;
1248 victim
->remaining
-= sub_size
;
1250 target
->list_size
= sub_size
;
1251 target
->remaining
= sub_size
;
1252 target
->working
= 1;
1253 git_packbuilder__progress_unlock(pb
);
1255 if (git_mutex_lock(&target
->mutex
)) {
1256 giterr_set(GITERR_THREAD
, "unable to lock packfile condition mutex");
1261 target
->data_ready
= 1;
1262 git_cond_signal(&target
->cond
);
1263 git_mutex_unlock(&target
->mutex
);
1266 git_thread_join(&target
->thread
, NULL
);
1267 git_cond_free(&target
->cond
);
1268 git_mutex_free(&target
->mutex
);
1278 #define ll_find_deltas(pb, l, ls, w, d) find_deltas(pb, l, &ls, w, d)
1281 static int prepare_pack(git_packbuilder
*pb
)
1283 git_pobject
**delta_list
;
1284 unsigned int i
, n
= 0;
1286 if (pb
->nr_objects
== 0 || pb
->done
)
1287 return 0; /* nothing to do */
1290 * Although we do not report progress during deltafication, we
1291 * at least report that we are in the deltafication stage
1293 if (pb
->progress_cb
)
1294 pb
->progress_cb(GIT_PACKBUILDER_DELTAFICATION
, 0, pb
->nr_objects
, pb
->progress_cb_payload
);
1296 delta_list
= git__mallocarray(pb
->nr_objects
, sizeof(*delta_list
));
1297 GITERR_CHECK_ALLOC(delta_list
);
1299 for (i
= 0; i
< pb
->nr_objects
; ++i
) {
1300 git_pobject
*po
= pb
->object_list
+ i
;
1302 /* Make sure the item is within our size limits */
1303 if (po
->size
< 50 || po
->size
> pb
->big_file_threshold
)
1306 delta_list
[n
++] = po
;
1310 git__tsort((void **)delta_list
, n
, type_size_sort
);
1311 if (ll_find_deltas(pb
, delta_list
, n
,
1312 GIT_PACK_WINDOW
+ 1,
1313 GIT_PACK_DEPTH
) < 0) {
1314 git__free(delta_list
);
1319 report_delta_progress(pb
, pb
->nr_objects
, true);
1322 git__free(delta_list
);
1326 #define PREPARE_PACK if (prepare_pack(pb) < 0) { return -1; }
1328 int git_packbuilder_foreach(git_packbuilder
*pb
, int (*cb
)(void *buf
, size_t size
, void *payload
), void *payload
)
1331 return write_pack(pb
, cb
, payload
);
1334 int git_packbuilder_write_buf(git_buf
*buf
, git_packbuilder
*pb
)
1337 git_buf_sanitize(buf
);
1338 return write_pack(pb
, &write_pack_buf
, buf
);
1341 static int write_cb(void *buf
, size_t len
, void *payload
)
1343 struct pack_write_context
*ctx
= payload
;
1344 return git_indexer_append(ctx
->indexer
, buf
, len
, ctx
->stats
);
1347 int git_packbuilder_write(
1348 git_packbuilder
*pb
,
1351 git_transfer_progress_cb progress_cb
,
1352 void *progress_cb_payload
)
1354 git_indexer
*indexer
;
1355 git_transfer_progress stats
;
1356 struct pack_write_context ctx
;
1360 if (git_indexer_new(
1361 &indexer
, path
, mode
, pb
->odb
, progress_cb
, progress_cb_payload
) < 0)
1364 ctx
.indexer
= indexer
;
1367 if (git_packbuilder_foreach(pb
, write_cb
, &ctx
) < 0 ||
1368 git_indexer_commit(indexer
, &stats
) < 0) {
1369 git_indexer_free(indexer
);
1373 git_oid_cpy(&pb
->pack_oid
, git_indexer_hash(indexer
));
1375 git_indexer_free(indexer
);
1381 const git_oid
*git_packbuilder_hash(git_packbuilder
*pb
)
1383 return &pb
->pack_oid
;
1387 static int cb_tree_walk(
1388 const char *root
, const git_tree_entry
*entry
, void *payload
)
1391 struct tree_walk_context
*ctx
= payload
;
1393 /* A commit inside a tree represents a submodule commit and should be skipped. */
1394 if (git_tree_entry_type(entry
) == GIT_OBJ_COMMIT
)
1397 if (!(error
= git_buf_sets(&ctx
->buf
, root
)) &&
1398 !(error
= git_buf_puts(&ctx
->buf
, git_tree_entry_name(entry
))))
1399 error
= git_packbuilder_insert(
1400 ctx
->pb
, git_tree_entry_id(entry
), git_buf_cstr(&ctx
->buf
));
1405 int git_packbuilder_insert_commit(git_packbuilder
*pb
, const git_oid
*oid
)
1409 if (git_commit_lookup(&commit
, pb
->repo
, oid
) < 0 ||
1410 git_packbuilder_insert(pb
, oid
, NULL
) < 0)
1413 if (git_packbuilder_insert_tree(pb
, git_commit_tree_id(commit
)) < 0)
1416 git_commit_free(commit
);
1420 int git_packbuilder_insert_tree(git_packbuilder
*pb
, const git_oid
*oid
)
1423 git_tree
*tree
= NULL
;
1424 struct tree_walk_context context
= { pb
, GIT_BUF_INIT
};
1426 if (!(error
= git_tree_lookup(&tree
, pb
->repo
, oid
)) &&
1427 !(error
= git_packbuilder_insert(pb
, oid
, NULL
)))
1428 error
= git_tree_walk(tree
, GIT_TREEWALK_PRE
, cb_tree_walk
, &context
);
1430 git_tree_free(tree
);
1431 git_buf_free(&context
.buf
);
1435 int git_packbuilder_insert_recur(git_packbuilder
*pb
, const git_oid
*id
, const char *name
)
1442 if ((error
= git_object_lookup(&obj
, pb
->repo
, id
, GIT_OBJ_ANY
)) < 0)
1445 switch (git_object_type(obj
)) {
1447 error
= git_packbuilder_insert(pb
, id
, name
);
1450 error
= git_packbuilder_insert_tree(pb
, id
);
1452 case GIT_OBJ_COMMIT
:
1453 error
= git_packbuilder_insert_commit(pb
, id
);
1456 if ((error
= git_packbuilder_insert(pb
, id
, name
)) < 0)
1458 error
= git_packbuilder_insert_recur(pb
, git_tag_target_id((git_tag
*) obj
), NULL
);
1462 giterr_set(GITERR_INVALID
, "unknown object type");
1467 git_object_free(obj
);
1471 uint32_t git_packbuilder_object_count(git_packbuilder
*pb
)
1473 return pb
->nr_objects
;
1476 uint32_t git_packbuilder_written(git_packbuilder
*pb
)
1478 return pb
->nr_written
;
1481 int lookup_walk_object(git_walk_object
**out
, git_packbuilder
*pb
, const git_oid
*id
)
1483 git_walk_object
*obj
;
1485 obj
= git_pool_mallocz(&pb
->object_pool
, 1);
1491 git_oid_cpy(&obj
->id
, id
);
1497 static int retrieve_object(git_walk_object
**out
, git_packbuilder
*pb
, const git_oid
*id
)
1501 git_walk_object
*obj
;
1503 pos
= git_oidmap_lookup_index(pb
->walk_objects
, id
);
1504 if (git_oidmap_valid_index(pb
->walk_objects
, pos
)) {
1505 obj
= git_oidmap_value_at(pb
->walk_objects
, pos
);
1507 if ((error
= lookup_walk_object(&obj
, pb
, id
)) < 0)
1510 git_oidmap_insert(pb
->walk_objects
, &obj
->id
, obj
, error
);
1517 static int mark_blob_uninteresting(git_packbuilder
*pb
, const git_oid
*id
)
1520 git_walk_object
*obj
;
1522 if ((error
= retrieve_object(&obj
, pb
, id
)) < 0)
1525 obj
->uninteresting
= 1;
1530 static int mark_tree_uninteresting(git_packbuilder
*pb
, const git_oid
*id
)
1532 git_walk_object
*obj
;
1537 if ((error
= retrieve_object(&obj
, pb
, id
)) < 0)
1540 if (obj
->uninteresting
)
1543 obj
->uninteresting
= 1;
1545 if ((error
= git_tree_lookup(&tree
, pb
->repo
, id
)) < 0)
1548 for (i
= 0; i
< git_tree_entrycount(tree
); i
++) {
1549 const git_tree_entry
*entry
= git_tree_entry_byindex(tree
, i
);
1550 const git_oid
*entry_id
= git_tree_entry_id(entry
);
1551 switch (git_tree_entry_type(entry
)) {
1553 if ((error
= mark_tree_uninteresting(pb
, entry_id
)) < 0)
1557 if ((error
= mark_blob_uninteresting(pb
, entry_id
)) < 0)
1561 /* it's a submodule or something unknown, we don't want it */
1567 git_tree_free(tree
);
1572 * Mark the edges of the graph uninteresting. Since we start from a
1573 * git_revwalk, the commits are already uninteresting, but we need to
1574 * mark the trees and blobs.
1576 static int mark_edges_uninteresting(git_packbuilder
*pb
, git_commit_list
*commits
)
1579 git_commit_list
*list
;
1582 for (list
= commits
; list
; list
= list
->next
) {
1583 if (!list
->item
->uninteresting
)
1586 if ((error
= git_commit_lookup(&commit
, pb
->repo
, &list
->item
->oid
)) < 0)
1589 error
= mark_tree_uninteresting(pb
, git_commit_tree_id(commit
));
1590 git_commit_free(commit
);
1599 int insert_tree(git_packbuilder
*pb
, git_tree
*tree
)
1604 git_walk_object
*obj
;
1607 if ((error
= retrieve_object(&obj
, pb
, git_tree_id(tree
))) < 0)
1615 if ((error
= git_packbuilder_insert(pb
, &obj
->id
, NULL
)))
1618 for (i
= 0; i
< git_tree_entrycount(tree
); i
++) {
1619 const git_tree_entry
*entry
= git_tree_entry_byindex(tree
, i
);
1620 const git_oid
*entry_id
= git_tree_entry_id(entry
);
1621 switch (git_tree_entry_type(entry
)) {
1623 if ((error
= git_tree_lookup(&subtree
, pb
->repo
, entry_id
)) < 0)
1626 error
= insert_tree(pb
, subtree
);
1627 git_tree_free(subtree
);
1634 name
= git_tree_entry_name(entry
);
1635 if ((error
= git_packbuilder_insert(pb
, entry_id
, name
)) < 0)
1639 /* it's a submodule or something unknown, we don't want it */
1648 int insert_commit(git_packbuilder
*pb
, git_walk_object
*obj
)
1651 git_commit
*commit
= NULL
;
1652 git_tree
*tree
= NULL
;
1656 if ((error
= git_packbuilder_insert(pb
, &obj
->id
, NULL
)) < 0)
1659 if ((error
= git_commit_lookup(&commit
, pb
->repo
, &obj
->id
)) < 0)
1662 if ((error
= git_tree_lookup(&tree
, pb
->repo
, git_commit_tree_id(commit
))) < 0)
1665 if ((error
= insert_tree(pb
, tree
)) < 0)
1669 git_commit_free(commit
);
1670 git_tree_free(tree
);
1674 int git_packbuilder_insert_walk(git_packbuilder
*pb
, git_revwalk
*walk
)
1678 git_walk_object
*obj
;
1682 if ((error
= mark_edges_uninteresting(pb
, walk
->user_input
)) < 0)
1686 * TODO: git marks the parents of the edges
1687 * uninteresting. This may provide a speed advantage, but does
1688 * seem to assume the remote does not have a single-commit
1689 * history on the other end.
1692 /* walk down each tree up to the blobs and insert them, stopping when uninteresting */
1693 while ((error
= git_revwalk_next(&id
, walk
)) == 0) {
1694 if ((error
= retrieve_object(&obj
, pb
, &id
)) < 0)
1697 if (obj
->seen
|| obj
->uninteresting
)
1700 if ((error
= insert_commit(pb
, obj
)) < 0)
1704 if (error
== GIT_ITEROVER
)
1710 int git_packbuilder_set_callbacks(git_packbuilder
*pb
, git_packbuilder_progress progress_cb
, void *progress_cb_payload
)
1715 pb
->progress_cb
= progress_cb
;
1716 pb
->progress_cb_payload
= progress_cb_payload
;
1721 void git_packbuilder_free(git_packbuilder
*pb
)
1728 git_mutex_free(&pb
->cache_mutex
);
1729 git_mutex_free(&pb
->progress_mutex
);
1730 git_cond_free(&pb
->progress_cond
);
1735 git_odb_free(pb
->odb
);
1738 git_oidmap_free(pb
->object_ix
);
1740 if (pb
->object_list
)
1741 git__free(pb
->object_list
);
1743 git_oidmap_free(pb
->walk_objects
);
1744 git_pool_clear(&pb
->object_pool
);
1746 git_hash_ctx_cleanup(&pb
->ctx
);
1747 git_zstream_free(&pb
->zstream
);