2 * Copyright (C) the libgit2 contributors. All rights reserved.
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
8 #include "pack-objects.h"
20 #include "commit_list.h"
22 #include "git2/pack.h"
23 #include "git2/commit.h"
25 #include "git2/indexer.h"
26 #include "git2/config.h"
31 struct git_delta_index
*index
;
35 struct tree_walk_context
{
40 struct pack_write_context
{
42 git_indexer_progress
*stats
;
47 unsigned int uninteresting
:1,
52 # define GIT_PACKBUILDER__MUTEX_OP(pb, mtx, op) git_mutex_##op(&(pb)->mtx)
54 # define GIT_PACKBUILDER__MUTEX_OP(pb, mtx, op) git__noop()
57 #define git_packbuilder__cache_lock(pb) GIT_PACKBUILDER__MUTEX_OP(pb, cache_mutex, lock)
58 #define git_packbuilder__cache_unlock(pb) GIT_PACKBUILDER__MUTEX_OP(pb, cache_mutex, unlock)
59 #define git_packbuilder__progress_lock(pb) GIT_PACKBUILDER__MUTEX_OP(pb, progress_mutex, lock)
60 #define git_packbuilder__progress_unlock(pb) GIT_PACKBUILDER__MUTEX_OP(pb, progress_mutex, unlock)
62 /* The minimal interval between progress updates (in seconds). */
63 #define MIN_PROGRESS_UPDATE_INTERVAL 0.5
65 /* Size of the buffer to feed to zlib */
66 #define COMPRESS_BUFLEN (1024 * 1024)
68 static unsigned name_hash(const char *name
)
76 * This effectively just creates a sortable number from the
77 * last sixteen non-whitespace characters. Last characters
78 * count "most", so things that end in ".c" sort together.
80 while ((c
= *name
++) != 0) {
83 hash
= (hash
>> 2) + (c
<< 24);
88 static int packbuilder_config(git_packbuilder
*pb
)
94 if ((ret
= git_repository_config_snapshot(&config
, pb
->repo
)) < 0)
97 #define config_get(KEY,DST,DFLT) do { \
98 ret = git_config_get_int64(&val, config, KEY); \
100 if (!git__is_sizet(val)) { \
101 git_error_set(GIT_ERROR_CONFIG, \
102 "configuration value '%s' is too large", KEY); \
106 (DST) = (size_t)val; \
107 } else if (ret == GIT_ENOTFOUND) { \
110 } else if (ret < 0) goto out; } while (0)
112 config_get("pack.deltaCacheSize", pb
->max_delta_cache_size
,
113 GIT_PACK_DELTA_CACHE_SIZE
);
114 config_get("pack.deltaCacheLimit", pb
->cache_max_small_delta_size
,
115 GIT_PACK_DELTA_CACHE_LIMIT
);
116 config_get("pack.deltaCacheSize", pb
->big_file_threshold
,
117 GIT_PACK_BIG_FILE_THRESHOLD
);
118 config_get("pack.windowMemory", pb
->window_memory_limit
, 0);
123 git_config_free(config
);
128 int git_packbuilder_new(git_packbuilder
**out
, git_repository
*repo
)
134 pb
= git__calloc(1, sizeof(*pb
));
135 GIT_ERROR_CHECK_ALLOC(pb
);
137 if (git_oidmap_new(&pb
->object_ix
) < 0 ||
138 git_oidmap_new(&pb
->walk_objects
) < 0 ||
139 git_pool_init(&pb
->object_pool
, sizeof(struct walk_object
)) < 0)
143 pb
->nr_threads
= 1; /* do not spawn any thread by default */
145 if (git_hash_ctx_init(&pb
->ctx
, GIT_HASH_ALGORITHM_SHA1
) < 0 ||
146 git_zstream_init(&pb
->zstream
, GIT_ZSTREAM_DEFLATE
) < 0 ||
147 git_repository_odb(&pb
->odb
, repo
) < 0 ||
148 packbuilder_config(pb
) < 0)
153 if (git_mutex_init(&pb
->cache_mutex
) ||
154 git_mutex_init(&pb
->progress_mutex
) ||
155 git_cond_init(&pb
->progress_cond
))
157 git_error_set(GIT_ERROR_OS
, "failed to initialize packbuilder mutex");
167 git_packbuilder_free(pb
);
171 unsigned int git_packbuilder_set_threads(git_packbuilder
*pb
, unsigned int n
)
179 GIT_ASSERT(pb
->nr_threads
== 1);
182 return pb
->nr_threads
;
185 static int rehash(git_packbuilder
*pb
)
190 git_oidmap_clear(pb
->object_ix
);
192 for (i
= 0, po
= pb
->object_list
; i
< pb
->nr_objects
; i
++, po
++) {
193 if (git_oidmap_set(pb
->object_ix
, &po
->id
, po
) < 0)
200 int git_packbuilder_insert(git_packbuilder
*pb
, const git_oid
*oid
,
210 /* If the object already exists in the hash table, then we don't
211 * have any work to do */
212 if (git_oidmap_exists(pb
->object_ix
, oid
))
215 if (pb
->nr_objects
>= pb
->nr_alloc
) {
216 GIT_ERROR_CHECK_ALLOC_ADD(&newsize
, pb
->nr_alloc
, 1024);
217 GIT_ERROR_CHECK_ALLOC_MULTIPLY(&newsize
, newsize
/ 2, 3);
219 if (!git__is_uint32(newsize
)) {
220 git_error_set(GIT_ERROR_NOMEMORY
, "packfile too large to fit in memory.");
224 pb
->nr_alloc
= newsize
;
226 pb
->object_list
= git__reallocarray(pb
->object_list
,
227 pb
->nr_alloc
, sizeof(*po
));
228 GIT_ERROR_CHECK_ALLOC(pb
->object_list
);
234 po
= pb
->object_list
+ pb
->nr_objects
;
235 memset(po
, 0x0, sizeof(*po
));
237 if ((ret
= git_odb_read_header(&po
->size
, &po
->type
, pb
->odb
, oid
)) < 0)
241 git_oid_cpy(&po
->id
, oid
);
242 po
->hash
= name_hash(name
);
244 if (git_oidmap_set(pb
->object_ix
, &po
->id
, po
) < 0) {
251 if (pb
->progress_cb
) {
252 double current_time
= git__timer();
253 double elapsed
= current_time
- pb
->last_progress_report_time
;
255 if (elapsed
< 0 || elapsed
>= MIN_PROGRESS_UPDATE_INTERVAL
) {
256 pb
->last_progress_report_time
= current_time
;
258 ret
= pb
->progress_cb(
259 GIT_PACKBUILDER_ADDING_OBJECTS
,
260 pb
->nr_objects
, 0, pb
->progress_cb_payload
);
263 return git_error_set_after_callback(ret
);
270 static int get_delta(void **out
, git_odb
*odb
, git_pobject
*po
)
272 git_odb_object
*src
= NULL
, *trg
= NULL
;
279 if (git_odb_read(&src
, odb
, &po
->delta
->id
) < 0 ||
280 git_odb_read(&trg
, odb
, &po
->id
) < 0)
283 error
= git_delta(&delta_buf
, &delta_size
,
284 git_odb_object_data(src
), git_odb_object_size(src
),
285 git_odb_object_data(trg
), git_odb_object_size(trg
),
288 if (error
< 0 && error
!= GIT_EBUFS
)
291 if (error
== GIT_EBUFS
|| delta_size
!= po
->delta_size
) {
292 git_error_set(GIT_ERROR_INVALID
, "delta size changed");
298 git_odb_object_free(src
);
299 git_odb_object_free(trg
);
303 git_odb_object_free(src
);
304 git_odb_object_free(trg
);
308 static int write_object(
311 int (*write_cb
)(void *buf
, size_t size
, void *cb_data
),
314 git_odb_object
*obj
= NULL
;
316 unsigned char hdr
[10], *zbuf
= NULL
;
318 size_t hdr_len
, zbuf_len
= COMPRESS_BUFLEN
, data_len
;
322 * If we have a delta base, let's use the delta to save space.
323 * Otherwise load the whole object. 'data' ends up pointing to
324 * whatever data we want to put into the packfile.
328 data
= po
->delta_data
;
329 else if ((error
= get_delta(&data
, pb
->odb
, po
)) < 0)
332 data_len
= po
->delta_size
;
333 type
= GIT_OBJECT_REF_DELTA
;
335 if ((error
= git_odb_read(&obj
, pb
->odb
, &po
->id
)) < 0)
338 data
= (void *)git_odb_object_data(obj
);
339 data_len
= git_odb_object_size(obj
);
340 type
= git_odb_object_type(obj
);
344 if ((error
= git_packfile__object_header(&hdr_len
, hdr
, data_len
, type
)) < 0 ||
345 (error
= write_cb(hdr
, hdr_len
, cb_data
)) < 0 ||
346 (error
= git_hash_update(&pb
->ctx
, hdr
, hdr_len
)) < 0)
349 if (type
== GIT_OBJECT_REF_DELTA
) {
350 if ((error
= write_cb(po
->delta
->id
.id
, GIT_OID_RAWSZ
, cb_data
)) < 0 ||
351 (error
= git_hash_update(&pb
->ctx
, po
->delta
->id
.id
, GIT_OID_RAWSZ
)) < 0)
356 if (po
->z_delta_size
) {
357 data_len
= po
->z_delta_size
;
359 if ((error
= write_cb(data
, data_len
, cb_data
)) < 0 ||
360 (error
= git_hash_update(&pb
->ctx
, data
, data_len
)) < 0)
363 zbuf
= git__malloc(zbuf_len
);
364 GIT_ERROR_CHECK_ALLOC(zbuf
);
366 git_zstream_reset(&pb
->zstream
);
368 if ((error
= git_zstream_set_input(&pb
->zstream
, data
, data_len
)) < 0)
371 while (!git_zstream_done(&pb
->zstream
)) {
372 if ((error
= git_zstream_get_output(zbuf
, &zbuf_len
, &pb
->zstream
)) < 0 ||
373 (error
= write_cb(zbuf
, zbuf_len
, cb_data
)) < 0 ||
374 (error
= git_hash_update(&pb
->ctx
, zbuf
, zbuf_len
)) < 0)
377 zbuf_len
= COMPRESS_BUFLEN
; /* reuse buffer */
382 * If po->delta is true, data is a delta and it is our
383 * responsibility to free it (otherwise it's a git_object's
384 * data). We set po->delta_data to NULL in case we got the
385 * data from there instead of get_delta(). If we didn't,
390 po
->delta_data
= NULL
;
397 git_odb_object_free(obj
);
401 enum write_one_status
{
402 WRITE_ONE_SKIP
= -1, /* already written */
403 WRITE_ONE_BREAK
= 0, /* writing this will bust the limit; not written */
404 WRITE_ONE_WRITTEN
= 1, /* normal */
405 WRITE_ONE_RECURSIVE
= 2 /* already scheduled to be written */
408 static int write_one(
409 enum write_one_status
*status
,
412 int (*write_cb
)(void *buf
, size_t size
, void *cb_data
),
418 *status
= WRITE_ONE_RECURSIVE
;
420 } else if (po
->written
) {
421 *status
= WRITE_ONE_SKIP
;
428 if ((error
= write_one(status
, pb
, po
->delta
, write_cb
, cb_data
)) < 0)
431 /* we cannot depend on this one */
432 if (*status
== WRITE_ONE_RECURSIVE
)
436 *status
= WRITE_ONE_WRITTEN
;
440 return write_object(pb
, po
, write_cb
, cb_data
);
443 GIT_INLINE(void) add_to_write_order(git_pobject
**wo
, size_t *endp
,
452 static void add_descendants_to_write_order(git_pobject
**wo
, size_t *endp
,
455 int add_to_order
= 1;
459 /* add this node... */
460 add_to_write_order(wo
, endp
, po
);
461 /* all its siblings... */
462 for (s
= po
->delta_sibling
; s
; s
= s
->delta_sibling
) {
463 add_to_write_order(wo
, endp
, s
);
466 /* drop down a level to add left subtree nodes if possible */
467 if (po
->delta_child
) {
469 po
= po
->delta_child
;
472 /* our sibling might have some children, it is next */
473 if (po
->delta_sibling
) {
474 po
= po
->delta_sibling
;
477 /* go back to our parent node */
479 while (po
&& !po
->delta_sibling
) {
480 /* we're on the right side of a subtree, keep
481 * going up until we can go right again */
485 /* done- we hit our original root node */
488 /* pass it off to sibling at this level */
489 po
= po
->delta_sibling
;
494 static void add_family_to_write_order(git_pobject
**wo
, size_t *endp
,
499 for (root
= po
; root
->delta
; root
= root
->delta
)
501 add_descendants_to_write_order(wo
, endp
, root
);
504 static int cb_tag_foreach(const char *name
, git_oid
*oid
, void *data
)
506 git_packbuilder
*pb
= data
;
511 if ((po
= git_oidmap_get(pb
->object_ix
, oid
)) == NULL
)
516 /* TODO: peel objects */
521 static int compute_write_order(git_pobject
***out
, git_packbuilder
*pb
)
523 size_t i
, wo_end
, last_untagged
;
531 if ((wo
= git__mallocarray(pb
->nr_objects
, sizeof(*wo
))) == NULL
)
534 for (i
= 0; i
< pb
->nr_objects
; i
++) {
535 git_pobject
*po
= pb
->object_list
+ i
;
538 po
->delta_child
= NULL
;
539 po
->delta_sibling
= NULL
;
543 * Fully connect delta_child/delta_sibling network.
544 * Make sure delta_sibling is sorted in the original
547 for (i
= pb
->nr_objects
; i
> 0;) {
548 git_pobject
*po
= &pb
->object_list
[--i
];
551 /* Mark me as the first child */
552 po
->delta_sibling
= po
->delta
->delta_child
;
553 po
->delta
->delta_child
= po
;
557 * Mark objects that are at the tip of tags.
559 if (git_tag_foreach(pb
->repo
, &cb_tag_foreach
, pb
) < 0) {
565 * Give the objects in the original recency order until
566 * we see a tagged tip.
568 for (i
= wo_end
= 0; i
< pb
->nr_objects
; i
++) {
569 git_pobject
*po
= pb
->object_list
+ i
;
572 add_to_write_order(wo
, &wo_end
, po
);
577 * Then fill all the tagged tips.
579 for (; i
< pb
->nr_objects
; i
++) {
580 git_pobject
*po
= pb
->object_list
+ i
;
582 add_to_write_order(wo
, &wo_end
, po
);
586 * And then all remaining commits and tags.
588 for (i
= last_untagged
; i
< pb
->nr_objects
; i
++) {
589 git_pobject
*po
= pb
->object_list
+ i
;
590 if (po
->type
!= GIT_OBJECT_COMMIT
&&
591 po
->type
!= GIT_OBJECT_TAG
)
593 add_to_write_order(wo
, &wo_end
, po
);
597 * And then all the trees.
599 for (i
= last_untagged
; i
< pb
->nr_objects
; i
++) {
600 git_pobject
*po
= pb
->object_list
+ i
;
601 if (po
->type
!= GIT_OBJECT_TREE
)
603 add_to_write_order(wo
, &wo_end
, po
);
607 * Finally all the rest in really tight order
609 for (i
= last_untagged
; i
< pb
->nr_objects
; i
++) {
610 git_pobject
*po
= pb
->object_list
+ i
;
612 add_family_to_write_order(wo
, &wo_end
, po
);
615 if (wo_end
!= pb
->nr_objects
) {
617 git_error_set(GIT_ERROR_INVALID
, "invalid write order");
625 static int write_pack(git_packbuilder
*pb
,
626 int (*write_cb
)(void *buf
, size_t size
, void *cb_data
),
629 git_pobject
**write_order
;
631 enum write_one_status status
;
632 struct git_pack_header ph
;
637 if ((error
= compute_write_order(&write_order
, pb
)) < 0)
640 if (!git__is_uint32(pb
->nr_objects
)) {
641 git_error_set(GIT_ERROR_INVALID
, "too many objects");
646 /* Write pack header */
647 ph
.hdr_signature
= htonl(PACK_SIGNATURE
);
648 ph
.hdr_version
= htonl(PACK_VERSION
);
649 ph
.hdr_entries
= htonl(pb
->nr_objects
);
651 if ((error
= write_cb(&ph
, sizeof(ph
), cb_data
)) < 0 ||
652 (error
= git_hash_update(&pb
->ctx
, &ph
, sizeof(ph
))) < 0)
655 pb
->nr_remaining
= pb
->nr_objects
;
658 for ( ; i
< pb
->nr_objects
; ++i
) {
661 if ((error
= write_one(&status
, pb
, po
, write_cb
, cb_data
)) < 0)
665 pb
->nr_remaining
-= pb
->nr_written
;
666 } while (pb
->nr_remaining
&& i
< pb
->nr_objects
);
668 if ((error
= git_hash_final(entry_oid
.id
, &pb
->ctx
)) < 0)
671 error
= write_cb(entry_oid
.id
, GIT_OID_RAWSZ
, cb_data
);
674 /* if callback cancelled writing, we must still free delta_data */
675 for ( ; i
< pb
->nr_objects
; ++i
) {
677 if (po
->delta_data
) {
678 git__free(po
->delta_data
);
679 po
->delta_data
= NULL
;
683 git__free(write_order
);
687 static int write_pack_buf(void *buf
, size_t size
, void *data
)
689 git_str
*b
= (git_str
*)data
;
690 return git_str_put(b
, buf
, size
);
693 static int type_size_sort(const void *_a
, const void *_b
)
695 const git_pobject
*a
= (git_pobject
*)_a
;
696 const git_pobject
*b
= (git_pobject
*)_b
;
698 if (a
->type
> b
->type
)
700 if (a
->type
< b
->type
)
702 if (a
->hash
> b
->hash
)
704 if (a
->hash
< b
->hash
)
709 if (a->preferred_base > b->preferred_base)
711 if (a->preferred_base < b->preferred_base)
714 if (a
->size
> b
->size
)
716 if (a
->size
< b
->size
)
718 return a
< b
? -1 : (a
> b
); /* newest first */
721 static int delta_cacheable(
729 if (git__add_sizet_overflow(&new_size
, pb
->delta_cache_size
, delta_size
))
732 if (pb
->max_delta_cache_size
&& new_size
> pb
->max_delta_cache_size
)
735 if (delta_size
< pb
->cache_max_small_delta_size
)
738 /* cache delta, if objects are large enough compared to delta size */
739 if ((src_size
>> 20) + (trg_size
>> 21) > (delta_size
>> 10))
745 static int try_delta(git_packbuilder
*pb
, struct unpacked
*trg
,
746 struct unpacked
*src
, size_t max_depth
,
747 size_t *mem_usage
, int *ret
)
749 git_pobject
*trg_object
= trg
->object
;
750 git_pobject
*src_object
= src
->object
;
752 size_t trg_size
, src_size
, delta_size
, sizediff
, max_size
, sz
;
756 /* Don't bother doing diffs between different types */
757 if (trg_object
->type
!= src_object
->type
) {
764 /* TODO: support reuse-delta */
766 /* Let's not bust the allowed depth. */
767 if (src
->depth
>= max_depth
)
770 /* Now some size filtering heuristics. */
771 trg_size
= trg_object
->size
;
772 if (!trg_object
->delta
) {
773 max_size
= trg_size
/2 - 20;
776 max_size
= trg_object
->delta_size
;
777 ref_depth
= trg
->depth
;
780 max_size
= (uint64_t)max_size
* (max_depth
- src
->depth
) /
781 (max_depth
- ref_depth
+ 1);
785 src_size
= src_object
->size
;
786 sizediff
= src_size
< trg_size
? trg_size
- src_size
: 0;
787 if (sizediff
>= max_size
)
789 if (trg_size
< src_size
/ 32)
792 /* Load data if not already done */
794 if (git_odb_read(&obj
, pb
->odb
, &trg_object
->id
) < 0)
797 sz
= git_odb_object_size(obj
);
798 trg
->data
= git__malloc(sz
);
799 GIT_ERROR_CHECK_ALLOC(trg
->data
);
800 memcpy(trg
->data
, git_odb_object_data(obj
), sz
);
802 git_odb_object_free(obj
);
804 if (sz
!= trg_size
) {
805 git_error_set(GIT_ERROR_INVALID
,
806 "inconsistent target object length");
815 if (git_odb_read(&obj
, pb
->odb
, &src_object
->id
) < 0 ||
816 !git__is_ulong(obj_sz
= git_odb_object_size(obj
)))
820 src
->data
= git__malloc(sz
);
821 GIT_ERROR_CHECK_ALLOC(src
->data
);
822 memcpy(src
->data
, git_odb_object_data(obj
), sz
);
824 git_odb_object_free(obj
);
826 if (sz
!= src_size
) {
827 git_error_set(GIT_ERROR_INVALID
,
828 "inconsistent source object length");
835 if (git_delta_index_init(&src
->index
, src
->data
, src_size
) < 0)
836 return 0; /* suboptimal pack - out of memory */
838 *mem_usage
+= git_delta_index_size(src
->index
);
841 if (git_delta_create_from_index(&delta_buf
, &delta_size
, src
->index
, trg
->data
, trg_size
,
845 if (trg_object
->delta
) {
846 /* Prefer only shallower same-sized deltas. */
847 if (delta_size
== trg_object
->delta_size
&&
848 src
->depth
+ 1 >= trg
->depth
) {
849 git__free(delta_buf
);
854 GIT_ASSERT(git_packbuilder__cache_lock(pb
) == 0);
856 if (trg_object
->delta_data
) {
857 git__free(trg_object
->delta_data
);
858 GIT_ASSERT(pb
->delta_cache_size
>= trg_object
->delta_size
);
859 pb
->delta_cache_size
-= trg_object
->delta_size
;
860 trg_object
->delta_data
= NULL
;
862 if (delta_cacheable(pb
, src_size
, trg_size
, delta_size
)) {
863 bool overflow
= git__add_sizet_overflow(
864 &pb
->delta_cache_size
, pb
->delta_cache_size
, delta_size
);
866 GIT_ASSERT(git_packbuilder__cache_unlock(pb
) == 0);
869 git__free(delta_buf
);
873 trg_object
->delta_data
= git__realloc(delta_buf
, delta_size
);
874 GIT_ERROR_CHECK_ALLOC(trg_object
->delta_data
);
876 /* create delta when writing the pack */
877 GIT_ASSERT(git_packbuilder__cache_unlock(pb
) == 0);
878 git__free(delta_buf
);
881 trg_object
->delta
= src_object
;
882 trg_object
->delta_size
= delta_size
;
883 trg
->depth
= src
->depth
+ 1;
889 static size_t check_delta_limit(git_pobject
*me
, size_t n
)
891 git_pobject
*child
= me
->delta_child
;
895 size_t c
= check_delta_limit(child
, n
+ 1);
898 child
= child
->delta_sibling
;
903 static size_t free_unpacked(struct unpacked
*n
)
905 size_t freed_mem
= 0;
908 freed_mem
+= git_delta_index_size(n
->index
);
909 git_delta_index_free(n
->index
);
914 freed_mem
+= n
->object
->size
;
923 static int report_delta_progress(
924 git_packbuilder
*pb
, uint32_t count
, bool force
)
928 if (pb
->progress_cb
) {
929 double current_time
= git__timer();
930 double elapsed
= current_time
- pb
->last_progress_report_time
;
932 if (force
|| elapsed
< 0 || elapsed
>= MIN_PROGRESS_UPDATE_INTERVAL
) {
933 pb
->last_progress_report_time
= current_time
;
935 ret
= pb
->progress_cb(
936 GIT_PACKBUILDER_DELTAFICATION
,
937 count
, pb
->nr_objects
, pb
->progress_cb_payload
);
940 return git_error_set_after_callback(ret
);
947 static int find_deltas(git_packbuilder
*pb
, git_pobject
**list
,
948 size_t *list_size
, size_t window
, size_t depth
)
951 git_str zbuf
= GIT_STR_INIT
;
952 struct unpacked
*array
;
953 size_t idx
= 0, count
= 0;
954 size_t mem_usage
= 0;
958 array
= git__calloc(window
, sizeof(struct unpacked
));
959 GIT_ERROR_CHECK_ALLOC(array
);
962 struct unpacked
*n
= array
+ idx
;
963 size_t max_depth
, j
, best_base
= SIZE_MAX
;
965 GIT_ASSERT(git_packbuilder__progress_lock(pb
) == 0);
967 GIT_ASSERT(git_packbuilder__progress_unlock(pb
) == 0);
971 pb
->nr_deltified
+= 1;
972 report_delta_progress(pb
, pb
->nr_deltified
, false);
976 GIT_ASSERT(git_packbuilder__progress_unlock(pb
) == 0);
978 mem_usage
-= free_unpacked(n
);
981 while (pb
->window_memory_limit
&&
982 mem_usage
> pb
->window_memory_limit
&&
984 size_t tail
= (idx
+ window
- count
) % window
;
985 mem_usage
-= free_unpacked(array
+ tail
);
990 * If the current object is at pack edge, take the depth the
991 * objects that depend on the current object into account
992 * otherwise they would become too deep.
995 if (po
->delta_child
) {
996 size_t delta_limit
= check_delta_limit(po
, 0);
998 if (delta_limit
> max_depth
)
1001 max_depth
-= delta_limit
;
1007 size_t other_idx
= idx
+ j
;
1010 if (other_idx
>= window
)
1011 other_idx
-= window
;
1013 m
= array
+ other_idx
;
1017 if (try_delta(pb
, n
, m
, max_depth
, &mem_usage
, &ret
) < 0)
1022 best_base
= other_idx
;
1026 * If we decided to cache the delta data, then it is best
1027 * to compress it right away. First because we have to do
1028 * it anyway, and doing it here while we're threaded will
1029 * save a lot of time in the non threaded write phase,
1030 * as well as allow for caching more deltas within
1031 * the same cache size limit.
1033 * But only if not writing to stdout, since in that case
1034 * the network is most likely throttling writes anyway,
1035 * and therefore it is best to go to the write phase ASAP
1036 * instead, as we can afford spending more time compressing
1037 * between writes at that moment.
1039 if (po
->delta_data
) {
1040 if (git_zstream_deflatebuf(&zbuf
, po
->delta_data
, po
->delta_size
) < 0)
1043 git__free(po
->delta_data
);
1044 po
->delta_data
= git__malloc(zbuf
.size
);
1045 GIT_ERROR_CHECK_ALLOC(po
->delta_data
);
1047 memcpy(po
->delta_data
, zbuf
.ptr
, zbuf
.size
);
1048 po
->z_delta_size
= zbuf
.size
;
1049 git_str_clear(&zbuf
);
1051 GIT_ASSERT(git_packbuilder__cache_lock(pb
) == 0);
1052 pb
->delta_cache_size
-= po
->delta_size
;
1053 pb
->delta_cache_size
+= po
->z_delta_size
;
1054 GIT_ASSERT(git_packbuilder__cache_unlock(pb
) == 0);
1058 * If we made n a delta, and if n is already at max
1059 * depth, leaving it in the window is pointless. we
1060 * should evict it first.
1062 if (po
->delta
&& max_depth
<= n
->depth
)
1066 * Move the best delta base up in the window, after the
1067 * currently deltified object, to keep it longer. It will
1068 * be the first base object to be attempted next.
1071 struct unpacked swap
= array
[best_base
];
1072 size_t dist
= (window
+ idx
- best_base
) % window
;
1073 size_t dst
= best_base
;
1075 size_t src
= (dst
+ 1) % window
;
1076 array
[dst
] = array
[src
];
1084 if (count
+ 1 < window
)
1092 for (i
= 0; i
< window
; ++i
) {
1093 git__free(array
[i
].index
);
1094 git__free(array
[i
].data
);
1097 git_str_dispose(&zbuf
);
1104 struct thread_params
{
1106 git_packbuilder
*pb
;
1122 static void *threaded_find_deltas(void *arg
)
1124 struct thread_params
*me
= arg
;
1126 while (me
->remaining
) {
1127 if (find_deltas(me
->pb
, me
->list
, &me
->remaining
,
1128 me
->window
, me
->depth
) < 0) {
1132 GIT_ASSERT_WITH_RETVAL(git_packbuilder__progress_lock(me
->pb
) == 0, NULL
);
1134 git_cond_signal(&me
->pb
->progress_cond
);
1135 GIT_ASSERT_WITH_RETVAL(git_packbuilder__progress_unlock(me
->pb
) == 0, NULL
);
1137 if (git_mutex_lock(&me
->mutex
)) {
1138 git_error_set(GIT_ERROR_THREAD
, "unable to lock packfile condition mutex");
1142 while (!me
->data_ready
)
1143 git_cond_wait(&me
->cond
, &me
->mutex
);
1146 * We must not set ->data_ready before we wait on the
1147 * condition because the main thread may have set it to 1
1148 * before we get here. In order to be sure that new
1149 * work is available if we see 1 in ->data_ready, it
1150 * was initialized to 0 before this thread was spawned
1151 * and we reset it to 0 right away.
1154 git_mutex_unlock(&me
->mutex
);
1156 /* leave ->working 1 so that this doesn't get more work assigned */
1160 static int ll_find_deltas(git_packbuilder
*pb
, git_pobject
**list
,
1161 size_t list_size
, size_t window
, size_t depth
)
1163 struct thread_params
*p
;
1165 int ret
, active_threads
= 0;
1167 if (!pb
->nr_threads
)
1168 pb
->nr_threads
= git__online_cpus();
1170 if (pb
->nr_threads
<= 1) {
1171 find_deltas(pb
, list
, &list_size
, window
, depth
);
1175 p
= git__mallocarray(pb
->nr_threads
, sizeof(*p
));
1176 GIT_ERROR_CHECK_ALLOC(p
);
1178 /* Partition the work among the threads */
1179 for (i
= 0; i
< pb
->nr_threads
; ++i
) {
1180 size_t sub_size
= list_size
/ (pb
->nr_threads
- i
);
1182 /* don't use too small segments or no deltas will be found */
1183 if (sub_size
< 2*window
&& i
+1 < pb
->nr_threads
)
1187 p
[i
].window
= window
;
1190 p
[i
].data_ready
= 0;
1192 /* try to split chunks on "path" boundaries */
1193 while (sub_size
&& sub_size
< list_size
&&
1194 list
[sub_size
]->hash
&&
1195 list
[sub_size
]->hash
== list
[sub_size
-1]->hash
)
1199 p
[i
].list_size
= sub_size
;
1200 p
[i
].remaining
= sub_size
;
1203 list_size
-= sub_size
;
1206 /* Start work threads */
1207 for (i
= 0; i
< pb
->nr_threads
; ++i
) {
1208 if (!p
[i
].list_size
)
1211 git_mutex_init(&p
[i
].mutex
);
1212 git_cond_init(&p
[i
].cond
);
1214 ret
= git_thread_create(&p
[i
].thread
,
1215 threaded_find_deltas
, &p
[i
]);
1217 git_error_set(GIT_ERROR_THREAD
, "unable to create thread");
1224 * Now let's wait for work completion. Each time a thread is done
1225 * with its work, we steal half of the remaining work from the
1226 * thread with the largest number of unprocessed objects and give
1227 * it to that newly idle thread. This ensure good load balancing
1228 * until the remaining object list segments are simply too short
1229 * to be worth splitting anymore.
1231 while (active_threads
) {
1232 struct thread_params
*target
= NULL
;
1233 struct thread_params
*victim
= NULL
;
1234 size_t sub_size
= 0;
1236 /* Start by locating a thread that has transitioned its
1237 * 'working' flag from 1 -> 0. This indicates that it is
1238 * ready to receive more work using our work-stealing
1240 GIT_ASSERT(git_packbuilder__progress_lock(pb
) == 0);
1242 for (i
= 0; !target
&& i
< pb
->nr_threads
; i
++)
1247 git_cond_wait(&pb
->progress_cond
, &pb
->progress_mutex
);
1250 /* At this point we hold the progress lock and have located
1251 * a thread to receive more work. We still need to locate a
1252 * thread from which to steal work (the victim). */
1253 for (i
= 0; i
< pb
->nr_threads
; i
++)
1254 if (p
[i
].remaining
> 2*window
&&
1255 (!victim
|| victim
->remaining
< p
[i
].remaining
))
1259 sub_size
= victim
->remaining
/ 2;
1260 list
= victim
->list
+ victim
->list_size
- sub_size
;
1261 while (sub_size
&& list
[0]->hash
&&
1262 list
[0]->hash
== list
[-1]->hash
) {
1268 * It is possible for some "paths" to have
1269 * so many objects that no hash boundary
1270 * might be found. Let's just steal the
1271 * exact half in that case.
1273 sub_size
= victim
->remaining
/ 2;
1276 target
->list
= list
;
1277 victim
->list_size
-= sub_size
;
1278 victim
->remaining
-= sub_size
;
1280 target
->list_size
= sub_size
;
1281 target
->remaining
= sub_size
;
1282 target
->working
= 1;
1283 GIT_ASSERT(git_packbuilder__progress_unlock(pb
) == 0);
1285 if (git_mutex_lock(&target
->mutex
)) {
1286 git_error_set(GIT_ERROR_THREAD
, "unable to lock packfile condition mutex");
1291 target
->data_ready
= 1;
1292 git_cond_signal(&target
->cond
);
1293 git_mutex_unlock(&target
->mutex
);
1296 git_thread_join(&target
->thread
, NULL
);
1297 git_cond_free(&target
->cond
);
1298 git_mutex_free(&target
->mutex
);
1308 #define ll_find_deltas(pb, l, ls, w, d) find_deltas(pb, l, &ls, w, d)
1311 int git_packbuilder__prepare(git_packbuilder
*pb
)
1313 git_pobject
**delta_list
;
1316 if (pb
->nr_objects
== 0 || pb
->done
)
1317 return 0; /* nothing to do */
1320 * Although we do not report progress during deltafication, we
1321 * at least report that we are in the deltafication stage
1323 if (pb
->progress_cb
)
1324 pb
->progress_cb(GIT_PACKBUILDER_DELTAFICATION
, 0, pb
->nr_objects
, pb
->progress_cb_payload
);
1326 delta_list
= git__mallocarray(pb
->nr_objects
, sizeof(*delta_list
));
1327 GIT_ERROR_CHECK_ALLOC(delta_list
);
1329 for (i
= 0; i
< pb
->nr_objects
; ++i
) {
1330 git_pobject
*po
= pb
->object_list
+ i
;
1332 /* Make sure the item is within our size limits */
1333 if (po
->size
< 50 || po
->size
> pb
->big_file_threshold
)
1336 delta_list
[n
++] = po
;
1340 git__tsort((void **)delta_list
, n
, type_size_sort
);
1341 if (ll_find_deltas(pb
, delta_list
, n
,
1342 GIT_PACK_WINDOW
+ 1,
1343 GIT_PACK_DEPTH
) < 0) {
1344 git__free(delta_list
);
1349 report_delta_progress(pb
, pb
->nr_objects
, true);
1352 git__free(delta_list
);
1356 #define PREPARE_PACK if (git_packbuilder__prepare(pb) < 0) { return -1; }
1358 int git_packbuilder_foreach(git_packbuilder
*pb
, int (*cb
)(void *buf
, size_t size
, void *payload
), void *payload
)
1361 return write_pack(pb
, cb
, payload
);
1364 int git_packbuilder__write_buf(git_str
*buf
, git_packbuilder
*pb
)
1368 return write_pack(pb
, &write_pack_buf
, buf
);
1371 int git_packbuilder_write_buf(git_buf
*buf
, git_packbuilder
*pb
)
1373 GIT_BUF_WRAP_PRIVATE(buf
, git_packbuilder__write_buf
, pb
);
1376 static int write_cb(void *buf
, size_t len
, void *payload
)
1378 struct pack_write_context
*ctx
= payload
;
1379 return git_indexer_append(ctx
->indexer
, buf
, len
, ctx
->stats
);
1382 int git_packbuilder_write(
1383 git_packbuilder
*pb
,
1386 git_indexer_progress_cb progress_cb
,
1387 void *progress_cb_payload
)
1390 git_str object_path
= GIT_STR_INIT
;
1391 git_indexer_options opts
= GIT_INDEXER_OPTIONS_INIT
;
1392 git_indexer
*indexer
= NULL
;
1393 git_indexer_progress stats
;
1394 struct pack_write_context ctx
;
1400 if ((error
= git_repository__item_path(&object_path
, pb
->repo
, GIT_REPOSITORY_ITEM_OBJECTS
)) < 0)
1402 if ((error
= git_str_joinpath(&object_path
, git_str_cstr(&object_path
), "pack")) < 0)
1404 path
= git_str_cstr(&object_path
);
1407 opts
.progress_cb
= progress_cb
;
1408 opts
.progress_cb_payload
= progress_cb_payload
;
1410 if ((error
= git_indexer_new(&indexer
, path
, mode
, pb
->odb
, &opts
)) < 0)
1413 if (!git_repository__configmap_lookup(&t
, pb
->repo
, GIT_CONFIGMAP_FSYNCOBJECTFILES
) && t
)
1414 git_indexer__set_fsync(indexer
, 1);
1416 ctx
.indexer
= indexer
;
1419 if ((error
= git_packbuilder_foreach(pb
, write_cb
, &ctx
)) < 0)
1422 if ((error
= git_indexer_commit(indexer
, &stats
)) < 0)
1425 #ifndef GIT_DEPRECATE_HARD
1426 git_oid_cpy(&pb
->pack_oid
, git_indexer_hash(indexer
));
1429 pb
->pack_name
= git__strdup(git_indexer_name(indexer
));
1430 GIT_ERROR_CHECK_ALLOC(pb
->pack_name
);
1433 git_indexer_free(indexer
);
1434 git_str_dispose(&object_path
);
1440 #ifndef GIT_DEPRECATE_HARD
1441 const git_oid
*git_packbuilder_hash(git_packbuilder
*pb
)
1443 return &pb
->pack_oid
;
1447 const char *git_packbuilder_name(git_packbuilder
*pb
)
1449 return pb
->pack_name
;
1453 static int cb_tree_walk(
1454 const char *root
, const git_tree_entry
*entry
, void *payload
)
1457 struct tree_walk_context
*ctx
= payload
;
1459 /* A commit inside a tree represents a submodule commit and should be skipped. */
1460 if (git_tree_entry_type(entry
) == GIT_OBJECT_COMMIT
)
1463 if (!(error
= git_str_sets(&ctx
->buf
, root
)) &&
1464 !(error
= git_str_puts(&ctx
->buf
, git_tree_entry_name(entry
))))
1465 error
= git_packbuilder_insert(
1466 ctx
->pb
, git_tree_entry_id(entry
), git_str_cstr(&ctx
->buf
));
1471 int git_packbuilder_insert_commit(git_packbuilder
*pb
, const git_oid
*oid
)
1475 if (git_commit_lookup(&commit
, pb
->repo
, oid
) < 0 ||
1476 git_packbuilder_insert(pb
, oid
, NULL
) < 0)
1479 if (git_packbuilder_insert_tree(pb
, git_commit_tree_id(commit
)) < 0)
1482 git_commit_free(commit
);
1486 int git_packbuilder_insert_tree(git_packbuilder
*pb
, const git_oid
*oid
)
1489 git_tree
*tree
= NULL
;
1490 struct tree_walk_context context
= { pb
, GIT_STR_INIT
};
1492 if (!(error
= git_tree_lookup(&tree
, pb
->repo
, oid
)) &&
1493 !(error
= git_packbuilder_insert(pb
, oid
, NULL
)))
1494 error
= git_tree_walk(tree
, GIT_TREEWALK_PRE
, cb_tree_walk
, &context
);
1496 git_tree_free(tree
);
1497 git_str_dispose(&context
.buf
);
1501 int git_packbuilder_insert_recur(git_packbuilder
*pb
, const git_oid
*id
, const char *name
)
1509 if ((error
= git_object_lookup(&obj
, pb
->repo
, id
, GIT_OBJECT_ANY
)) < 0)
1512 switch (git_object_type(obj
)) {
1513 case GIT_OBJECT_BLOB
:
1514 error
= git_packbuilder_insert(pb
, id
, name
);
1516 case GIT_OBJECT_TREE
:
1517 error
= git_packbuilder_insert_tree(pb
, id
);
1519 case GIT_OBJECT_COMMIT
:
1520 error
= git_packbuilder_insert_commit(pb
, id
);
1522 case GIT_OBJECT_TAG
:
1523 if ((error
= git_packbuilder_insert(pb
, id
, name
)) < 0)
1525 error
= git_packbuilder_insert_recur(pb
, git_tag_target_id((git_tag
*) obj
), NULL
);
1529 git_error_set(GIT_ERROR_INVALID
, "unknown object type");
1534 git_object_free(obj
);
1538 size_t git_packbuilder_object_count(git_packbuilder
*pb
)
1540 return pb
->nr_objects
;
1543 size_t git_packbuilder_written(git_packbuilder
*pb
)
1545 return pb
->nr_written
;
1548 static int lookup_walk_object(struct walk_object
**out
, git_packbuilder
*pb
, const git_oid
*id
)
1550 struct walk_object
*obj
;
1552 obj
= git_pool_mallocz(&pb
->object_pool
, 1);
1554 git_error_set_oom();
1558 git_oid_cpy(&obj
->id
, id
);
1564 static int retrieve_object(struct walk_object
**out
, git_packbuilder
*pb
, const git_oid
*id
)
1566 struct walk_object
*obj
;
1569 if ((obj
= git_oidmap_get(pb
->walk_objects
, id
)) == NULL
) {
1570 if ((error
= lookup_walk_object(&obj
, pb
, id
)) < 0)
1573 if ((error
= git_oidmap_set(pb
->walk_objects
, &obj
->id
, obj
)) < 0)
1581 static int mark_blob_uninteresting(git_packbuilder
*pb
, const git_oid
*id
)
1584 struct walk_object
*obj
;
1586 if ((error
= retrieve_object(&obj
, pb
, id
)) < 0)
1589 obj
->uninteresting
= 1;
1594 static int mark_tree_uninteresting(git_packbuilder
*pb
, const git_oid
*id
)
1596 struct walk_object
*obj
;
1601 if ((error
= retrieve_object(&obj
, pb
, id
)) < 0)
1604 if (obj
->uninteresting
)
1607 obj
->uninteresting
= 1;
1609 if ((error
= git_tree_lookup(&tree
, pb
->repo
, id
)) < 0)
1612 for (i
= 0; i
< git_tree_entrycount(tree
); i
++) {
1613 const git_tree_entry
*entry
= git_tree_entry_byindex(tree
, i
);
1614 const git_oid
*entry_id
= git_tree_entry_id(entry
);
1615 switch (git_tree_entry_type(entry
)) {
1616 case GIT_OBJECT_TREE
:
1617 if ((error
= mark_tree_uninteresting(pb
, entry_id
)) < 0)
1620 case GIT_OBJECT_BLOB
:
1621 if ((error
= mark_blob_uninteresting(pb
, entry_id
)) < 0)
1625 /* it's a submodule or something unknown, we don't want it */
1631 git_tree_free(tree
);
1636 * Mark the edges of the graph uninteresting. Since we start from a
1637 * git_revwalk, the commits are already uninteresting, but we need to
1638 * mark the trees and blobs.
1640 static int mark_edges_uninteresting(git_packbuilder
*pb
, git_commit_list
*commits
)
1643 git_commit_list
*list
;
1646 for (list
= commits
; list
; list
= list
->next
) {
1647 if (!list
->item
->uninteresting
)
1650 if ((error
= git_commit_lookup(&commit
, pb
->repo
, &list
->item
->oid
)) < 0)
1653 error
= mark_tree_uninteresting(pb
, git_commit_tree_id(commit
));
1654 git_commit_free(commit
);
1663 static int pack_objects_insert_tree(git_packbuilder
*pb
, git_tree
*tree
)
1668 struct walk_object
*obj
;
1671 if ((error
= retrieve_object(&obj
, pb
, git_tree_id(tree
))) < 0)
1674 if (obj
->seen
|| obj
->uninteresting
)
1679 if ((error
= git_packbuilder_insert(pb
, &obj
->id
, NULL
)))
1682 for (i
= 0; i
< git_tree_entrycount(tree
); i
++) {
1683 const git_tree_entry
*entry
= git_tree_entry_byindex(tree
, i
);
1684 const git_oid
*entry_id
= git_tree_entry_id(entry
);
1685 switch (git_tree_entry_type(entry
)) {
1686 case GIT_OBJECT_TREE
:
1687 if ((error
= git_tree_lookup(&subtree
, pb
->repo
, entry_id
)) < 0)
1690 error
= pack_objects_insert_tree(pb
, subtree
);
1691 git_tree_free(subtree
);
1697 case GIT_OBJECT_BLOB
:
1698 if ((error
= retrieve_object(&obj
, pb
, entry_id
)) < 0)
1700 if (obj
->uninteresting
)
1702 name
= git_tree_entry_name(entry
);
1703 if ((error
= git_packbuilder_insert(pb
, entry_id
, name
)) < 0)
1707 /* it's a submodule or something unknown, we don't want it */
1716 static int pack_objects_insert_commit(git_packbuilder
*pb
, struct walk_object
*obj
)
1719 git_commit
*commit
= NULL
;
1720 git_tree
*tree
= NULL
;
1724 if ((error
= git_packbuilder_insert(pb
, &obj
->id
, NULL
)) < 0)
1727 if ((error
= git_commit_lookup(&commit
, pb
->repo
, &obj
->id
)) < 0)
1730 if ((error
= git_tree_lookup(&tree
, pb
->repo
, git_commit_tree_id(commit
))) < 0)
1733 if ((error
= pack_objects_insert_tree(pb
, tree
)) < 0)
1737 git_commit_free(commit
);
1738 git_tree_free(tree
);
1742 int git_packbuilder_insert_walk(git_packbuilder
*pb
, git_revwalk
*walk
)
1746 struct walk_object
*obj
;
1749 GIT_ASSERT_ARG(walk
);
1751 if ((error
= mark_edges_uninteresting(pb
, walk
->user_input
)) < 0)
1755 * TODO: git marks the parents of the edges
1756 * uninteresting. This may provide a speed advantage, but does
1757 * seem to assume the remote does not have a single-commit
1758 * history on the other end.
1761 /* walk down each tree up to the blobs and insert them, stopping when uninteresting */
1762 while ((error
= git_revwalk_next(&id
, walk
)) == 0) {
1763 if ((error
= retrieve_object(&obj
, pb
, &id
)) < 0)
1766 if (obj
->seen
|| obj
->uninteresting
)
1769 if ((error
= pack_objects_insert_commit(pb
, obj
)) < 0)
1773 if (error
== GIT_ITEROVER
)
1779 int git_packbuilder_set_callbacks(git_packbuilder
*pb
, git_packbuilder_progress progress_cb
, void *progress_cb_payload
)
1784 pb
->progress_cb
= progress_cb
;
1785 pb
->progress_cb_payload
= progress_cb_payload
;
1790 void git_packbuilder_free(git_packbuilder
*pb
)
1797 git_mutex_free(&pb
->cache_mutex
);
1798 git_mutex_free(&pb
->progress_mutex
);
1799 git_cond_free(&pb
->progress_cond
);
1804 git_odb_free(pb
->odb
);
1807 git_oidmap_free(pb
->object_ix
);
1809 if (pb
->object_list
)
1810 git__free(pb
->object_list
);
1812 git_oidmap_free(pb
->walk_objects
);
1813 git_pool_clear(&pb
->object_pool
);
1815 git_hash_ctx_cleanup(&pb
->ctx
);
1816 git_zstream_free(&pb
->zstream
);
1818 git__free(pb
->pack_name
);