2 * Copyright (C) the libgit2 contributors. All rights reserved.
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
8 #include "pack-objects.h"
15 #include "thread-utils.h"
19 #include "commit_list.h"
21 #include "git2/pack.h"
22 #include "git2/commit.h"
24 #include "git2/indexer.h"
25 #include "git2/config.h"
30 struct git_delta_index
*index
;
34 struct tree_walk_context
{
39 struct pack_write_context
{
41 git_transfer_progress
*stats
;
46 unsigned int uninteresting
:1,
52 #define GIT_PACKBUILDER__MUTEX_OP(pb, mtx, op) do { \
53 int result = git_mutex_##op(&(pb)->mtx); \
60 #define GIT_PACKBUILDER__MUTEX_OP(pb,mtx,op) GIT_UNUSED(pb)
62 #endif /* GIT_THREADS */
64 #define git_packbuilder__cache_lock(pb) GIT_PACKBUILDER__MUTEX_OP(pb, cache_mutex, lock)
65 #define git_packbuilder__cache_unlock(pb) GIT_PACKBUILDER__MUTEX_OP(pb, cache_mutex, unlock)
66 #define git_packbuilder__progress_lock(pb) GIT_PACKBUILDER__MUTEX_OP(pb, progress_mutex, lock)
67 #define git_packbuilder__progress_unlock(pb) GIT_PACKBUILDER__MUTEX_OP(pb, progress_mutex, unlock)
69 /* The minimal interval between progress updates (in seconds). */
70 #define MIN_PROGRESS_UPDATE_INTERVAL 0.5
72 /* Size of the buffer to feed to zlib */
73 #define COMPRESS_BUFLEN (1024 * 1024)
75 static unsigned name_hash(const char *name
)
83 * This effectively just creates a sortable number from the
84 * last sixteen non-whitespace characters. Last characters
85 * count "most", so things that end in ".c" sort together.
87 while ((c
= *name
++) != 0) {
90 hash
= (hash
>> 2) + (c
<< 24);
95 static int packbuilder_config(git_packbuilder
*pb
)
101 if ((ret
= git_repository_config_snapshot(&config
, pb
->repo
)) < 0)
104 #define config_get(KEY,DST,DFLT) do { \
105 ret = git_config_get_int64(&val, config, KEY); \
107 if (!git__is_sizet(val)) { \
108 git_error_set(GIT_ERROR_CONFIG, \
109 "configuration value '%s' is too large", KEY); \
113 (DST) = (size_t)val; \
114 } else if (ret == GIT_ENOTFOUND) { \
117 } else if (ret < 0) goto out; } while (0)
119 config_get("pack.deltaCacheSize", pb
->max_delta_cache_size
,
120 GIT_PACK_DELTA_CACHE_SIZE
);
121 config_get("pack.deltaCacheLimit", pb
->cache_max_small_delta_size
,
122 GIT_PACK_DELTA_CACHE_LIMIT
);
123 config_get("pack.deltaCacheSize", pb
->big_file_threshold
,
124 GIT_PACK_BIG_FILE_THRESHOLD
);
125 config_get("pack.windowMemory", pb
->window_memory_limit
, 0);
130 git_config_free(config
);
135 int git_packbuilder_new(git_packbuilder
**out
, git_repository
*repo
)
141 pb
= git__calloc(1, sizeof(*pb
));
142 GIT_ERROR_CHECK_ALLOC(pb
);
144 pb
->object_ix
= git_oidmap_alloc();
148 pb
->walk_objects
= git_oidmap_alloc();
149 if (!pb
->walk_objects
)
152 git_pool_init(&pb
->object_pool
, sizeof(struct walk_object
));
155 pb
->nr_threads
= 1; /* do not spawn any thread by default */
157 if (git_hash_ctx_init(&pb
->ctx
) < 0 ||
158 git_zstream_init(&pb
->zstream
, GIT_ZSTREAM_DEFLATE
) < 0 ||
159 git_repository_odb(&pb
->odb
, repo
) < 0 ||
160 packbuilder_config(pb
) < 0)
165 if (git_mutex_init(&pb
->cache_mutex
) ||
166 git_mutex_init(&pb
->progress_mutex
) ||
167 git_cond_init(&pb
->progress_cond
))
169 git_error_set(GIT_ERROR_OS
, "failed to initialize packbuilder mutex");
179 git_packbuilder_free(pb
);
183 unsigned int git_packbuilder_set_threads(git_packbuilder
*pb
, unsigned int n
)
191 assert(1 == pb
->nr_threads
);
194 return pb
->nr_threads
;
197 static void rehash(git_packbuilder
*pb
)
203 git_oidmap_clear(pb
->object_ix
);
204 for (i
= 0, po
= pb
->object_list
; i
< pb
->nr_objects
; i
++, po
++) {
205 pos
= git_oidmap_put(pb
->object_ix
, &po
->id
, &ret
);
206 git_oidmap_set_value_at(pb
->object_ix
, pos
, po
);
210 int git_packbuilder_insert(git_packbuilder
*pb
, const git_oid
*oid
,
219 /* If the object already exists in the hash table, then we don't
220 * have any work to do */
221 if (git_oidmap_exists(pb
->object_ix
, oid
))
224 if (pb
->nr_objects
>= pb
->nr_alloc
) {
225 GIT_ERROR_CHECK_ALLOC_ADD(&newsize
, pb
->nr_alloc
, 1024);
226 GIT_ERROR_CHECK_ALLOC_MULTIPLY(&newsize
, newsize
, 3 / 2);
228 if (!git__is_uint32(newsize
)) {
229 git_error_set(GIT_ERROR_NOMEMORY
, "packfile too large to fit in memory.");
233 pb
->nr_alloc
= newsize
;
235 pb
->object_list
= git__reallocarray(pb
->object_list
,
236 pb
->nr_alloc
, sizeof(*po
));
237 GIT_ERROR_CHECK_ALLOC(pb
->object_list
);
241 po
= pb
->object_list
+ pb
->nr_objects
;
242 memset(po
, 0x0, sizeof(*po
));
244 if ((ret
= git_odb_read_header(&po
->size
, &po
->type
, pb
->odb
, oid
)) < 0)
248 git_oid_cpy(&po
->id
, oid
);
249 po
->hash
= name_hash(name
);
251 pos
= git_oidmap_put(pb
->object_ix
, &po
->id
, &ret
);
257 git_oidmap_set_value_at(pb
->object_ix
, pos
, po
);
261 if (pb
->progress_cb
) {
262 double current_time
= git__timer();
263 double elapsed
= current_time
- pb
->last_progress_report_time
;
265 if (elapsed
>= MIN_PROGRESS_UPDATE_INTERVAL
) {
266 pb
->last_progress_report_time
= current_time
;
268 ret
= pb
->progress_cb(
269 GIT_PACKBUILDER_ADDING_OBJECTS
,
270 pb
->nr_objects
, 0, pb
->progress_cb_payload
);
273 return git_error_set_after_callback(ret
);
280 static int get_delta(void **out
, git_odb
*odb
, git_pobject
*po
)
282 git_odb_object
*src
= NULL
, *trg
= NULL
;
289 if (git_odb_read(&src
, odb
, &po
->delta
->id
) < 0 ||
290 git_odb_read(&trg
, odb
, &po
->id
) < 0)
293 error
= git_delta(&delta_buf
, &delta_size
,
294 git_odb_object_data(src
), git_odb_object_size(src
),
295 git_odb_object_data(trg
), git_odb_object_size(trg
),
298 if (error
< 0 && error
!= GIT_EBUFS
)
301 if (error
== GIT_EBUFS
|| delta_size
!= po
->delta_size
) {
302 git_error_set(GIT_ERROR_INVALID
, "delta size changed");
308 git_odb_object_free(src
);
309 git_odb_object_free(trg
);
313 git_odb_object_free(src
);
314 git_odb_object_free(trg
);
318 static int write_object(
321 int (*write_cb
)(void *buf
, size_t size
, void *cb_data
),
324 git_odb_object
*obj
= NULL
;
326 unsigned char hdr
[10], *zbuf
= NULL
;
328 size_t hdr_len
, zbuf_len
= COMPRESS_BUFLEN
, data_len
;
332 * If we have a delta base, let's use the delta to save space.
333 * Otherwise load the whole object. 'data' ends up pointing to
334 * whatever data we want to put into the packfile.
338 data
= po
->delta_data
;
339 else if ((error
= get_delta(&data
, pb
->odb
, po
)) < 0)
342 data_len
= po
->delta_size
;
343 type
= GIT_OBJECT_REF_DELTA
;
345 if ((error
= git_odb_read(&obj
, pb
->odb
, &po
->id
)) < 0)
348 data
= (void *)git_odb_object_data(obj
);
349 data_len
= git_odb_object_size(obj
);
350 type
= git_odb_object_type(obj
);
354 hdr_len
= git_packfile__object_header(hdr
, data_len
, type
);
356 if ((error
= write_cb(hdr
, hdr_len
, cb_data
)) < 0 ||
357 (error
= git_hash_update(&pb
->ctx
, hdr
, hdr_len
)) < 0)
360 if (type
== GIT_OBJECT_REF_DELTA
) {
361 if ((error
= write_cb(po
->delta
->id
.id
, GIT_OID_RAWSZ
, cb_data
)) < 0 ||
362 (error
= git_hash_update(&pb
->ctx
, po
->delta
->id
.id
, GIT_OID_RAWSZ
)) < 0)
367 if (po
->z_delta_size
) {
368 data_len
= po
->z_delta_size
;
370 if ((error
= write_cb(data
, data_len
, cb_data
)) < 0 ||
371 (error
= git_hash_update(&pb
->ctx
, data
, data_len
)) < 0)
374 zbuf
= git__malloc(zbuf_len
);
375 GIT_ERROR_CHECK_ALLOC(zbuf
);
377 git_zstream_reset(&pb
->zstream
);
378 git_zstream_set_input(&pb
->zstream
, data
, data_len
);
380 while (!git_zstream_done(&pb
->zstream
)) {
381 if ((error
= git_zstream_get_output(zbuf
, &zbuf_len
, &pb
->zstream
)) < 0 ||
382 (error
= write_cb(zbuf
, zbuf_len
, cb_data
)) < 0 ||
383 (error
= git_hash_update(&pb
->ctx
, zbuf
, zbuf_len
)) < 0)
386 zbuf_len
= COMPRESS_BUFLEN
; /* reuse buffer */
391 * If po->delta is true, data is a delta and it is our
392 * responsibility to free it (otherwise it's a git_object's
393 * data). We set po->delta_data to NULL in case we got the
394 * data from there instead of get_delta(). If we didn't,
399 po
->delta_data
= NULL
;
406 git_odb_object_free(obj
);
410 enum write_one_status
{
411 WRITE_ONE_SKIP
= -1, /* already written */
412 WRITE_ONE_BREAK
= 0, /* writing this will bust the limit; not written */
413 WRITE_ONE_WRITTEN
= 1, /* normal */
414 WRITE_ONE_RECURSIVE
= 2 /* already scheduled to be written */
417 static int write_one(
418 enum write_one_status
*status
,
421 int (*write_cb
)(void *buf
, size_t size
, void *cb_data
),
427 *status
= WRITE_ONE_RECURSIVE
;
429 } else if (po
->written
) {
430 *status
= WRITE_ONE_SKIP
;
437 if ((error
= write_one(status
, pb
, po
->delta
, write_cb
, cb_data
)) < 0)
440 /* we cannot depend on this one */
441 if (*status
== WRITE_ONE_RECURSIVE
)
445 *status
= WRITE_ONE_WRITTEN
;
449 return write_object(pb
, po
, write_cb
, cb_data
);
452 GIT_INLINE(void) add_to_write_order(git_pobject
**wo
, size_t *endp
,
461 static void add_descendants_to_write_order(git_pobject
**wo
, size_t *endp
,
464 int add_to_order
= 1;
468 /* add this node... */
469 add_to_write_order(wo
, endp
, po
);
470 /* all its siblings... */
471 for (s
= po
->delta_sibling
; s
; s
= s
->delta_sibling
) {
472 add_to_write_order(wo
, endp
, s
);
475 /* drop down a level to add left subtree nodes if possible */
476 if (po
->delta_child
) {
478 po
= po
->delta_child
;
481 /* our sibling might have some children, it is next */
482 if (po
->delta_sibling
) {
483 po
= po
->delta_sibling
;
486 /* go back to our parent node */
488 while (po
&& !po
->delta_sibling
) {
489 /* we're on the right side of a subtree, keep
490 * going up until we can go right again */
494 /* done- we hit our original root node */
497 /* pass it off to sibling at this level */
498 po
= po
->delta_sibling
;
503 static void add_family_to_write_order(git_pobject
**wo
, size_t *endp
,
508 for (root
= po
; root
->delta
; root
= root
->delta
)
510 add_descendants_to_write_order(wo
, endp
, root
);
513 static int cb_tag_foreach(const char *name
, git_oid
*oid
, void *data
)
515 git_packbuilder
*pb
= data
;
521 pos
= git_oidmap_lookup_index(pb
->object_ix
, oid
);
522 if (!git_oidmap_valid_index(pb
->object_ix
, pos
))
525 po
= git_oidmap_value_at(pb
->object_ix
, pos
);
528 /* TODO: peel objects */
533 static git_pobject
**compute_write_order(git_packbuilder
*pb
)
535 size_t i
, wo_end
, last_untagged
;
538 if ((wo
= git__mallocarray(pb
->nr_objects
, sizeof(*wo
))) == NULL
)
541 for (i
= 0; i
< pb
->nr_objects
; i
++) {
542 git_pobject
*po
= pb
->object_list
+ i
;
545 po
->delta_child
= NULL
;
546 po
->delta_sibling
= NULL
;
550 * Fully connect delta_child/delta_sibling network.
551 * Make sure delta_sibling is sorted in the original
554 for (i
= pb
->nr_objects
; i
> 0;) {
555 git_pobject
*po
= &pb
->object_list
[--i
];
558 /* Mark me as the first child */
559 po
->delta_sibling
= po
->delta
->delta_child
;
560 po
->delta
->delta_child
= po
;
564 * Mark objects that are at the tip of tags.
566 if (git_tag_foreach(pb
->repo
, &cb_tag_foreach
, pb
) < 0) {
572 * Give the objects in the original recency order until
573 * we see a tagged tip.
575 for (i
= wo_end
= 0; i
< pb
->nr_objects
; i
++) {
576 git_pobject
*po
= pb
->object_list
+ i
;
579 add_to_write_order(wo
, &wo_end
, po
);
584 * Then fill all the tagged tips.
586 for (; i
< pb
->nr_objects
; i
++) {
587 git_pobject
*po
= pb
->object_list
+ i
;
589 add_to_write_order(wo
, &wo_end
, po
);
593 * And then all remaining commits and tags.
595 for (i
= last_untagged
; i
< pb
->nr_objects
; i
++) {
596 git_pobject
*po
= pb
->object_list
+ i
;
597 if (po
->type
!= GIT_OBJECT_COMMIT
&&
598 po
->type
!= GIT_OBJECT_TAG
)
600 add_to_write_order(wo
, &wo_end
, po
);
604 * And then all the trees.
606 for (i
= last_untagged
; i
< pb
->nr_objects
; i
++) {
607 git_pobject
*po
= pb
->object_list
+ i
;
608 if (po
->type
!= GIT_OBJECT_TREE
)
610 add_to_write_order(wo
, &wo_end
, po
);
614 * Finally all the rest in really tight order
616 for (i
= last_untagged
; i
< pb
->nr_objects
; i
++) {
617 git_pobject
*po
= pb
->object_list
+ i
;
619 add_family_to_write_order(wo
, &wo_end
, po
);
622 if (wo_end
!= pb
->nr_objects
) {
624 git_error_set(GIT_ERROR_INVALID
, "invalid write order");
631 static int write_pack(git_packbuilder
*pb
,
632 int (*write_cb
)(void *buf
, size_t size
, void *cb_data
),
635 git_pobject
**write_order
;
637 enum write_one_status status
;
638 struct git_pack_header ph
;
643 write_order
= compute_write_order(pb
);
644 if (write_order
== NULL
)
647 if (!git__is_uint32(pb
->nr_objects
)) {
648 git_error_set(GIT_ERROR_INVALID
, "too many objects");
652 /* Write pack header */
653 ph
.hdr_signature
= htonl(PACK_SIGNATURE
);
654 ph
.hdr_version
= htonl(PACK_VERSION
);
655 ph
.hdr_entries
= htonl(pb
->nr_objects
);
657 if ((error
= write_cb(&ph
, sizeof(ph
), cb_data
)) < 0 ||
658 (error
= git_hash_update(&pb
->ctx
, &ph
, sizeof(ph
))) < 0)
661 pb
->nr_remaining
= pb
->nr_objects
;
664 for ( ; i
< pb
->nr_objects
; ++i
) {
667 if ((error
= write_one(&status
, pb
, po
, write_cb
, cb_data
)) < 0)
671 pb
->nr_remaining
-= pb
->nr_written
;
672 } while (pb
->nr_remaining
&& i
< pb
->nr_objects
);
674 if ((error
= git_hash_final(&entry_oid
, &pb
->ctx
)) < 0)
677 error
= write_cb(entry_oid
.id
, GIT_OID_RAWSZ
, cb_data
);
680 /* if callback cancelled writing, we must still free delta_data */
681 for ( ; i
< pb
->nr_objects
; ++i
) {
683 if (po
->delta_data
) {
684 git__free(po
->delta_data
);
685 po
->delta_data
= NULL
;
689 git__free(write_order
);
693 static int write_pack_buf(void *buf
, size_t size
, void *data
)
695 git_buf
*b
= (git_buf
*)data
;
696 return git_buf_put(b
, buf
, size
);
699 static int type_size_sort(const void *_a
, const void *_b
)
701 const git_pobject
*a
= (git_pobject
*)_a
;
702 const git_pobject
*b
= (git_pobject
*)_b
;
704 if (a
->type
> b
->type
)
706 if (a
->type
< b
->type
)
708 if (a
->hash
> b
->hash
)
710 if (a
->hash
< b
->hash
)
715 if (a->preferred_base > b->preferred_base)
717 if (a->preferred_base < b->preferred_base)
720 if (a
->size
> b
->size
)
722 if (a
->size
< b
->size
)
724 return a
< b
? -1 : (a
> b
); /* newest first */
727 static int delta_cacheable(
735 if (git__add_sizet_overflow(&new_size
, pb
->delta_cache_size
, delta_size
))
738 if (pb
->max_delta_cache_size
&& new_size
> pb
->max_delta_cache_size
)
741 if (delta_size
< pb
->cache_max_small_delta_size
)
744 /* cache delta, if objects are large enough compared to delta size */
745 if ((src_size
>> 20) + (trg_size
>> 21) > (delta_size
>> 10))
751 static int try_delta(git_packbuilder
*pb
, struct unpacked
*trg
,
752 struct unpacked
*src
, size_t max_depth
,
753 size_t *mem_usage
, int *ret
)
755 git_pobject
*trg_object
= trg
->object
;
756 git_pobject
*src_object
= src
->object
;
758 size_t trg_size
, src_size
, delta_size
, sizediff
, max_size
, sz
;
762 /* Don't bother doing diffs between different types */
763 if (trg_object
->type
!= src_object
->type
) {
770 /* TODO: support reuse-delta */
772 /* Let's not bust the allowed depth. */
773 if (src
->depth
>= max_depth
)
776 /* Now some size filtering heuristics. */
777 trg_size
= trg_object
->size
;
778 if (!trg_object
->delta
) {
779 max_size
= trg_size
/2 - 20;
782 max_size
= trg_object
->delta_size
;
783 ref_depth
= trg
->depth
;
786 max_size
= (uint64_t)max_size
* (max_depth
- src
->depth
) /
787 (max_depth
- ref_depth
+ 1);
791 src_size
= src_object
->size
;
792 sizediff
= src_size
< trg_size
? trg_size
- src_size
: 0;
793 if (sizediff
>= max_size
)
795 if (trg_size
< src_size
/ 32)
798 /* Load data if not already done */
800 if (git_odb_read(&obj
, pb
->odb
, &trg_object
->id
) < 0)
803 sz
= git_odb_object_size(obj
);
804 trg
->data
= git__malloc(sz
);
805 GIT_ERROR_CHECK_ALLOC(trg
->data
);
806 memcpy(trg
->data
, git_odb_object_data(obj
), sz
);
808 git_odb_object_free(obj
);
810 if (sz
!= trg_size
) {
811 git_error_set(GIT_ERROR_INVALID
,
812 "inconsistent target object length");
821 if (git_odb_read(&obj
, pb
->odb
, &src_object
->id
) < 0 ||
822 !git__is_ulong(obj_sz
= git_odb_object_size(obj
)))
826 src
->data
= git__malloc(sz
);
827 GIT_ERROR_CHECK_ALLOC(src
->data
);
828 memcpy(src
->data
, git_odb_object_data(obj
), sz
);
830 git_odb_object_free(obj
);
832 if (sz
!= src_size
) {
833 git_error_set(GIT_ERROR_INVALID
,
834 "inconsistent source object length");
841 if (git_delta_index_init(&src
->index
, src
->data
, src_size
) < 0)
842 return 0; /* suboptimal pack - out of memory */
844 *mem_usage
+= git_delta_index_size(src
->index
);
847 if (git_delta_create_from_index(&delta_buf
, &delta_size
, src
->index
, trg
->data
, trg_size
,
851 if (trg_object
->delta
) {
852 /* Prefer only shallower same-sized deltas. */
853 if (delta_size
== trg_object
->delta_size
&&
854 src
->depth
+ 1 >= trg
->depth
) {
855 git__free(delta_buf
);
860 git_packbuilder__cache_lock(pb
);
861 if (trg_object
->delta_data
) {
862 git__free(trg_object
->delta_data
);
863 assert(pb
->delta_cache_size
>= trg_object
->delta_size
);
864 pb
->delta_cache_size
-= trg_object
->delta_size
;
865 trg_object
->delta_data
= NULL
;
867 if (delta_cacheable(pb
, src_size
, trg_size
, delta_size
)) {
868 bool overflow
= git__add_sizet_overflow(
869 &pb
->delta_cache_size
, pb
->delta_cache_size
, delta_size
);
871 git_packbuilder__cache_unlock(pb
);
874 git__free(delta_buf
);
878 trg_object
->delta_data
= git__realloc(delta_buf
, delta_size
);
879 GIT_ERROR_CHECK_ALLOC(trg_object
->delta_data
);
881 /* create delta when writing the pack */
882 git_packbuilder__cache_unlock(pb
);
883 git__free(delta_buf
);
886 trg_object
->delta
= src_object
;
887 trg_object
->delta_size
= delta_size
;
888 trg
->depth
= src
->depth
+ 1;
894 static size_t check_delta_limit(git_pobject
*me
, size_t n
)
896 git_pobject
*child
= me
->delta_child
;
900 size_t c
= check_delta_limit(child
, n
+ 1);
903 child
= child
->delta_sibling
;
908 static size_t free_unpacked(struct unpacked
*n
)
910 size_t freed_mem
= 0;
913 freed_mem
+= git_delta_index_size(n
->index
);
914 git_delta_index_free(n
->index
);
919 freed_mem
+= n
->object
->size
;
928 static int report_delta_progress(
929 git_packbuilder
*pb
, uint32_t count
, bool force
)
933 if (pb
->progress_cb
) {
934 double current_time
= git__timer();
935 double elapsed
= current_time
- pb
->last_progress_report_time
;
937 if (force
|| elapsed
>= MIN_PROGRESS_UPDATE_INTERVAL
) {
938 pb
->last_progress_report_time
= current_time
;
940 ret
= pb
->progress_cb(
941 GIT_PACKBUILDER_DELTAFICATION
,
942 count
, pb
->nr_objects
, pb
->progress_cb_payload
);
945 return git_error_set_after_callback(ret
);
952 static int find_deltas(git_packbuilder
*pb
, git_pobject
**list
,
953 size_t *list_size
, size_t window
, size_t depth
)
956 git_buf zbuf
= GIT_BUF_INIT
;
957 struct unpacked
*array
;
958 size_t idx
= 0, count
= 0;
959 size_t mem_usage
= 0;
963 array
= git__calloc(window
, sizeof(struct unpacked
));
964 GIT_ERROR_CHECK_ALLOC(array
);
967 struct unpacked
*n
= array
+ idx
;
968 size_t max_depth
, j
, best_base
= SIZE_MAX
;
970 git_packbuilder__progress_lock(pb
);
972 git_packbuilder__progress_unlock(pb
);
976 pb
->nr_deltified
+= 1;
977 report_delta_progress(pb
, pb
->nr_deltified
, false);
981 git_packbuilder__progress_unlock(pb
);
983 mem_usage
-= free_unpacked(n
);
986 while (pb
->window_memory_limit
&&
987 mem_usage
> pb
->window_memory_limit
&&
989 size_t tail
= (idx
+ window
- count
) % window
;
990 mem_usage
-= free_unpacked(array
+ tail
);
995 * If the current object is at pack edge, take the depth the
996 * objects that depend on the current object into account
997 * otherwise they would become too deep.
1000 if (po
->delta_child
) {
1001 size_t delta_limit
= check_delta_limit(po
, 0);
1003 if (delta_limit
> max_depth
)
1006 max_depth
-= delta_limit
;
1012 size_t other_idx
= idx
+ j
;
1015 if (other_idx
>= window
)
1016 other_idx
-= window
;
1018 m
= array
+ other_idx
;
1022 if (try_delta(pb
, n
, m
, max_depth
, &mem_usage
, &ret
) < 0)
1027 best_base
= other_idx
;
1031 * If we decided to cache the delta data, then it is best
1032 * to compress it right away. First because we have to do
1033 * it anyway, and doing it here while we're threaded will
1034 * save a lot of time in the non threaded write phase,
1035 * as well as allow for caching more deltas within
1036 * the same cache size limit.
1038 * But only if not writing to stdout, since in that case
1039 * the network is most likely throttling writes anyway,
1040 * and therefore it is best to go to the write phase ASAP
1041 * instead, as we can afford spending more time compressing
1042 * between writes at that moment.
1044 if (po
->delta_data
) {
1045 if (git_zstream_deflatebuf(&zbuf
, po
->delta_data
, po
->delta_size
) < 0)
1048 git__free(po
->delta_data
);
1049 po
->delta_data
= git__malloc(zbuf
.size
);
1050 GIT_ERROR_CHECK_ALLOC(po
->delta_data
);
1052 memcpy(po
->delta_data
, zbuf
.ptr
, zbuf
.size
);
1053 po
->z_delta_size
= zbuf
.size
;
1054 git_buf_clear(&zbuf
);
1056 git_packbuilder__cache_lock(pb
);
1057 pb
->delta_cache_size
-= po
->delta_size
;
1058 pb
->delta_cache_size
+= po
->z_delta_size
;
1059 git_packbuilder__cache_unlock(pb
);
1063 * If we made n a delta, and if n is already at max
1064 * depth, leaving it in the window is pointless. we
1065 * should evict it first.
1067 if (po
->delta
&& max_depth
<= n
->depth
)
1071 * Move the best delta base up in the window, after the
1072 * currently deltified object, to keep it longer. It will
1073 * be the first base object to be attempted next.
1076 struct unpacked swap
= array
[best_base
];
1077 size_t dist
= (window
+ idx
- best_base
) % window
;
1078 size_t dst
= best_base
;
1080 size_t src
= (dst
+ 1) % window
;
1081 array
[dst
] = array
[src
];
1089 if (count
+ 1 < window
)
1097 for (i
= 0; i
< window
; ++i
) {
1098 git__free(array
[i
].index
);
1099 git__free(array
[i
].data
);
1102 git_buf_dispose(&zbuf
);
1109 struct thread_params
{
1111 git_packbuilder
*pb
;
1127 static void *threaded_find_deltas(void *arg
)
1129 struct thread_params
*me
= arg
;
1131 while (me
->remaining
) {
1132 if (find_deltas(me
->pb
, me
->list
, &me
->remaining
,
1133 me
->window
, me
->depth
) < 0) {
1137 git_packbuilder__progress_lock(me
->pb
);
1139 git_cond_signal(&me
->pb
->progress_cond
);
1140 git_packbuilder__progress_unlock(me
->pb
);
1142 if (git_mutex_lock(&me
->mutex
)) {
1143 git_error_set(GIT_ERROR_THREAD
, "unable to lock packfile condition mutex");
1147 while (!me
->data_ready
)
1148 git_cond_wait(&me
->cond
, &me
->mutex
);
1151 * We must not set ->data_ready before we wait on the
1152 * condition because the main thread may have set it to 1
1153 * before we get here. In order to be sure that new
1154 * work is available if we see 1 in ->data_ready, it
1155 * was initialized to 0 before this thread was spawned
1156 * and we reset it to 0 right away.
1159 git_mutex_unlock(&me
->mutex
);
1161 /* leave ->working 1 so that this doesn't get more work assigned */
1165 static int ll_find_deltas(git_packbuilder
*pb
, git_pobject
**list
,
1166 size_t list_size
, size_t window
, size_t depth
)
1168 struct thread_params
*p
;
1170 int ret
, active_threads
= 0;
1172 if (!pb
->nr_threads
)
1173 pb
->nr_threads
= git_online_cpus();
1175 if (pb
->nr_threads
<= 1) {
1176 find_deltas(pb
, list
, &list_size
, window
, depth
);
1180 p
= git__mallocarray(pb
->nr_threads
, sizeof(*p
));
1181 GIT_ERROR_CHECK_ALLOC(p
);
1183 /* Partition the work among the threads */
1184 for (i
= 0; i
< pb
->nr_threads
; ++i
) {
1185 size_t sub_size
= list_size
/ (pb
->nr_threads
- i
);
1187 /* don't use too small segments or no deltas will be found */
1188 if (sub_size
< 2*window
&& i
+1 < pb
->nr_threads
)
1192 p
[i
].window
= window
;
1195 p
[i
].data_ready
= 0;
1197 /* try to split chunks on "path" boundaries */
1198 while (sub_size
&& sub_size
< list_size
&&
1199 list
[sub_size
]->hash
&&
1200 list
[sub_size
]->hash
== list
[sub_size
-1]->hash
)
1204 p
[i
].list_size
= sub_size
;
1205 p
[i
].remaining
= sub_size
;
1208 list_size
-= sub_size
;
1211 /* Start work threads */
1212 for (i
= 0; i
< pb
->nr_threads
; ++i
) {
1213 if (!p
[i
].list_size
)
1216 git_mutex_init(&p
[i
].mutex
);
1217 git_cond_init(&p
[i
].cond
);
1219 ret
= git_thread_create(&p
[i
].thread
,
1220 threaded_find_deltas
, &p
[i
]);
1222 git_error_set(GIT_ERROR_THREAD
, "unable to create thread");
1229 * Now let's wait for work completion. Each time a thread is done
1230 * with its work, we steal half of the remaining work from the
1231 * thread with the largest number of unprocessed objects and give
1232 * it to that newly idle thread. This ensure good load balancing
1233 * until the remaining object list segments are simply too short
1234 * to be worth splitting anymore.
1236 while (active_threads
) {
1237 struct thread_params
*target
= NULL
;
1238 struct thread_params
*victim
= NULL
;
1239 size_t sub_size
= 0;
1241 /* Start by locating a thread that has transitioned its
1242 * 'working' flag from 1 -> 0. This indicates that it is
1243 * ready to receive more work using our work-stealing
1245 git_packbuilder__progress_lock(pb
);
1247 for (i
= 0; !target
&& i
< pb
->nr_threads
; i
++)
1252 git_cond_wait(&pb
->progress_cond
, &pb
->progress_mutex
);
1255 /* At this point we hold the progress lock and have located
1256 * a thread to receive more work. We still need to locate a
1257 * thread from which to steal work (the victim). */
1258 for (i
= 0; i
< pb
->nr_threads
; i
++)
1259 if (p
[i
].remaining
> 2*window
&&
1260 (!victim
|| victim
->remaining
< p
[i
].remaining
))
1264 sub_size
= victim
->remaining
/ 2;
1265 list
= victim
->list
+ victim
->list_size
- sub_size
;
1266 while (sub_size
&& list
[0]->hash
&&
1267 list
[0]->hash
== list
[-1]->hash
) {
1273 * It is possible for some "paths" to have
1274 * so many objects that no hash boundary
1275 * might be found. Let's just steal the
1276 * exact half in that case.
1278 sub_size
= victim
->remaining
/ 2;
1281 target
->list
= list
;
1282 victim
->list_size
-= sub_size
;
1283 victim
->remaining
-= sub_size
;
1285 target
->list_size
= sub_size
;
1286 target
->remaining
= sub_size
;
1287 target
->working
= 1;
1288 git_packbuilder__progress_unlock(pb
);
1290 if (git_mutex_lock(&target
->mutex
)) {
1291 git_error_set(GIT_ERROR_THREAD
, "unable to lock packfile condition mutex");
1296 target
->data_ready
= 1;
1297 git_cond_signal(&target
->cond
);
1298 git_mutex_unlock(&target
->mutex
);
1301 git_thread_join(&target
->thread
, NULL
);
1302 git_cond_free(&target
->cond
);
1303 git_mutex_free(&target
->mutex
);
1313 #define ll_find_deltas(pb, l, ls, w, d) find_deltas(pb, l, &ls, w, d)
1316 static int prepare_pack(git_packbuilder
*pb
)
1318 git_pobject
**delta_list
;
1321 if (pb
->nr_objects
== 0 || pb
->done
)
1322 return 0; /* nothing to do */
1325 * Although we do not report progress during deltafication, we
1326 * at least report that we are in the deltafication stage
1328 if (pb
->progress_cb
)
1329 pb
->progress_cb(GIT_PACKBUILDER_DELTAFICATION
, 0, pb
->nr_objects
, pb
->progress_cb_payload
);
1331 delta_list
= git__mallocarray(pb
->nr_objects
, sizeof(*delta_list
));
1332 GIT_ERROR_CHECK_ALLOC(delta_list
);
1334 for (i
= 0; i
< pb
->nr_objects
; ++i
) {
1335 git_pobject
*po
= pb
->object_list
+ i
;
1337 /* Make sure the item is within our size limits */
1338 if (po
->size
< 50 || po
->size
> pb
->big_file_threshold
)
1341 delta_list
[n
++] = po
;
1345 git__tsort((void **)delta_list
, n
, type_size_sort
);
1346 if (ll_find_deltas(pb
, delta_list
, n
,
1347 GIT_PACK_WINDOW
+ 1,
1348 GIT_PACK_DEPTH
) < 0) {
1349 git__free(delta_list
);
1354 report_delta_progress(pb
, pb
->nr_objects
, true);
1357 git__free(delta_list
);
1361 #define PREPARE_PACK if (prepare_pack(pb) < 0) { return -1; }
1363 int git_packbuilder_foreach(git_packbuilder
*pb
, int (*cb
)(void *buf
, size_t size
, void *payload
), void *payload
)
1366 return write_pack(pb
, cb
, payload
);
1369 int git_packbuilder_write_buf(git_buf
*buf
, git_packbuilder
*pb
)
1372 git_buf_sanitize(buf
);
1373 return write_pack(pb
, &write_pack_buf
, buf
);
1376 static int write_cb(void *buf
, size_t len
, void *payload
)
1378 struct pack_write_context
*ctx
= payload
;
1379 return git_indexer_append(ctx
->indexer
, buf
, len
, ctx
->stats
);
1382 int git_packbuilder_write(
1383 git_packbuilder
*pb
,
1386 git_transfer_progress_cb progress_cb
,
1387 void *progress_cb_payload
)
1389 git_indexer_options opts
= GIT_INDEXER_OPTIONS_INIT
;
1390 git_indexer
*indexer
;
1391 git_transfer_progress stats
;
1392 struct pack_write_context ctx
;
1397 opts
.progress_cb
= progress_cb
;
1398 opts
.progress_cb_payload
= progress_cb_payload
;
1400 if (git_indexer_new(
1401 &indexer
, path
, mode
, pb
->odb
, &opts
) < 0)
1404 if (!git_repository__cvar(&t
, pb
->repo
, GIT_CVAR_FSYNCOBJECTFILES
) && t
)
1405 git_indexer__set_fsync(indexer
, 1);
1407 ctx
.indexer
= indexer
;
1410 if (git_packbuilder_foreach(pb
, write_cb
, &ctx
) < 0 ||
1411 git_indexer_commit(indexer
, &stats
) < 0) {
1412 git_indexer_free(indexer
);
1416 git_oid_cpy(&pb
->pack_oid
, git_indexer_hash(indexer
));
1418 git_indexer_free(indexer
);
1424 const git_oid
*git_packbuilder_hash(git_packbuilder
*pb
)
1426 return &pb
->pack_oid
;
1430 static int cb_tree_walk(
1431 const char *root
, const git_tree_entry
*entry
, void *payload
)
1434 struct tree_walk_context
*ctx
= payload
;
1436 /* A commit inside a tree represents a submodule commit and should be skipped. */
1437 if (git_tree_entry_type(entry
) == GIT_OBJECT_COMMIT
)
1440 if (!(error
= git_buf_sets(&ctx
->buf
, root
)) &&
1441 !(error
= git_buf_puts(&ctx
->buf
, git_tree_entry_name(entry
))))
1442 error
= git_packbuilder_insert(
1443 ctx
->pb
, git_tree_entry_id(entry
), git_buf_cstr(&ctx
->buf
));
1448 int git_packbuilder_insert_commit(git_packbuilder
*pb
, const git_oid
*oid
)
1452 if (git_commit_lookup(&commit
, pb
->repo
, oid
) < 0 ||
1453 git_packbuilder_insert(pb
, oid
, NULL
) < 0)
1456 if (git_packbuilder_insert_tree(pb
, git_commit_tree_id(commit
)) < 0)
1459 git_commit_free(commit
);
1463 int git_packbuilder_insert_tree(git_packbuilder
*pb
, const git_oid
*oid
)
1466 git_tree
*tree
= NULL
;
1467 struct tree_walk_context context
= { pb
, GIT_BUF_INIT
};
1469 if (!(error
= git_tree_lookup(&tree
, pb
->repo
, oid
)) &&
1470 !(error
= git_packbuilder_insert(pb
, oid
, NULL
)))
1471 error
= git_tree_walk(tree
, GIT_TREEWALK_PRE
, cb_tree_walk
, &context
);
1473 git_tree_free(tree
);
1474 git_buf_dispose(&context
.buf
);
1478 int git_packbuilder_insert_recur(git_packbuilder
*pb
, const git_oid
*id
, const char *name
)
1485 if ((error
= git_object_lookup(&obj
, pb
->repo
, id
, GIT_OBJECT_ANY
)) < 0)
1488 switch (git_object_type(obj
)) {
1489 case GIT_OBJECT_BLOB
:
1490 error
= git_packbuilder_insert(pb
, id
, name
);
1492 case GIT_OBJECT_TREE
:
1493 error
= git_packbuilder_insert_tree(pb
, id
);
1495 case GIT_OBJECT_COMMIT
:
1496 error
= git_packbuilder_insert_commit(pb
, id
);
1498 case GIT_OBJECT_TAG
:
1499 if ((error
= git_packbuilder_insert(pb
, id
, name
)) < 0)
1501 error
= git_packbuilder_insert_recur(pb
, git_tag_target_id((git_tag
*) obj
), NULL
);
1505 git_error_set(GIT_ERROR_INVALID
, "unknown object type");
1510 git_object_free(obj
);
1514 size_t git_packbuilder_object_count(git_packbuilder
*pb
)
1516 return pb
->nr_objects
;
1519 size_t git_packbuilder_written(git_packbuilder
*pb
)
1521 return pb
->nr_written
;
1524 static int lookup_walk_object(struct walk_object
**out
, git_packbuilder
*pb
, const git_oid
*id
)
1526 struct walk_object
*obj
;
1528 obj
= git_pool_mallocz(&pb
->object_pool
, 1);
1530 git_error_set_oom();
1534 git_oid_cpy(&obj
->id
, id
);
1540 static int retrieve_object(struct walk_object
**out
, git_packbuilder
*pb
, const git_oid
*id
)
1544 struct walk_object
*obj
;
1546 pos
= git_oidmap_lookup_index(pb
->walk_objects
, id
);
1547 if (git_oidmap_valid_index(pb
->walk_objects
, pos
)) {
1548 obj
= git_oidmap_value_at(pb
->walk_objects
, pos
);
1550 if ((error
= lookup_walk_object(&obj
, pb
, id
)) < 0)
1553 git_oidmap_insert(pb
->walk_objects
, &obj
->id
, obj
, &error
);
1560 static int mark_blob_uninteresting(git_packbuilder
*pb
, const git_oid
*id
)
1563 struct walk_object
*obj
;
1565 if ((error
= retrieve_object(&obj
, pb
, id
)) < 0)
1568 obj
->uninteresting
= 1;
1573 static int mark_tree_uninteresting(git_packbuilder
*pb
, const git_oid
*id
)
1575 struct walk_object
*obj
;
1580 if ((error
= retrieve_object(&obj
, pb
, id
)) < 0)
1583 if (obj
->uninteresting
)
1586 obj
->uninteresting
= 1;
1588 if ((error
= git_tree_lookup(&tree
, pb
->repo
, id
)) < 0)
1591 for (i
= 0; i
< git_tree_entrycount(tree
); i
++) {
1592 const git_tree_entry
*entry
= git_tree_entry_byindex(tree
, i
);
1593 const git_oid
*entry_id
= git_tree_entry_id(entry
);
1594 switch (git_tree_entry_type(entry
)) {
1595 case GIT_OBJECT_TREE
:
1596 if ((error
= mark_tree_uninteresting(pb
, entry_id
)) < 0)
1599 case GIT_OBJECT_BLOB
:
1600 if ((error
= mark_blob_uninteresting(pb
, entry_id
)) < 0)
1604 /* it's a submodule or something unknown, we don't want it */
1610 git_tree_free(tree
);
1615 * Mark the edges of the graph uninteresting. Since we start from a
1616 * git_revwalk, the commits are already uninteresting, but we need to
1617 * mark the trees and blobs.
1619 static int mark_edges_uninteresting(git_packbuilder
*pb
, git_commit_list
*commits
)
1622 git_commit_list
*list
;
1625 for (list
= commits
; list
; list
= list
->next
) {
1626 if (!list
->item
->uninteresting
)
1629 if ((error
= git_commit_lookup(&commit
, pb
->repo
, &list
->item
->oid
)) < 0)
1632 error
= mark_tree_uninteresting(pb
, git_commit_tree_id(commit
));
1633 git_commit_free(commit
);
1642 int insert_tree(git_packbuilder
*pb
, git_tree
*tree
)
1647 struct walk_object
*obj
;
1650 if ((error
= retrieve_object(&obj
, pb
, git_tree_id(tree
))) < 0)
1653 if (obj
->seen
|| obj
->uninteresting
)
1658 if ((error
= git_packbuilder_insert(pb
, &obj
->id
, NULL
)))
1661 for (i
= 0; i
< git_tree_entrycount(tree
); i
++) {
1662 const git_tree_entry
*entry
= git_tree_entry_byindex(tree
, i
);
1663 const git_oid
*entry_id
= git_tree_entry_id(entry
);
1664 switch (git_tree_entry_type(entry
)) {
1665 case GIT_OBJECT_TREE
:
1666 if ((error
= git_tree_lookup(&subtree
, pb
->repo
, entry_id
)) < 0)
1669 error
= insert_tree(pb
, subtree
);
1670 git_tree_free(subtree
);
1676 case GIT_OBJECT_BLOB
:
1677 if ((error
= retrieve_object(&obj
, pb
, entry_id
)) < 0)
1679 if (obj
->uninteresting
)
1681 name
= git_tree_entry_name(entry
);
1682 if ((error
= git_packbuilder_insert(pb
, entry_id
, name
)) < 0)
1686 /* it's a submodule or something unknown, we don't want it */
1695 int insert_commit(git_packbuilder
*pb
, struct walk_object
*obj
)
1698 git_commit
*commit
= NULL
;
1699 git_tree
*tree
= NULL
;
1703 if ((error
= git_packbuilder_insert(pb
, &obj
->id
, NULL
)) < 0)
1706 if ((error
= git_commit_lookup(&commit
, pb
->repo
, &obj
->id
)) < 0)
1709 if ((error
= git_tree_lookup(&tree
, pb
->repo
, git_commit_tree_id(commit
))) < 0)
1712 if ((error
= insert_tree(pb
, tree
)) < 0)
1716 git_commit_free(commit
);
1717 git_tree_free(tree
);
1721 int git_packbuilder_insert_walk(git_packbuilder
*pb
, git_revwalk
*walk
)
1725 struct walk_object
*obj
;
1729 if ((error
= mark_edges_uninteresting(pb
, walk
->user_input
)) < 0)
1733 * TODO: git marks the parents of the edges
1734 * uninteresting. This may provide a speed advantage, but does
1735 * seem to assume the remote does not have a single-commit
1736 * history on the other end.
1739 /* walk down each tree up to the blobs and insert them, stopping when uninteresting */
1740 while ((error
= git_revwalk_next(&id
, walk
)) == 0) {
1741 if ((error
= retrieve_object(&obj
, pb
, &id
)) < 0)
1744 if (obj
->seen
|| obj
->uninteresting
)
1747 if ((error
= insert_commit(pb
, obj
)) < 0)
1751 if (error
== GIT_ITEROVER
)
1757 int git_packbuilder_set_callbacks(git_packbuilder
*pb
, git_packbuilder_progress progress_cb
, void *progress_cb_payload
)
1762 pb
->progress_cb
= progress_cb
;
1763 pb
->progress_cb_payload
= progress_cb_payload
;
1768 void git_packbuilder_free(git_packbuilder
*pb
)
1775 git_mutex_free(&pb
->cache_mutex
);
1776 git_mutex_free(&pb
->progress_mutex
);
1777 git_cond_free(&pb
->progress_cond
);
1782 git_odb_free(pb
->odb
);
1785 git_oidmap_free(pb
->object_ix
);
1787 if (pb
->object_list
)
1788 git__free(pb
->object_list
);
1790 git_oidmap_free(pb
->walk_objects
);
1791 git_pool_clear(&pb
->object_pool
);
1793 git_hash_ctx_cleanup(&pb
->ctx
);
1794 git_zstream_free(&pb
->zstream
);