2 * Copyright (C) 2009-2012 the libgit2 contributors
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
8 #include "pack-objects.h"
15 #include "thread-utils.h"
18 #include "git2/pack.h"
19 #include "git2/commit.h"
21 #include "git2/indexer.h"
22 #include "git2/config.h"
29 struct git_delta_index
*index
;
35 #define GIT_PACKBUILDER__MUTEX_OP(pb, mtx, op) do { \
36 int result = git_mutex_##op(&(pb)->mtx); \
43 #define GIT_PACKBUILDER__MUTEX_OP(pb,mtx,op) GIT_UNUSED(pb)
45 #endif /* GIT_THREADS */
47 #define git_packbuilder__cache_lock(pb) GIT_PACKBUILDER__MUTEX_OP(pb, cache_mutex, lock)
48 #define git_packbuilder__cache_unlock(pb) GIT_PACKBUILDER__MUTEX_OP(pb, cache_mutex, unlock)
49 #define git_packbuilder__progress_lock(pb) GIT_PACKBUILDER__MUTEX_OP(pb, progress_mutex, lock)
50 #define git_packbuilder__progress_unlock(pb) GIT_PACKBUILDER__MUTEX_OP(pb, progress_mutex, unlock)
52 static unsigned name_hash(const char *name
)
60 * This effectively just creates a sortable number from the
61 * last sixteen non-whitespace characters. Last characters
62 * count "most", so things that end in ".c" sort together.
64 while ((c
= *name
++) != 0) {
67 hash
= (hash
>> 2) + (c
<< 24);
72 static int packbuilder_config(git_packbuilder
*pb
)
77 if (git_repository_config__weakptr(&config
, pb
->repo
) < 0)
80 #define config_get(key, dst, default) \
81 ret = git_config_get_int64((int64_t *)&dst, config, key); \
82 if (ret == GIT_ENOTFOUND) \
87 config_get("pack.deltaCacheSize", pb
->max_delta_cache_size
,
88 GIT_PACK_DELTA_CACHE_SIZE
);
89 config_get("pack.deltaCacheLimit", pb
->cache_max_small_delta_size
,
90 GIT_PACK_DELTA_CACHE_LIMIT
);
91 config_get("pack.deltaCacheSize", pb
->big_file_threshold
,
92 GIT_PACK_BIG_FILE_THRESHOLD
);
93 config_get("pack.windowMemory", pb
->window_memory_limit
, 0);
100 int git_packbuilder_new(git_packbuilder
**out
, git_repository
*repo
)
106 pb
= git__calloc(sizeof(*pb
), 1);
107 GITERR_CHECK_ALLOC(pb
);
109 pb
->object_ix
= git_oidmap_alloc();
115 pb
->nr_threads
= 1; /* do not spawn any thread by default */
116 pb
->ctx
= git_hash_new_ctx();
119 git_repository_odb(&pb
->odb
, repo
) < 0 ||
120 packbuilder_config(pb
) < 0)
125 if (git_mutex_init(&pb
->cache_mutex
) ||
126 git_mutex_init(&pb
->progress_mutex
) ||
127 git_cond_init(&pb
->progress_cond
))
136 git_packbuilder_free(pb
);
140 void git_packbuilder_set_threads(git_packbuilder
*pb
, unsigned int n
)
146 static void rehash(git_packbuilder
*pb
)
153 kh_clear(oid
, pb
->object_ix
);
154 for (i
= 0, po
= pb
->object_list
; i
< pb
->nr_objects
; i
++, po
++) {
155 pos
= kh_put(oid
, pb
->object_ix
, &po
->id
, &ret
);
156 kh_value(pb
->object_ix
, pos
) = po
;
160 int git_packbuilder_insert(git_packbuilder
*pb
, const git_oid
*oid
,
169 /* If the object already exists in the hash table, then we don't
170 * have any work to do */
171 pos
= kh_get(oid
, pb
->object_ix
, oid
);
172 if (pos
!= kh_end(pb
->object_ix
))
175 if (pb
->nr_objects
>= pb
->nr_alloc
) {
176 pb
->nr_alloc
= (pb
->nr_alloc
+ 1024) * 3 / 2;
177 pb
->object_list
= git__realloc(pb
->object_list
,
178 pb
->nr_alloc
* sizeof(*po
));
179 GITERR_CHECK_ALLOC(pb
->object_list
);
183 po
= pb
->object_list
+ pb
->nr_objects
;
184 memset(po
, 0x0, sizeof(*po
));
186 if (git_odb_read_header(&po
->size
, &po
->type
, pb
->odb
, oid
) < 0)
190 git_oid_cpy(&po
->id
, oid
);
191 po
->hash
= name_hash(name
);
193 pos
= kh_put(oid
, pb
->object_ix
, &po
->id
, &ret
);
195 kh_value(pb
->object_ix
, pos
) = po
;
202 * The per-object header is a pretty dense thing, which is
203 * - first byte: low four bits are "size",
204 * then three bits of "type",
205 * with the high bit being "size continues".
206 * - each byte afterwards: low seven bits are size continuation,
207 * with the high bit being "size continues"
209 static int gen_pack_object_header(
214 unsigned char *hdr_base
;
217 assert(type
>= GIT_OBJ_COMMIT
&& type
<= GIT_OBJ_REF_DELTA
);
219 /* TODO: add support for chunked objects; see git.git 6c0d19b1 */
221 c
= (unsigned char)((type
<< 4) | (size
& 15));
232 return hdr
- hdr_base
;
235 static int get_delta(void **out
, git_odb
*odb
, git_pobject
*po
)
237 git_odb_object
*src
= NULL
, *trg
= NULL
;
238 unsigned long delta_size
;
243 if (git_odb_read(&src
, odb
, &po
->delta
->id
) < 0 ||
244 git_odb_read(&trg
, odb
, &po
->id
) < 0)
247 delta_buf
= git_delta(git_odb_object_data(src
), git_odb_object_size(src
),
248 git_odb_object_data(trg
), git_odb_object_size(trg
),
251 if (!delta_buf
|| delta_size
!= po
->delta_size
) {
252 giterr_set(GITERR_INVALID
, "Delta size changed");
258 git_odb_object_free(src
);
259 git_odb_object_free(trg
);
263 git_odb_object_free(src
);
264 git_odb_object_free(trg
);
268 static int write_object(git_buf
*buf
, git_packbuilder
*pb
, git_pobject
*po
)
270 git_odb_object
*obj
= NULL
;
271 git_buf zbuf
= GIT_BUF_INIT
;
273 unsigned char hdr
[10];
274 unsigned int hdr_len
;
280 data
= po
->delta_data
;
281 else if (get_delta(&data
, pb
->odb
, po
) < 0)
283 size
= po
->delta_size
;
284 type
= GIT_OBJ_REF_DELTA
;
286 if (git_odb_read(&obj
, pb
->odb
, &po
->id
))
289 data
= (void *)git_odb_object_data(obj
);
290 size
= git_odb_object_size(obj
);
291 type
= git_odb_object_type(obj
);
295 hdr_len
= gen_pack_object_header(hdr
, size
, type
);
297 if (git_buf_put(buf
, (char *)hdr
, hdr_len
) < 0)
300 git_hash_update(pb
->ctx
, hdr
, hdr_len
);
302 if (type
== GIT_OBJ_REF_DELTA
) {
303 if (git_buf_put(buf
, (char *)po
->delta
->id
.id
,
307 git_hash_update(pb
->ctx
, po
->delta
->id
.id
, GIT_OID_RAWSZ
);
311 if (po
->z_delta_size
)
312 size
= po
->z_delta_size
;
313 else if (git__compress(&zbuf
, data
, size
) < 0)
322 if (git_buf_put(buf
, data
, size
) < 0)
325 git_hash_update(pb
->ctx
, data
, size
);
328 git__free(po
->delta_data
);
330 git_odb_object_free(obj
);
337 git_odb_object_free(obj
);
342 enum write_one_status
{
343 WRITE_ONE_SKIP
= -1, /* already written */
344 WRITE_ONE_BREAK
= 0, /* writing this will bust the limit; not written */
345 WRITE_ONE_WRITTEN
= 1, /* normal */
346 WRITE_ONE_RECURSIVE
= 2 /* already scheduled to be written */
349 static int write_one(git_buf
*buf
, git_packbuilder
*pb
, git_pobject
*po
,
350 enum write_one_status
*status
)
353 *status
= WRITE_ONE_RECURSIVE
;
355 } else if (po
->written
) {
356 *status
= WRITE_ONE_SKIP
;
362 if (write_one(buf
, pb
, po
->delta
, status
) < 0)
365 case WRITE_ONE_RECURSIVE
:
366 /* we cannot depend on this one */
376 return write_object(buf
, pb
, po
);
379 GIT_INLINE(void) add_to_write_order(git_pobject
**wo
, unsigned int *endp
,
388 static void add_descendants_to_write_order(git_pobject
**wo
, unsigned int *endp
,
391 int add_to_order
= 1;
395 /* add this node... */
396 add_to_write_order(wo
, endp
, po
);
397 /* all its siblings... */
398 for (s
= po
->delta_sibling
; s
; s
= s
->delta_sibling
) {
399 add_to_write_order(wo
, endp
, s
);
402 /* drop down a level to add left subtree nodes if possible */
403 if (po
->delta_child
) {
405 po
= po
->delta_child
;
408 /* our sibling might have some children, it is next */
409 if (po
->delta_sibling
) {
410 po
= po
->delta_sibling
;
413 /* go back to our parent node */
415 while (po
&& !po
->delta_sibling
) {
416 /* we're on the right side of a subtree, keep
417 * going up until we can go right again */
421 /* done- we hit our original root node */
424 /* pass it off to sibling at this level */
425 po
= po
->delta_sibling
;
430 static void add_family_to_write_order(git_pobject
**wo
, unsigned int *endp
,
435 for (root
= po
; root
->delta
; root
= root
->delta
)
437 add_descendants_to_write_order(wo
, endp
, root
);
440 static int cb_tag_foreach(const char *name
, git_oid
*oid
, void *data
)
442 git_packbuilder
*pb
= data
;
448 pos
= kh_get(oid
, pb
->object_ix
, oid
);
449 if (pos
== kh_end(pb
->object_ix
))
452 po
= kh_value(pb
->object_ix
, pos
);
455 /* TODO: peel objects */
460 static git_pobject
**compute_write_order(git_packbuilder
*pb
)
462 unsigned int i
, wo_end
, last_untagged
;
464 git_pobject
**wo
= git__malloc(sizeof(*wo
) * pb
->nr_objects
);
466 for (i
= 0; i
< pb
->nr_objects
; i
++) {
467 git_pobject
*po
= pb
->object_list
+ i
;
470 po
->delta_child
= NULL
;
471 po
->delta_sibling
= NULL
;
475 * Fully connect delta_child/delta_sibling network.
476 * Make sure delta_sibling is sorted in the original
479 for (i
= pb
->nr_objects
; i
> 0;) {
480 git_pobject
*po
= &pb
->object_list
[--i
];
483 /* Mark me as the first child */
484 po
->delta_sibling
= po
->delta
->delta_child
;
485 po
->delta
->delta_child
= po
;
489 * Mark objects that are at the tip of tags.
491 if (git_tag_foreach(pb
->repo
, &cb_tag_foreach
, pb
) < 0)
495 * Give the objects in the original recency order until
496 * we see a tagged tip.
498 for (i
= wo_end
= 0; i
< pb
->nr_objects
; i
++) {
499 git_pobject
*po
= pb
->object_list
+ i
;
502 add_to_write_order(wo
, &wo_end
, po
);
507 * Then fill all the tagged tips.
509 for (; i
< pb
->nr_objects
; i
++) {
510 git_pobject
*po
= pb
->object_list
+ i
;
512 add_to_write_order(wo
, &wo_end
, po
);
516 * And then all remaining commits and tags.
518 for (i
= last_untagged
; i
< pb
->nr_objects
; i
++) {
519 git_pobject
*po
= pb
->object_list
+ i
;
520 if (po
->type
!= GIT_OBJ_COMMIT
&&
521 po
->type
!= GIT_OBJ_TAG
)
523 add_to_write_order(wo
, &wo_end
, po
);
527 * And then all the trees.
529 for (i
= last_untagged
; i
< pb
->nr_objects
; i
++) {
530 git_pobject
*po
= pb
->object_list
+ i
;
531 if (po
->type
!= GIT_OBJ_TREE
)
533 add_to_write_order(wo
, &wo_end
, po
);
537 * Finally all the rest in really tight order
539 for (i
= last_untagged
; i
< pb
->nr_objects
; i
++) {
540 git_pobject
*po
= pb
->object_list
+ i
;
542 add_family_to_write_order(wo
, &wo_end
, po
);
545 if (wo_end
!= pb
->nr_objects
) {
546 giterr_set(GITERR_INVALID
, "invalid write order");
553 static int write_pack(git_packbuilder
*pb
,
554 int (*cb
)(void *buf
, size_t size
, void *data
),
557 git_pobject
**write_order
;
559 git_buf buf
= GIT_BUF_INIT
;
560 enum write_one_status status
;
561 struct git_pack_header ph
;
564 write_order
= compute_write_order(pb
);
565 if (write_order
== NULL
)
568 /* Write pack header */
569 ph
.hdr_signature
= htonl(PACK_SIGNATURE
);
570 ph
.hdr_version
= htonl(PACK_VERSION
);
571 ph
.hdr_entries
= htonl(pb
->nr_objects
);
573 if (cb(&ph
, sizeof(ph
), data
) < 0)
576 git_hash_update(pb
->ctx
, &ph
, sizeof(ph
));
578 pb
->nr_remaining
= pb
->nr_objects
;
581 for ( ; i
< pb
->nr_objects
; ++i
) {
583 if (write_one(&buf
, pb
, po
, &status
) < 0)
585 if (cb(buf
.ptr
, buf
.size
, data
) < 0)
590 pb
->nr_remaining
-= pb
->nr_written
;
591 } while (pb
->nr_remaining
&& i
< pb
->nr_objects
);
593 git__free(write_order
);
595 git_hash_final(&pb
->pack_oid
, pb
->ctx
);
597 return cb(pb
->pack_oid
.id
, GIT_OID_RAWSZ
, data
);
600 git__free(write_order
);
605 static int send_pack_file(void *buf
, size_t size
, void *data
)
607 git_transport
*t
= (git_transport
*)data
;
608 return gitno_send(t
, buf
, size
, 0);
611 static int write_pack_buf(void *buf
, size_t size
, void *data
)
613 git_buf
*b
= (git_buf
*)data
;
614 return git_buf_put(b
, buf
, size
);
617 static int write_pack_to_file(void *buf
, size_t size
, void *data
)
619 git_filebuf
*file
= (git_filebuf
*)data
;
620 return git_filebuf_write(file
, buf
, size
);
623 static int write_pack_file(git_packbuilder
*pb
, const char *path
)
625 git_filebuf file
= GIT_FILEBUF_INIT
;
627 if (git_filebuf_open(&file
, path
, 0) < 0 ||
628 write_pack(pb
, &write_pack_to_file
, &file
) < 0 ||
629 git_filebuf_commit(&file
, GIT_PACK_FILE_MODE
) < 0) {
630 git_filebuf_cleanup(&file
);
637 static int type_size_sort(const void *_a
, const void *_b
)
639 const git_pobject
*a
= (git_pobject
*)_a
;
640 const git_pobject
*b
= (git_pobject
*)_b
;
642 if (a
->type
> b
->type
)
644 if (a
->type
< b
->type
)
646 if (a
->hash
> b
->hash
)
648 if (a
->hash
< b
->hash
)
653 if (a->preferred_base > b->preferred_base)
655 if (a->preferred_base < b->preferred_base)
658 if (a
->size
> b
->size
)
660 if (a
->size
< b
->size
)
662 return a
< b
? -1 : (a
> b
); /* newest first */
665 static int delta_cacheable(git_packbuilder
*pb
, unsigned long src_size
,
666 unsigned long trg_size
, unsigned long delta_size
)
668 if (pb
->max_delta_cache_size
&&
669 pb
->delta_cache_size
+ delta_size
> pb
->max_delta_cache_size
)
672 if (delta_size
< pb
->cache_max_small_delta_size
)
675 /* cache delta, if objects are large enough compared to delta size */
676 if ((src_size
>> 20) + (trg_size
>> 21) > (delta_size
>> 10))
682 static int try_delta(git_packbuilder
*pb
, struct unpacked
*trg
,
683 struct unpacked
*src
, unsigned int max_depth
,
684 unsigned long *mem_usage
, int *ret
)
686 git_pobject
*trg_object
= trg
->object
;
687 git_pobject
*src_object
= src
->object
;
689 unsigned long trg_size
, src_size
, delta_size
,
690 sizediff
, max_size
, sz
;
691 unsigned int ref_depth
;
694 /* Don't bother doing diffs between different types */
695 if (trg_object
->type
!= src_object
->type
) {
702 /* TODO: support reuse-delta */
704 /* Let's not bust the allowed depth. */
705 if (src
->depth
>= max_depth
)
708 /* Now some size filtering heuristics. */
709 trg_size
= trg_object
->size
;
710 if (!trg_object
->delta
) {
711 max_size
= trg_size
/2 - 20;
714 max_size
= trg_object
->delta_size
;
715 ref_depth
= trg
->depth
;
718 max_size
= (uint64_t)max_size
* (max_depth
- src
->depth
) /
719 (max_depth
- ref_depth
+ 1);
723 src_size
= src_object
->size
;
724 sizediff
= src_size
< trg_size
? trg_size
- src_size
: 0;
725 if (sizediff
>= max_size
)
727 if (trg_size
< src_size
/ 32)
730 /* Load data if not already done */
732 if (git_odb_read(&obj
, pb
->odb
, &trg_object
->id
) < 0)
735 sz
= git_odb_object_size(obj
);
736 trg
->data
= git__malloc(sz
);
737 GITERR_CHECK_ALLOC(trg
->data
);
738 memcpy(trg
->data
, git_odb_object_data(obj
), sz
);
740 git_odb_object_free(obj
);
742 if (sz
!= trg_size
) {
743 giterr_set(GITERR_INVALID
,
744 "Inconsistent target object length");
751 if (git_odb_read(&obj
, pb
->odb
, &src_object
->id
) < 0)
754 sz
= git_odb_object_size(obj
);
755 src
->data
= git__malloc(sz
);
756 GITERR_CHECK_ALLOC(src
->data
);
757 memcpy(src
->data
, git_odb_object_data(obj
), sz
);
759 git_odb_object_free(obj
);
761 if (sz
!= src_size
) {
762 giterr_set(GITERR_INVALID
,
763 "Inconsistent source object length");
770 src
->index
= git_delta_create_index(src
->data
, src_size
);
772 return 0; /* suboptimal pack - out of memory */
774 *mem_usage
+= git_delta_sizeof_index(src
->index
);
777 delta_buf
= git_delta_create(src
->index
, trg
->data
, trg_size
,
778 &delta_size
, max_size
);
782 if (trg_object
->delta
) {
783 /* Prefer only shallower same-sized deltas. */
784 if (delta_size
== trg_object
->delta_size
&&
785 src
->depth
+ 1 >= trg
->depth
) {
786 git__free(delta_buf
);
791 git_packbuilder__cache_lock(pb
);
792 if (trg_object
->delta_data
) {
793 git__free(trg_object
->delta_data
);
794 pb
->delta_cache_size
-= trg_object
->delta_size
;
795 trg_object
->delta_data
= NULL
;
797 if (delta_cacheable(pb
, src_size
, trg_size
, delta_size
)) {
798 pb
->delta_cache_size
+= delta_size
;
799 git_packbuilder__cache_unlock(pb
);
801 trg_object
->delta_data
= git__realloc(delta_buf
, delta_size
);
802 GITERR_CHECK_ALLOC(trg_object
->delta_data
);
804 /* create delta when writing the pack */
805 git_packbuilder__cache_unlock(pb
);
806 git__free(delta_buf
);
809 trg_object
->delta
= src_object
;
810 trg_object
->delta_size
= delta_size
;
811 trg
->depth
= src
->depth
+ 1;
817 static unsigned int check_delta_limit(git_pobject
*me
, unsigned int n
)
819 git_pobject
*child
= me
->delta_child
;
823 unsigned int c
= check_delta_limit(child
, n
+ 1);
826 child
= child
->delta_sibling
;
831 static unsigned long free_unpacked(struct unpacked
*n
)
833 unsigned long freed_mem
= git_delta_sizeof_index(n
->index
);
834 git_delta_free_index(n
->index
);
837 freed_mem
+= n
->object
->size
;
846 static int find_deltas(git_packbuilder
*pb
, git_pobject
**list
,
847 unsigned int *list_size
, unsigned int window
,
851 git_buf zbuf
= GIT_BUF_INIT
;
852 struct unpacked
*array
;
853 uint32_t idx
= 0, count
= 0;
854 unsigned long mem_usage
= 0;
858 array
= git__calloc(window
, sizeof(struct unpacked
));
859 GITERR_CHECK_ALLOC(array
);
862 struct unpacked
*n
= array
+ idx
;
863 unsigned int max_depth
;
864 int j
, best_base
= -1;
866 git_packbuilder__progress_lock(pb
);
868 git_packbuilder__progress_unlock(pb
);
874 git_packbuilder__progress_unlock(pb
);
876 mem_usage
-= free_unpacked(n
);
879 while (pb
->window_memory_limit
&&
880 mem_usage
> pb
->window_memory_limit
&&
882 uint32_t tail
= (idx
+ window
- count
) % window
;
883 mem_usage
-= free_unpacked(array
+ tail
);
888 * If the current object is at pack edge, take the depth the
889 * objects that depend on the current object into account
890 * otherwise they would become too deep.
893 if (po
->delta_child
) {
894 max_depth
-= check_delta_limit(po
, 0);
902 uint32_t other_idx
= idx
+ j
;
905 if (other_idx
>= window
)
908 m
= array
+ other_idx
;
912 if (try_delta(pb
, n
, m
, max_depth
, &mem_usage
, &ret
) < 0)
917 best_base
= other_idx
;
921 * If we decided to cache the delta data, then it is best
922 * to compress it right away. First because we have to do
923 * it anyway, and doing it here while we're threaded will
924 * save a lot of time in the non threaded write phase,
925 * as well as allow for caching more deltas within
926 * the same cache size limit.
928 * But only if not writing to stdout, since in that case
929 * the network is most likely throttling writes anyway,
930 * and therefore it is best to go to the write phase ASAP
931 * instead, as we can afford spending more time compressing
932 * between writes at that moment.
934 if (po
->delta_data
) {
935 if (git__compress(&zbuf
, po
->delta_data
, po
->delta_size
) < 0)
938 git__free(po
->delta_data
);
939 po
->delta_data
= git__malloc(zbuf
.size
);
940 GITERR_CHECK_ALLOC(po
->delta_data
);
942 memcpy(po
->delta_data
, zbuf
.ptr
, zbuf
.size
);
943 po
->z_delta_size
= zbuf
.size
;
944 git_buf_clear(&zbuf
);
946 git_packbuilder__cache_lock(pb
);
947 pb
->delta_cache_size
-= po
->delta_size
;
948 pb
->delta_cache_size
+= po
->z_delta_size
;
949 git_packbuilder__cache_unlock(pb
);
953 * If we made n a delta, and if n is already at max
954 * depth, leaving it in the window is pointless. we
955 * should evict it first.
957 if (po
->delta
&& max_depth
<= n
->depth
)
961 * Move the best delta base up in the window, after the
962 * currently deltified object, to keep it longer. It will
963 * be the first base object to be attempted next.
966 struct unpacked swap
= array
[best_base
];
967 int dist
= (window
+ idx
- best_base
) % window
;
970 int src
= (dst
+ 1) % window
;
971 array
[dst
] = array
[src
];
979 if (count
+ 1 < window
)
987 for (i
= 0; i
< window
; ++i
) {
988 git__free(array
[i
].index
);
989 git__free(array
[i
].data
);
999 struct thread_params
{
1001 git_packbuilder
*pb
;
1008 unsigned int list_size
;
1009 unsigned int remaining
;
1017 static void *threaded_find_deltas(void *arg
)
1019 struct thread_params
*me
= arg
;
1021 while (me
->remaining
) {
1022 if (find_deltas(me
->pb
, me
->list
, &me
->remaining
,
1023 me
->window
, me
->depth
) < 0) {
1027 git_packbuilder__progress_lock(me
->pb
);
1029 git_cond_signal(&me
->pb
->progress_cond
);
1030 git_packbuilder__progress_unlock(me
->pb
);
1033 * We must not set ->data_ready before we wait on the
1034 * condition because the main thread may have set it to 1
1035 * before we get here. In order to be sure that new
1036 * work is available if we see 1 in ->data_ready, it
1037 * was initialized to 0 before this thread was spawned
1038 * and we reset it to 0 right away.
1040 git_mutex_lock(&me
->mutex
);
1041 while (!me
->data_ready
)
1042 git_cond_wait(&me
->cond
, &me
->mutex
);
1044 git_mutex_unlock(&me
->mutex
);
1046 /* leave ->working 1 so that this doesn't get more work assigned */
1050 static int ll_find_deltas(git_packbuilder
*pb
, git_pobject
**list
,
1051 unsigned int list_size
, unsigned int window
,
1054 struct thread_params
*p
;
1055 int i
, ret
, active_threads
= 0;
1057 if (!pb
->nr_threads
)
1058 pb
->nr_threads
= git_online_cpus();
1060 if (pb
->nr_threads
<= 1) {
1061 find_deltas(pb
, list
, &list_size
, window
, depth
);
1065 p
= git__malloc(pb
->nr_threads
* sizeof(*p
));
1066 GITERR_CHECK_ALLOC(p
);
1068 /* Partition the work among the threads */
1069 for (i
= 0; i
< pb
->nr_threads
; ++i
) {
1070 unsigned sub_size
= list_size
/ (pb
->nr_threads
- i
);
1072 /* don't use too small segments or no deltas will be found */
1073 if (sub_size
< 2*window
&& i
+1 < pb
->nr_threads
)
1077 p
[i
].window
= window
;
1080 p
[i
].data_ready
= 0;
1082 /* try to split chunks on "path" boundaries */
1083 while (sub_size
&& sub_size
< list_size
&&
1084 list
[sub_size
]->hash
&&
1085 list
[sub_size
]->hash
== list
[sub_size
-1]->hash
)
1089 p
[i
].list_size
= sub_size
;
1090 p
[i
].remaining
= sub_size
;
1093 list_size
-= sub_size
;
1096 /* Start work threads */
1097 for (i
= 0; i
< pb
->nr_threads
; ++i
) {
1098 if (!p
[i
].list_size
)
1101 git_mutex_init(&p
[i
].mutex
);
1102 git_cond_init(&p
[i
].cond
);
1104 ret
= git_thread_create(&p
[i
].thread
, NULL
,
1105 threaded_find_deltas
, &p
[i
]);
1107 giterr_set(GITERR_THREAD
, "unable to create thread");
1114 * Now let's wait for work completion. Each time a thread is done
1115 * with its work, we steal half of the remaining work from the
1116 * thread with the largest number of unprocessed objects and give
1117 * it to that newly idle thread. This ensure good load balancing
1118 * until the remaining object list segments are simply too short
1119 * to be worth splitting anymore.
1121 while (active_threads
) {
1122 struct thread_params
*target
= NULL
;
1123 struct thread_params
*victim
= NULL
;
1124 unsigned sub_size
= 0;
1126 /* Start by locating a thread that has transitioned its
1127 * 'working' flag from 1 -> 0. This indicates that it is
1128 * ready to receive more work using our work-stealing
1130 git_packbuilder__progress_lock(pb
);
1132 for (i
= 0; !target
&& i
< pb
->nr_threads
; i
++)
1137 git_cond_wait(&pb
->progress_cond
, &pb
->progress_mutex
);
1140 /* At this point we hold the progress lock and have located
1141 * a thread to receive more work. We still need to locate a
1142 * thread from which to steal work (the victim). */
1143 for (i
= 0; i
< pb
->nr_threads
; i
++)
1144 if (p
[i
].remaining
> 2*window
&&
1145 (!victim
|| victim
->remaining
< p
[i
].remaining
))
1149 sub_size
= victim
->remaining
/ 2;
1150 list
= victim
->list
+ victim
->list_size
- sub_size
;
1151 while (sub_size
&& list
[0]->hash
&&
1152 list
[0]->hash
== list
[-1]->hash
) {
1158 * It is possible for some "paths" to have
1159 * so many objects that no hash boundary
1160 * might be found. Let's just steal the
1161 * exact half in that case.
1163 sub_size
= victim
->remaining
/ 2;
1166 target
->list
= list
;
1167 victim
->list_size
-= sub_size
;
1168 victim
->remaining
-= sub_size
;
1170 target
->list_size
= sub_size
;
1171 target
->remaining
= sub_size
;
1172 target
->working
= 1;
1173 git_packbuilder__progress_unlock(pb
);
1175 git_mutex_lock(&target
->mutex
);
1176 target
->data_ready
= 1;
1177 git_cond_signal(&target
->cond
);
1178 git_mutex_unlock(&target
->mutex
);
1181 git_thread_join(target
->thread
, NULL
);
1182 git_cond_free(&target
->cond
);
1183 git_mutex_free(&target
->mutex
);
1193 #define ll_find_deltas(pb, l, ls, w, d) find_deltas(pb, l, &ls, w, d)
1196 static int prepare_pack(git_packbuilder
*pb
)
1198 git_pobject
**delta_list
;
1199 unsigned int i
, n
= 0;
1201 if (pb
->nr_objects
== 0 || pb
->done
)
1202 return 0; /* nothing to do */
1204 delta_list
= git__malloc(pb
->nr_objects
* sizeof(*delta_list
));
1205 GITERR_CHECK_ALLOC(delta_list
);
1207 for (i
= 0; i
< pb
->nr_objects
; ++i
) {
1208 git_pobject
*po
= pb
->object_list
+ i
;
1210 /* Make sure the item is within our size limits */
1211 if (po
->size
< 50 || po
->size
> pb
->big_file_threshold
)
1214 delta_list
[n
++] = po
;
1218 git__tsort((void **)delta_list
, n
, type_size_sort
);
1219 if (ll_find_deltas(pb
, delta_list
, n
,
1220 GIT_PACK_WINDOW
+ 1,
1221 GIT_PACK_DEPTH
) < 0) {
1222 git__free(delta_list
);
1228 git__free(delta_list
);
1232 #define PREPARE_PACK if (prepare_pack(pb) < 0) { return -1; }
1234 int git_packbuilder_send(git_packbuilder
*pb
, git_transport
*t
)
1237 return write_pack(pb
, &send_pack_file
, t
);
1240 int git_packbuilder_write_buf(git_buf
*buf
, git_packbuilder
*pb
)
1243 return write_pack(pb
, &write_pack_buf
, buf
);
1246 int git_packbuilder_write(git_packbuilder
*pb
, const char *path
)
1249 return write_pack_file(pb
, path
);
1254 static int cb_tree_walk(const char *root
, const git_tree_entry
*entry
, void *payload
)
1256 git_packbuilder
*pb
= payload
;
1257 git_buf buf
= GIT_BUF_INIT
;
1259 git_buf_puts(&buf
, root
);
1260 git_buf_puts(&buf
, git_tree_entry_name(entry
));
1262 if (git_packbuilder_insert(pb
, git_tree_entry_id(entry
),
1263 git_buf_cstr(&buf
)) < 0) {
1272 int git_packbuilder_insert_tree(git_packbuilder
*pb
, const git_oid
*oid
)
1276 if (git_tree_lookup(&tree
, pb
->repo
, oid
) < 0 ||
1277 git_packbuilder_insert(pb
, oid
, NULL
) < 0)
1280 if (git_tree_walk(tree
, cb_tree_walk
, GIT_TREEWALK_PRE
, pb
) < 0) {
1281 git_tree_free(tree
);
1285 git_tree_free(tree
);
1289 void git_packbuilder_free(git_packbuilder
*pb
)
1296 git_mutex_free(&pb
->cache_mutex
);
1297 git_mutex_free(&pb
->progress_mutex
);
1298 git_cond_free(&pb
->progress_cond
);
1303 git_odb_free(pb
->odb
);
1306 git_hash_free_ctx(pb
->ctx
);
1309 git_oidmap_free(pb
->object_ix
);
1311 if (pb
->object_list
)
1312 git__free(pb
->object_list
);