]> git.proxmox.com Git - libgit2.git/blame - src/pack-objects.c
install as examples
[libgit2.git] / src / pack-objects.c
CommitLineData
0a32dca5 1/*
359fc2d2 2 * Copyright (C) the libgit2 contributors. All rights reserved.
0a32dca5
MS
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8#include "pack-objects.h"
9
c6f26b48 10#include "zstream.h"
0a32dca5
MS
11#include "delta.h"
12#include "iterator.h"
13#include "netops.h"
14#include "pack.h"
15#include "thread-utils.h"
16#include "tree.h"
b176eded 17#include "util.h"
04a36fef
CMN
18#include "revwalk.h"
19#include "commit_list.h"
0a32dca5
MS
20
21#include "git2/pack.h"
22#include "git2/commit.h"
23#include "git2/tag.h"
24#include "git2/indexer.h"
25#include "git2/config.h"
26
27struct unpacked {
28 git_pobject *object;
29 void *data;
30 struct git_delta_index *index;
60e15ecd 31 size_t depth;
0a32dca5
MS
32};
33
f4b86126
PK
34struct tree_walk_context {
35 git_packbuilder *pb;
36 git_buf buf;
37};
38
563c19a9 39struct pack_write_context {
a6154f21 40 git_indexer *indexer;
22a2d3d5 41 git_indexer_progress *stats;
563c19a9
MS
42};
43
ac3d33df
JK
44struct walk_object {
45 git_oid id;
46 unsigned int uninteresting:1,
47 seen:1;
48};
49
b4491b99
PK
50#ifdef GIT_THREADS
51
52#define GIT_PACKBUILDER__MUTEX_OP(pb, mtx, op) do { \
53 int result = git_mutex_##op(&(pb)->mtx); \
54 assert(!result); \
55 GIT_UNUSED(result); \
56 } while (0)
57
58#else
59
60#define GIT_PACKBUILDER__MUTEX_OP(pb,mtx,op) GIT_UNUSED(pb)
61
62#endif /* GIT_THREADS */
63
64#define git_packbuilder__cache_lock(pb) GIT_PACKBUILDER__MUTEX_OP(pb, cache_mutex, lock)
65#define git_packbuilder__cache_unlock(pb) GIT_PACKBUILDER__MUTEX_OP(pb, cache_mutex, unlock)
66#define git_packbuilder__progress_lock(pb) GIT_PACKBUILDER__MUTEX_OP(pb, progress_mutex, lock)
67#define git_packbuilder__progress_unlock(pb) GIT_PACKBUILDER__MUTEX_OP(pb, progress_mutex, unlock)
68
b176eded
JM
69/* The minimal interval between progress updates (in seconds). */
70#define MIN_PROGRESS_UPDATE_INTERVAL 0.5
71
0ade2f7a
ET
72/* Size of the buffer to feed to zlib */
73#define COMPRESS_BUFLEN (1024 * 1024)
74
0a32dca5
MS
75static unsigned name_hash(const char *name)
76{
77 unsigned c, hash = 0;
78
79 if (!name)
80 return 0;
81
82 /*
83 * This effectively just creates a sortable number from the
84 * last sixteen non-whitespace characters. Last characters
85 * count "most", so things that end in ".c" sort together.
86 */
87 while ((c = *name++) != 0) {
88 if (git__isspace(c))
89 continue;
90 hash = (hash >> 2) + (c << 24);
91 }
92 return hash;
93}
94
95static int packbuilder_config(git_packbuilder *pb)
96{
97 git_config *config;
39c9dd24 98 int ret = 0;
757b4065 99 int64_t val;
0a32dca5 100
ac99d86b 101 if ((ret = git_repository_config_snapshot(&config, pb->repo)) < 0)
29c4cb09 102 return ret;
0a32dca5 103
757b4065
RB
104#define config_get(KEY,DST,DFLT) do { \
105 ret = git_config_get_int64(&val, config, KEY); \
60e15ecd
ET
106 if (!ret) { \
107 if (!git__is_sizet(val)) { \
ac3d33df 108 git_error_set(GIT_ERROR_CONFIG, \
60e15ecd
ET
109 "configuration value '%s' is too large", KEY); \
110 ret = -1; \
111 goto out; \
112 } \
113 (DST) = (size_t)val; \
114 } else if (ret == GIT_ENOTFOUND) { \
39c9dd24
PS
115 (DST) = (DFLT); \
116 ret = 0; \
117 } else if (ret < 0) goto out; } while (0)
0a32dca5
MS
118
119 config_get("pack.deltaCacheSize", pb->max_delta_cache_size,
120 GIT_PACK_DELTA_CACHE_SIZE);
121 config_get("pack.deltaCacheLimit", pb->cache_max_small_delta_size,
122 GIT_PACK_DELTA_CACHE_LIMIT);
123 config_get("pack.deltaCacheSize", pb->big_file_threshold,
124 GIT_PACK_BIG_FILE_THRESHOLD);
125 config_get("pack.windowMemory", pb->window_memory_limit, 0);
126
127#undef config_get
128
39c9dd24 129out:
29c4cb09
CMN
130 git_config_free(config);
131
39c9dd24 132 return ret;
0a32dca5
MS
133}
134
135int git_packbuilder_new(git_packbuilder **out, git_repository *repo)
136{
137 git_packbuilder *pb;
138
139 *out = NULL;
140
603bee07 141 pb = git__calloc(1, sizeof(*pb));
ac3d33df 142 GIT_ERROR_CHECK_ALLOC(pb);
0a32dca5 143
22a2d3d5
UG
144 if (git_oidmap_new(&pb->object_ix) < 0 ||
145 git_oidmap_new(&pb->walk_objects) < 0 ||
146 git_pool_init(&pb->object_pool, sizeof(struct walk_object)) < 0)
b4491b99 147 goto on_error;
0cf49e10 148
0a32dca5
MS
149 pb->repo = repo;
150 pb->nr_threads = 1; /* do not spawn any thread by default */
0a32dca5 151
603bee07 152 if (git_hash_ctx_init(&pb->ctx) < 0 ||
b88f1713 153 git_zstream_init(&pb->zstream, GIT_ZSTREAM_DEFLATE) < 0 ||
b4491b99
PK
154 git_repository_odb(&pb->odb, repo) < 0 ||
155 packbuilder_config(pb) < 0)
0a32dca5
MS
156 goto on_error;
157
b4491b99
PK
158#ifdef GIT_THREADS
159
160 if (git_mutex_init(&pb->cache_mutex) ||
161 git_mutex_init(&pb->progress_mutex) ||
162 git_cond_init(&pb->progress_cond))
1a42dd17 163 {
ac3d33df 164 git_error_set(GIT_ERROR_OS, "failed to initialize packbuilder mutex");
0a32dca5 165 goto on_error;
1a42dd17 166 }
0a32dca5 167
b4491b99
PK
168#endif
169
0a32dca5
MS
170 *out = pb;
171 return 0;
172
173on_error:
b4491b99 174 git_packbuilder_free(pb);
0a32dca5
MS
175 return -1;
176}
177
2508cc66 178unsigned int git_packbuilder_set_threads(git_packbuilder *pb, unsigned int n)
0a32dca5
MS
179{
180 assert(pb);
b8b897bb
PK
181
182#ifdef GIT_THREADS
0a32dca5 183 pb->nr_threads = n;
b8b897bb
PK
184#else
185 GIT_UNUSED(n);
186 assert(1 == pb->nr_threads);
187#endif
188
2508cc66 189 return pb->nr_threads;
0a32dca5
MS
190}
191
22a2d3d5 192static int rehash(git_packbuilder *pb)
0cf49e10
MS
193{
194 git_pobject *po;
22a2d3d5 195 size_t i;
0cf49e10 196
c37b069b 197 git_oidmap_clear(pb->object_ix);
22a2d3d5 198
0cf49e10 199 for (i = 0, po = pb->object_list; i < pb->nr_objects; i++, po++) {
22a2d3d5
UG
200 if (git_oidmap_set(pb->object_ix, &po->id, po) < 0)
201 return -1;
0cf49e10 202 }
22a2d3d5
UG
203
204 return 0;
0cf49e10
MS
205}
206
0a32dca5
MS
207int git_packbuilder_insert(git_packbuilder *pb, const git_oid *oid,
208 const char *name)
209{
210 git_pobject *po;
22a2d3d5 211 size_t newsize;
0cf49e10 212 int ret;
0a32dca5
MS
213
214 assert(pb && oid);
215
b4491b99
PK
216 /* If the object already exists in the hash table, then we don't
217 * have any work to do */
036daa59 218 if (git_oidmap_exists(pb->object_ix, oid))
0a32dca5
MS
219 return 0;
220
221 if (pb->nr_objects >= pb->nr_alloc) {
ac3d33df 222 GIT_ERROR_CHECK_ALLOC_ADD(&newsize, pb->nr_alloc, 1024);
22a2d3d5 223 GIT_ERROR_CHECK_ALLOC_MULTIPLY(&newsize, newsize / 2, 3);
ec3b4d35
ET
224
225 if (!git__is_uint32(newsize)) {
ac3d33df 226 git_error_set(GIT_ERROR_NOMEMORY, "packfile too large to fit in memory.");
ec3b4d35
ET
227 return -1;
228 }
229
60e15ecd 230 pb->nr_alloc = newsize;
392702ee 231
3603cb09
ET
232 pb->object_list = git__reallocarray(pb->object_list,
233 pb->nr_alloc, sizeof(*po));
ac3d33df 234 GIT_ERROR_CHECK_ALLOC(pb->object_list);
22a2d3d5
UG
235
236 if (rehash(pb) < 0)
237 return -1;
0a32dca5
MS
238 }
239
b4491b99 240 po = pb->object_list + pb->nr_objects;
0a32dca5
MS
241 memset(po, 0x0, sizeof(*po));
242
25e0b157
RB
243 if ((ret = git_odb_read_header(&po->size, &po->type, pb->odb, oid)) < 0)
244 return ret;
b4491b99
PK
245
246 pb->nr_objects++;
0a32dca5 247 git_oid_cpy(&po->id, oid);
0cf49e10 248 po->hash = name_hash(name);
0a32dca5 249
22a2d3d5 250 if (git_oidmap_set(pb->object_ix, &po->id, po) < 0) {
ac3d33df 251 git_error_set_oom();
22a2d3d5 252 return -1;
25e0b157 253 }
0a32dca5 254
5b188225
JM
255 pb->done = false;
256
b176eded
JM
257 if (pb->progress_cb) {
258 double current_time = git__timer();
dab89f9b
RB
259 double elapsed = current_time - pb->last_progress_report_time;
260
261 if (elapsed >= MIN_PROGRESS_UPDATE_INTERVAL) {
b176eded 262 pb->last_progress_report_time = current_time;
dab89f9b 263
c7b3e1b3 264 ret = pb->progress_cb(
25e0b157 265 GIT_PACKBUILDER_ADDING_OBJECTS,
c7b3e1b3
RB
266 pb->nr_objects, 0, pb->progress_cb_payload);
267
268 if (ret)
ac3d33df 269 return git_error_set_after_callback(ret);
b176eded
JM
270 }
271 }
272
0a32dca5
MS
273 return 0;
274}
275
0a32dca5
MS
276static int get_delta(void **out, git_odb *odb, git_pobject *po)
277{
278 git_odb_object *src = NULL, *trg = NULL;
60e15ecd 279 size_t delta_size;
0a32dca5 280 void *delta_buf;
1cd65991 281 int error;
0a32dca5
MS
282
283 *out = NULL;
284
285 if (git_odb_read(&src, odb, &po->delta->id) < 0 ||
286 git_odb_read(&trg, odb, &po->id) < 0)
287 goto on_error;
288
1cd65991
ET
289 error = git_delta(&delta_buf, &delta_size,
290 git_odb_object_data(src), git_odb_object_size(src),
291 git_odb_object_data(trg), git_odb_object_size(trg),
292 0);
293
294 if (error < 0 && error != GIT_EBUFS)
295 goto on_error;
0a32dca5 296
1cd65991 297 if (error == GIT_EBUFS || delta_size != po->delta_size) {
ac3d33df 298 git_error_set(GIT_ERROR_INVALID, "delta size changed");
0a32dca5
MS
299 goto on_error;
300 }
301
302 *out = delta_buf;
303
304 git_odb_object_free(src);
305 git_odb_object_free(trg);
306 return 0;
307
308on_error:
309 git_odb_object_free(src);
310 git_odb_object_free(trg);
311 return -1;
312}
313
0ade2f7a
ET
314static int write_object(
315 git_packbuilder *pb,
316 git_pobject *po,
317 int (*write_cb)(void *buf, size_t size, void *cb_data),
318 void *cb_data)
0a32dca5
MS
319{
320 git_odb_object *obj = NULL;
ac3d33df 321 git_object_t type;
0ade2f7a 322 unsigned char hdr[10], *zbuf = NULL;
a14aa1e7 323 void *data = NULL;
0ade2f7a 324 size_t hdr_len, zbuf_len = COMPRESS_BUFLEN, data_len;
0ade2f7a 325 int error;
0a32dca5 326
a14aa1e7
CMN
327 /*
328 * If we have a delta base, let's use the delta to save space.
329 * Otherwise load the whole object. 'data' ends up pointing to
330 * whatever data we want to put into the packfile.
331 */
0a32dca5
MS
332 if (po->delta) {
333 if (po->delta_data)
a14aa1e7
CMN
334 data = po->delta_data;
335 else if ((error = get_delta(&data, pb->odb, po)) < 0)
0ade2f7a
ET
336 goto done;
337
338 data_len = po->delta_size;
ac3d33df 339 type = GIT_OBJECT_REF_DELTA;
0a32dca5 340 } else {
0ade2f7a
ET
341 if ((error = git_odb_read(&obj, pb->odb, &po->id)) < 0)
342 goto done;
0a32dca5
MS
343
344 data = (void *)git_odb_object_data(obj);
0ade2f7a 345 data_len = git_odb_object_size(obj);
0a32dca5
MS
346 type = git_odb_object_type(obj);
347 }
348
349 /* Write header */
0ade2f7a 350 hdr_len = git_packfile__object_header(hdr, data_len, type);
0a32dca5 351
0ade2f7a
ET
352 if ((error = write_cb(hdr, hdr_len, cb_data)) < 0 ||
353 (error = git_hash_update(&pb->ctx, hdr, hdr_len)) < 0)
354 goto done;
0a32dca5 355
ac3d33df 356 if (type == GIT_OBJECT_REF_DELTA) {
0ade2f7a
ET
357 if ((error = write_cb(po->delta->id.id, GIT_OID_RAWSZ, cb_data)) < 0 ||
358 (error = git_hash_update(&pb->ctx, po->delta->id.id, GIT_OID_RAWSZ)) < 0)
359 goto done;
0a32dca5
MS
360 }
361
362 /* Write data */
0ade2f7a
ET
363 if (po->z_delta_size) {
364 data_len = po->z_delta_size;
365
366 if ((error = write_cb(data, data_len, cb_data)) < 0 ||
367 (error = git_hash_update(&pb->ctx, data, data_len)) < 0)
368 goto done;
369 } else {
370 zbuf = git__malloc(zbuf_len);
ac3d33df 371 GIT_ERROR_CHECK_ALLOC(zbuf);
0ade2f7a 372
52a8a130 373 git_zstream_reset(&pb->zstream);
22a2d3d5
UG
374
375 if ((error = git_zstream_set_input(&pb->zstream, data, data_len)) < 0)
376 goto done;
0ade2f7a 377
d9b04d78
RB
378 while (!git_zstream_done(&pb->zstream)) {
379 if ((error = git_zstream_get_output(zbuf, &zbuf_len, &pb->zstream)) < 0 ||
380 (error = write_cb(zbuf, zbuf_len, cb_data)) < 0 ||
381 (error = git_hash_update(&pb->ctx, zbuf, zbuf_len)) < 0)
0ade2f7a
ET
382 goto done;
383
d9b04d78 384 zbuf_len = COMPRESS_BUFLEN; /* reuse buffer */
0ade2f7a 385 }
0a32dca5
MS
386 }
387
a14aa1e7
CMN
388 /*
389 * If po->delta is true, data is a delta and it is our
390 * responsibility to free it (otherwise it's a git_object's
391 * data). We set po->delta_data to NULL in case we got the
392 * data from there instead of get_delta(). If we didn't,
393 * there's no harm.
394 */
395 if (po->delta) {
396 git__free(data);
7e3ed419
RB
397 po->delta_data = NULL;
398 }
0a32dca5 399
0a32dca5 400 pb->nr_written++;
0a32dca5 401
0ade2f7a
ET
402done:
403 git__free(zbuf);
0a32dca5 404 git_odb_object_free(obj);
0ade2f7a 405 return error;
0a32dca5
MS
406}
407
408enum write_one_status {
409 WRITE_ONE_SKIP = -1, /* already written */
410 WRITE_ONE_BREAK = 0, /* writing this will bust the limit; not written */
411 WRITE_ONE_WRITTEN = 1, /* normal */
412 WRITE_ONE_RECURSIVE = 2 /* already scheduled to be written */
413};
414
0ade2f7a
ET
415static int write_one(
416 enum write_one_status *status,
417 git_packbuilder *pb,
418 git_pobject *po,
419 int (*write_cb)(void *buf, size_t size, void *cb_data),
420 void *cb_data)
0a32dca5 421{
0ade2f7a
ET
422 int error;
423
0a32dca5
MS
424 if (po->recursing) {
425 *status = WRITE_ONE_RECURSIVE;
426 return 0;
427 } else if (po->written) {
428 *status = WRITE_ONE_SKIP;
429 return 0;
430 }
431
432 if (po->delta) {
433 po->recursing = 1;
0ade2f7a
ET
434
435 if ((error = write_one(status, pb, po->delta, write_cb, cb_data)) < 0)
436 return error;
437
438 /* we cannot depend on this one */
439 if (*status == WRITE_ONE_RECURSIVE)
0a32dca5 440 po->delta = NULL;
0a32dca5
MS
441 }
442
38d338b2 443 *status = WRITE_ONE_WRITTEN;
0a32dca5
MS
444 po->written = 1;
445 po->recursing = 0;
0ade2f7a
ET
446
447 return write_object(pb, po, write_cb, cb_data);
0a32dca5
MS
448}
449
60e15ecd
ET
450GIT_INLINE(void) add_to_write_order(git_pobject **wo, size_t *endp,
451 git_pobject *po)
0a32dca5
MS
452{
453 if (po->filled)
454 return;
455 wo[(*endp)++] = po;
456 po->filled = 1;
457}
458
60e15ecd
ET
459static void add_descendants_to_write_order(git_pobject **wo, size_t *endp,
460 git_pobject *po)
0a32dca5
MS
461{
462 int add_to_order = 1;
463 while (po) {
464 if (add_to_order) {
465 git_pobject *s;
466 /* add this node... */
467 add_to_write_order(wo, endp, po);
468 /* all its siblings... */
469 for (s = po->delta_sibling; s; s = s->delta_sibling) {
470 add_to_write_order(wo, endp, s);
471 }
472 }
473 /* drop down a level to add left subtree nodes if possible */
474 if (po->delta_child) {
475 add_to_order = 1;
476 po = po->delta_child;
477 } else {
478 add_to_order = 0;
479 /* our sibling might have some children, it is next */
480 if (po->delta_sibling) {
481 po = po->delta_sibling;
482 continue;
483 }
484 /* go back to our parent node */
485 po = po->delta;
486 while (po && !po->delta_sibling) {
487 /* we're on the right side of a subtree, keep
488 * going up until we can go right again */
489 po = po->delta;
490 }
491 if (!po) {
492 /* done- we hit our original root node */
493 return;
494 }
495 /* pass it off to sibling at this level */
496 po = po->delta_sibling;
497 }
498 };
499}
500
60e15ecd
ET
501static void add_family_to_write_order(git_pobject **wo, size_t *endp,
502 git_pobject *po)
0a32dca5
MS
503{
504 git_pobject *root;
505
506 for (root = po; root->delta; root = root->delta)
507 ; /* nothing */
508 add_descendants_to_write_order(wo, endp, root);
509}
510
511static int cb_tag_foreach(const char *name, git_oid *oid, void *data)
512{
513 git_packbuilder *pb = data;
0cf49e10 514 git_pobject *po;
0a32dca5
MS
515
516 GIT_UNUSED(name);
517
22a2d3d5 518 if ((po = git_oidmap_get(pb->object_ix, oid)) == NULL)
0cf49e10
MS
519 return 0;
520
0cf49e10
MS
521 po->tagged = 1;
522
0a32dca5 523 /* TODO: peel objects */
0cf49e10 524
0a32dca5
MS
525 return 0;
526}
527
528static git_pobject **compute_write_order(git_packbuilder *pb)
529{
60e15ecd 530 size_t i, wo_end, last_untagged;
392702ee 531 git_pobject **wo;
0a32dca5 532
2884cc42 533 if ((wo = git__mallocarray(pb->nr_objects, sizeof(*wo))) == NULL)
392702ee 534 return NULL;
0a32dca5
MS
535
536 for (i = 0; i < pb->nr_objects; i++) {
537 git_pobject *po = pb->object_list + i;
538 po->tagged = 0;
539 po->filled = 0;
540 po->delta_child = NULL;
541 po->delta_sibling = NULL;
542 }
543
544 /*
545 * Fully connect delta_child/delta_sibling network.
546 * Make sure delta_sibling is sorted in the original
547 * recency order.
548 */
549 for (i = pb->nr_objects; i > 0;) {
550 git_pobject *po = &pb->object_list[--i];
551 if (!po->delta)
552 continue;
553 /* Mark me as the first child */
554 po->delta_sibling = po->delta->delta_child;
555 po->delta->delta_child = po;
556 }
557
558 /*
559 * Mark objects that are at the tip of tags.
560 */
050af8bb
RD
561 if (git_tag_foreach(pb->repo, &cb_tag_foreach, pb) < 0) {
562 git__free(wo);
0a32dca5 563 return NULL;
050af8bb 564 }
0a32dca5
MS
565
566 /*
567 * Give the objects in the original recency order until
568 * we see a tagged tip.
569 */
570 for (i = wo_end = 0; i < pb->nr_objects; i++) {
571 git_pobject *po = pb->object_list + i;
572 if (po->tagged)
573 break;
574 add_to_write_order(wo, &wo_end, po);
575 }
576 last_untagged = i;
577
578 /*
579 * Then fill all the tagged tips.
580 */
581 for (; i < pb->nr_objects; i++) {
582 git_pobject *po = pb->object_list + i;
583 if (po->tagged)
584 add_to_write_order(wo, &wo_end, po);
585 }
586
587 /*
588 * And then all remaining commits and tags.
589 */
590 for (i = last_untagged; i < pb->nr_objects; i++) {
591 git_pobject *po = pb->object_list + i;
ac3d33df
JK
592 if (po->type != GIT_OBJECT_COMMIT &&
593 po->type != GIT_OBJECT_TAG)
0a32dca5
MS
594 continue;
595 add_to_write_order(wo, &wo_end, po);
596 }
597
598 /*
599 * And then all the trees.
600 */
601 for (i = last_untagged; i < pb->nr_objects; i++) {
602 git_pobject *po = pb->object_list + i;
ac3d33df 603 if (po->type != GIT_OBJECT_TREE)
0a32dca5
MS
604 continue;
605 add_to_write_order(wo, &wo_end, po);
606 }
607
608 /*
609 * Finally all the rest in really tight order
610 */
611 for (i = last_untagged; i < pb->nr_objects; i++) {
612 git_pobject *po = pb->object_list + i;
613 if (!po->filled)
614 add_family_to_write_order(wo, &wo_end, po);
615 }
616
617 if (wo_end != pb->nr_objects) {
0b2437bb 618 git__free(wo);
ac3d33df 619 git_error_set(GIT_ERROR_INVALID, "invalid write order");
0a32dca5
MS
620 return NULL;
621 }
622
623 return wo;
624}
625
626static int write_pack(git_packbuilder *pb,
0ade2f7a
ET
627 int (*write_cb)(void *buf, size_t size, void *cb_data),
628 void *cb_data)
0a32dca5
MS
629{
630 git_pobject **write_order;
631 git_pobject *po;
0a32dca5
MS
632 enum write_one_status status;
633 struct git_pack_header ph;
cc2447da 634 git_oid entry_oid;
60e15ecd 635 size_t i = 0;
5b188225 636 int error = 0;
0a32dca5
MS
637
638 write_order = compute_write_order(pb);
bac52ab0
PS
639 if (write_order == NULL)
640 return -1;
0a32dca5 641
60e15ecd 642 if (!git__is_uint32(pb->nr_objects)) {
ac3d33df 643 git_error_set(GIT_ERROR_INVALID, "too many objects");
60e15ecd
ET
644 return -1;
645 }
646
0a32dca5
MS
647 /* Write pack header */
648 ph.hdr_signature = htonl(PACK_SIGNATURE);
649 ph.hdr_version = htonl(PACK_VERSION);
650 ph.hdr_entries = htonl(pb->nr_objects);
651
0ade2f7a
ET
652 if ((error = write_cb(&ph, sizeof(ph), cb_data)) < 0 ||
653 (error = git_hash_update(&pb->ctx, &ph, sizeof(ph))) < 0)
5b188225 654 goto done;
0a32dca5
MS
655
656 pb->nr_remaining = pb->nr_objects;
657 do {
658 pb->nr_written = 0;
659 for ( ; i < pb->nr_objects; ++i) {
660 po = write_order[i];
0ade2f7a
ET
661
662 if ((error = write_one(&status, pb, po, write_cb, cb_data)) < 0)
5b188225 663 goto done;
0a32dca5
MS
664 }
665
666 pb->nr_remaining -= pb->nr_written;
667 } while (pb->nr_remaining && i < pb->nr_objects);
668
cc2447da 669 if ((error = git_hash_final(&entry_oid, &pb->ctx)) < 0)
5b188225 670 goto done;
0a32dca5 671
0ade2f7a 672 error = write_cb(entry_oid.id, GIT_OID_RAWSZ, cb_data);
0a32dca5 673
5b188225 674done:
7e3ed419
RB
675 /* if callback cancelled writing, we must still free delta_data */
676 for ( ; i < pb->nr_objects; ++i) {
677 po = write_order[i];
678 if (po->delta_data) {
679 git__free(po->delta_data);
680 po->delta_data = NULL;
681 }
682 }
683
0a32dca5 684 git__free(write_order);
5b188225 685 return error;
0a32dca5
MS
686}
687
0a32dca5
MS
688static int write_pack_buf(void *buf, size_t size, void *data)
689{
690 git_buf *b = (git_buf *)data;
691 return git_buf_put(b, buf, size);
692}
693
0a32dca5
MS
694static int type_size_sort(const void *_a, const void *_b)
695{
696 const git_pobject *a = (git_pobject *)_a;
697 const git_pobject *b = (git_pobject *)_b;
698
699 if (a->type > b->type)
700 return -1;
701 if (a->type < b->type)
702 return 1;
703 if (a->hash > b->hash)
704 return -1;
705 if (a->hash < b->hash)
706 return 1;
707 /*
708 * TODO
709 *
710 if (a->preferred_base > b->preferred_base)
711 return -1;
712 if (a->preferred_base < b->preferred_base)
713 return 1;
714 */
715 if (a->size > b->size)
716 return -1;
717 if (a->size < b->size)
718 return 1;
719 return a < b ? -1 : (a > b); /* newest first */
720}
721
60e15ecd
ET
722static int delta_cacheable(
723 git_packbuilder *pb,
724 size_t src_size,
725 size_t trg_size,
726 size_t delta_size)
0a32dca5 727{
60e15ecd
ET
728 size_t new_size;
729
730 if (git__add_sizet_overflow(&new_size, pb->delta_cache_size, delta_size))
731 return 0;
732
733 if (pb->max_delta_cache_size && new_size > pb->max_delta_cache_size)
0a32dca5
MS
734 return 0;
735
736 if (delta_size < pb->cache_max_small_delta_size)
737 return 1;
738
739 /* cache delta, if objects are large enough compared to delta size */
740 if ((src_size >> 20) + (trg_size >> 21) > (delta_size >> 10))
741 return 1;
742
743 return 0;
744}
745
0a32dca5 746static int try_delta(git_packbuilder *pb, struct unpacked *trg,
60e15ecd
ET
747 struct unpacked *src, size_t max_depth,
748 size_t *mem_usage, int *ret)
0a32dca5
MS
749{
750 git_pobject *trg_object = trg->object;
751 git_pobject *src_object = src->object;
752 git_odb_object *obj;
60e15ecd
ET
753 size_t trg_size, src_size, delta_size, sizediff, max_size, sz;
754 size_t ref_depth;
0a32dca5
MS
755 void *delta_buf;
756
757 /* Don't bother doing diffs between different types */
758 if (trg_object->type != src_object->type) {
759 *ret = -1;
760 return 0;
761 }
762
763 *ret = 0;
764
765 /* TODO: support reuse-delta */
766
767 /* Let's not bust the allowed depth. */
768 if (src->depth >= max_depth)
769 return 0;
770
771 /* Now some size filtering heuristics. */
60e15ecd 772 trg_size = trg_object->size;
0a32dca5
MS
773 if (!trg_object->delta) {
774 max_size = trg_size/2 - 20;
775 ref_depth = 1;
776 } else {
777 max_size = trg_object->delta_size;
778 ref_depth = trg->depth;
779 }
780
781 max_size = (uint64_t)max_size * (max_depth - src->depth) /
782 (max_depth - ref_depth + 1);
783 if (max_size == 0)
784 return 0;
785
60e15ecd 786 src_size = src_object->size;
0a32dca5
MS
787 sizediff = src_size < trg_size ? trg_size - src_size : 0;
788 if (sizediff >= max_size)
789 return 0;
790 if (trg_size < src_size / 32)
791 return 0;
792
793 /* Load data if not already done */
794 if (!trg->data) {
795 if (git_odb_read(&obj, pb->odb, &trg_object->id) < 0)
796 return -1;
797
60e15ecd 798 sz = git_odb_object_size(obj);
0a32dca5 799 trg->data = git__malloc(sz);
ac3d33df 800 GIT_ERROR_CHECK_ALLOC(trg->data);
0a32dca5
MS
801 memcpy(trg->data, git_odb_object_data(obj), sz);
802
803 git_odb_object_free(obj);
804
805 if (sz != trg_size) {
ac3d33df 806 git_error_set(GIT_ERROR_INVALID,
909d5494 807 "inconsistent target object length");
0a32dca5
MS
808 return -1;
809 }
810
811 *mem_usage += sz;
812 }
813 if (!src->data) {
392702ee
ET
814 size_t obj_sz;
815
816 if (git_odb_read(&obj, pb->odb, &src_object->id) < 0 ||
817 !git__is_ulong(obj_sz = git_odb_object_size(obj)))
0a32dca5
MS
818 return -1;
819
60e15ecd 820 sz = obj_sz;
0a32dca5 821 src->data = git__malloc(sz);
ac3d33df 822 GIT_ERROR_CHECK_ALLOC(src->data);
0a32dca5
MS
823 memcpy(src->data, git_odb_object_data(obj), sz);
824
825 git_odb_object_free(obj);
826
827 if (sz != src_size) {
ac3d33df 828 git_error_set(GIT_ERROR_INVALID,
909d5494 829 "inconsistent source object length");
0a32dca5
MS
830 return -1;
831 }
832
833 *mem_usage += sz;
834 }
835 if (!src->index) {
1cd65991 836 if (git_delta_index_init(&src->index, src->data, src_size) < 0)
0a32dca5
MS
837 return 0; /* suboptimal pack - out of memory */
838
1cd65991 839 *mem_usage += git_delta_index_size(src->index);
0a32dca5
MS
840 }
841
1cd65991
ET
842 if (git_delta_create_from_index(&delta_buf, &delta_size, src->index, trg->data, trg_size,
843 max_size) < 0)
0a32dca5
MS
844 return 0;
845
846 if (trg_object->delta) {
847 /* Prefer only shallower same-sized deltas. */
848 if (delta_size == trg_object->delta_size &&
849 src->depth + 1 >= trg->depth) {
850 git__free(delta_buf);
851 return 0;
852 }
853 }
854
b4491b99 855 git_packbuilder__cache_lock(pb);
0a32dca5
MS
856 if (trg_object->delta_data) {
857 git__free(trg_object->delta_data);
60e15ecd 858 assert(pb->delta_cache_size >= trg_object->delta_size);
0a32dca5
MS
859 pb->delta_cache_size -= trg_object->delta_size;
860 trg_object->delta_data = NULL;
861 }
862 if (delta_cacheable(pb, src_size, trg_size, delta_size)) {
60e15ecd 863 bool overflow = git__add_sizet_overflow(
0f07d54b 864 &pb->delta_cache_size, pb->delta_cache_size, delta_size);
392702ee 865
b4491b99 866 git_packbuilder__cache_unlock(pb);
0a32dca5 867
3fe5768b
PS
868 if (overflow) {
869 git__free(delta_buf);
0f07d54b 870 return -1;
3fe5768b 871 }
d1c9a48d
PS
872
873 trg_object->delta_data = git__realloc(delta_buf, delta_size);
ac3d33df 874 GIT_ERROR_CHECK_ALLOC(trg_object->delta_data);
0a32dca5
MS
875 } else {
876 /* create delta when writing the pack */
b4491b99 877 git_packbuilder__cache_unlock(pb);
0a32dca5
MS
878 git__free(delta_buf);
879 }
880
881 trg_object->delta = src_object;
882 trg_object->delta_size = delta_size;
883 trg->depth = src->depth + 1;
884
885 *ret = 1;
886 return 0;
887}
888
60e15ecd 889static size_t check_delta_limit(git_pobject *me, size_t n)
0a32dca5
MS
890{
891 git_pobject *child = me->delta_child;
60e15ecd 892 size_t m = n;
0a32dca5
MS
893
894 while (child) {
60e15ecd 895 size_t c = check_delta_limit(child, n + 1);
0a32dca5
MS
896 if (m < c)
897 m = c;
898 child = child->delta_sibling;
899 }
900 return m;
901}
902
60e15ecd 903static size_t free_unpacked(struct unpacked *n)
0a32dca5 904{
60e15ecd 905 size_t freed_mem = 0;
a03952f0 906
1cd65991
ET
907 if (n->index) {
908 freed_mem += git_delta_index_size(n->index);
909 git_delta_index_free(n->index);
910 }
0a32dca5 911 n->index = NULL;
1cd65991 912
0a32dca5 913 if (n->data) {
60e15ecd 914 freed_mem += n->object->size;
0a32dca5
MS
915 git__free(n->data);
916 n->data = NULL;
917 }
918 n->object = NULL;
919 n->depth = 0;
920 return freed_mem;
921}
922
60e15ecd
ET
923static int report_delta_progress(
924 git_packbuilder *pb, uint32_t count, bool force)
3c337a5d
CMN
925{
926 int ret;
927
928 if (pb->progress_cb) {
929 double current_time = git__timer();
930 double elapsed = current_time - pb->last_progress_report_time;
931
932 if (force || elapsed >= MIN_PROGRESS_UPDATE_INTERVAL) {
933 pb->last_progress_report_time = current_time;
934
935 ret = pb->progress_cb(
936 GIT_PACKBUILDER_DELTAFICATION,
937 count, pb->nr_objects, pb->progress_cb_payload);
938
939 if (ret)
ac3d33df 940 return git_error_set_after_callback(ret);
3c337a5d
CMN
941 }
942 }
943
944 return 0;
945}
946
0a32dca5 947static int find_deltas(git_packbuilder *pb, git_pobject **list,
60e15ecd 948 size_t *list_size, size_t window, size_t depth)
0a32dca5
MS
949{
950 git_pobject *po;
951 git_buf zbuf = GIT_BUF_INIT;
952 struct unpacked *array;
60e15ecd
ET
953 size_t idx = 0, count = 0;
954 size_t mem_usage = 0;
955 size_t i;
0a32dca5
MS
956 int error = -1;
957
958 array = git__calloc(window, sizeof(struct unpacked));
ac3d33df 959 GIT_ERROR_CHECK_ALLOC(array);
0a32dca5
MS
960
961 for (;;) {
962 struct unpacked *n = array + idx;
60e15ecd 963 size_t max_depth, j, best_base = SIZE_MAX;
0a32dca5 964
b4491b99 965 git_packbuilder__progress_lock(pb);
0a32dca5 966 if (!*list_size) {
b4491b99 967 git_packbuilder__progress_unlock(pb);
0a32dca5
MS
968 break;
969 }
970
3c337a5d
CMN
971 pb->nr_deltified += 1;
972 report_delta_progress(pb, pb->nr_deltified, false);
973
0a32dca5
MS
974 po = *list++;
975 (*list_size)--;
b4491b99 976 git_packbuilder__progress_unlock(pb);
0a32dca5
MS
977
978 mem_usage -= free_unpacked(n);
979 n->object = po;
980
981 while (pb->window_memory_limit &&
982 mem_usage > pb->window_memory_limit &&
983 count > 1) {
60e15ecd 984 size_t tail = (idx + window - count) % window;
0a32dca5
MS
985 mem_usage -= free_unpacked(array + tail);
986 count--;
987 }
988
989 /*
990 * If the current object is at pack edge, take the depth the
991 * objects that depend on the current object into account
992 * otherwise they would become too deep.
993 */
994 max_depth = depth;
995 if (po->delta_child) {
60e15ecd
ET
996 size_t delta_limit = check_delta_limit(po, 0);
997
998 if (delta_limit > max_depth)
0a32dca5 999 goto next;
60e15ecd
ET
1000
1001 max_depth -= delta_limit;
0a32dca5
MS
1002 }
1003
1004 j = window;
1005 while (--j > 0) {
1006 int ret;
60e15ecd 1007 size_t other_idx = idx + j;
0a32dca5
MS
1008 struct unpacked *m;
1009
1010 if (other_idx >= window)
1011 other_idx -= window;
1012
1013 m = array + other_idx;
1014 if (!m->object)
1015 break;
1016
1017 if (try_delta(pb, n, m, max_depth, &mem_usage, &ret) < 0)
1018 goto on_error;
1019 if (ret < 0)
1020 break;
1021 else if (ret > 0)
1022 best_base = other_idx;
1023 }
1024
1025 /*
1026 * If we decided to cache the delta data, then it is best
1027 * to compress it right away. First because we have to do
1028 * it anyway, and doing it here while we're threaded will
1029 * save a lot of time in the non threaded write phase,
1030 * as well as allow for caching more deltas within
1031 * the same cache size limit.
1032 * ...
1033 * But only if not writing to stdout, since in that case
1034 * the network is most likely throttling writes anyway,
1035 * and therefore it is best to go to the write phase ASAP
1036 * instead, as we can afford spending more time compressing
1037 * between writes at that moment.
1038 */
1039 if (po->delta_data) {
c6f26b48 1040 if (git_zstream_deflatebuf(&zbuf, po->delta_data, po->delta_size) < 0)
0a32dca5
MS
1041 goto on_error;
1042
1043 git__free(po->delta_data);
1044 po->delta_data = git__malloc(zbuf.size);
ac3d33df 1045 GIT_ERROR_CHECK_ALLOC(po->delta_data);
0a32dca5
MS
1046
1047 memcpy(po->delta_data, zbuf.ptr, zbuf.size);
60e15ecd 1048 po->z_delta_size = zbuf.size;
0a32dca5
MS
1049 git_buf_clear(&zbuf);
1050
b4491b99 1051 git_packbuilder__cache_lock(pb);
0a32dca5
MS
1052 pb->delta_cache_size -= po->delta_size;
1053 pb->delta_cache_size += po->z_delta_size;
b4491b99 1054 git_packbuilder__cache_unlock(pb);
0a32dca5
MS
1055 }
1056
1057 /*
1058 * If we made n a delta, and if n is already at max
1059 * depth, leaving it in the window is pointless. we
1060 * should evict it first.
1061 */
1062 if (po->delta && max_depth <= n->depth)
1063 continue;
1064
1065 /*
1066 * Move the best delta base up in the window, after the
1067 * currently deltified object, to keep it longer. It will
1068 * be the first base object to be attempted next.
1069 */
1070 if (po->delta) {
1071 struct unpacked swap = array[best_base];
60e15ecd
ET
1072 size_t dist = (window + idx - best_base) % window;
1073 size_t dst = best_base;
0a32dca5 1074 while (dist--) {
60e15ecd 1075 size_t src = (dst + 1) % window;
0a32dca5
MS
1076 array[dst] = array[src];
1077 dst = src;
1078 }
1079 array[dst] = swap;
1080 }
1081
1082 next:
1083 idx++;
1084 if (count + 1 < window)
1085 count++;
1086 if (idx >= window)
1087 idx = 0;
1088 }
1089 error = 0;
1090
1091on_error:
1092 for (i = 0; i < window; ++i) {
1093 git__free(array[i].index);
1094 git__free(array[i].data);
1095 }
1096 git__free(array);
ac3d33df 1097 git_buf_dispose(&zbuf);
0a32dca5
MS
1098
1099 return error;
1100}
1101
1102#ifdef GIT_THREADS
1103
1104struct thread_params {
1105 git_thread thread;
1106 git_packbuilder *pb;
1107
1108 git_pobject **list;
1109
1110 git_cond cond;
1111 git_mutex mutex;
1112
60e15ecd
ET
1113 size_t list_size;
1114 size_t remaining;
0a32dca5 1115
60e15ecd
ET
1116 size_t window;
1117 size_t depth;
1118 size_t working;
1119 size_t data_ready;
0a32dca5
MS
1120};
1121
0a32dca5
MS
1122static void *threaded_find_deltas(void *arg)
1123{
1124 struct thread_params *me = arg;
1125
1126 while (me->remaining) {
1127 if (find_deltas(me->pb, me->list, &me->remaining,
1128 me->window, me->depth) < 0) {
1129 ; /* TODO */
1130 }
1131
b4491b99 1132 git_packbuilder__progress_lock(me->pb);
0a32dca5 1133 me->working = 0;
b4491b99
PK
1134 git_cond_signal(&me->pb->progress_cond);
1135 git_packbuilder__progress_unlock(me->pb);
0a32dca5 1136
a35b3864 1137 if (git_mutex_lock(&me->mutex)) {
ac3d33df 1138 git_error_set(GIT_ERROR_THREAD, "unable to lock packfile condition mutex");
a35b3864
JSS
1139 return NULL;
1140 }
1141
1142 while (!me->data_ready)
1143 git_cond_wait(&me->cond, &me->mutex);
1144
0a32dca5
MS
1145 /*
1146 * We must not set ->data_ready before we wait on the
1147 * condition because the main thread may have set it to 1
1148 * before we get here. In order to be sure that new
1149 * work is available if we see 1 in ->data_ready, it
1150 * was initialized to 0 before this thread was spawned
1151 * and we reset it to 0 right away.
1152 */
0a32dca5
MS
1153 me->data_ready = 0;
1154 git_mutex_unlock(&me->mutex);
1155 }
1156 /* leave ->working 1 so that this doesn't get more work assigned */
1157 return NULL;
1158}
1159
1160static int ll_find_deltas(git_packbuilder *pb, git_pobject **list,
60e15ecd 1161 size_t list_size, size_t window, size_t depth)
0a32dca5
MS
1162{
1163 struct thread_params *p;
60e15ecd
ET
1164 size_t i;
1165 int ret, active_threads = 0;
0a32dca5 1166
0a32dca5
MS
1167 if (!pb->nr_threads)
1168 pb->nr_threads = git_online_cpus();
b4491b99 1169
0a32dca5
MS
1170 if (pb->nr_threads <= 1) {
1171 find_deltas(pb, list, &list_size, window, depth);
0a32dca5
MS
1172 return 0;
1173 }
1174
3603cb09 1175 p = git__mallocarray(pb->nr_threads, sizeof(*p));
ac3d33df 1176 GIT_ERROR_CHECK_ALLOC(p);
0a32dca5
MS
1177
1178 /* Partition the work among the threads */
1179 for (i = 0; i < pb->nr_threads; ++i) {
60e15ecd 1180 size_t sub_size = list_size / (pb->nr_threads - i);
0a32dca5
MS
1181
1182 /* don't use too small segments or no deltas will be found */
1183 if (sub_size < 2*window && i+1 < pb->nr_threads)
1184 sub_size = 0;
1185
1186 p[i].pb = pb;
1187 p[i].window = window;
1188 p[i].depth = depth;
1189 p[i].working = 1;
1190 p[i].data_ready = 0;
1191
1192 /* try to split chunks on "path" boundaries */
1193 while (sub_size && sub_size < list_size &&
1194 list[sub_size]->hash &&
1195 list[sub_size]->hash == list[sub_size-1]->hash)
1196 sub_size++;
1197
1198 p[i].list = list;
1199 p[i].list_size = sub_size;
1200 p[i].remaining = sub_size;
1201
1202 list += sub_size;
1203 list_size -= sub_size;
1204 }
1205
1206 /* Start work threads */
1207 for (i = 0; i < pb->nr_threads; ++i) {
1208 if (!p[i].list_size)
1209 continue;
1210
1211 git_mutex_init(&p[i].mutex);
1212 git_cond_init(&p[i].cond);
1213
faebc1c6 1214 ret = git_thread_create(&p[i].thread,
0a32dca5
MS
1215 threaded_find_deltas, &p[i]);
1216 if (ret) {
ac3d33df 1217 git_error_set(GIT_ERROR_THREAD, "unable to create thread");
0a32dca5
MS
1218 return -1;
1219 }
1220 active_threads++;
1221 }
1222
1223 /*
1224 * Now let's wait for work completion. Each time a thread is done
1225 * with its work, we steal half of the remaining work from the
1226 * thread with the largest number of unprocessed objects and give
1227 * it to that newly idle thread. This ensure good load balancing
1228 * until the remaining object list segments are simply too short
1229 * to be worth splitting anymore.
1230 */
1231 while (active_threads) {
1232 struct thread_params *target = NULL;
1233 struct thread_params *victim = NULL;
60e15ecd 1234 size_t sub_size = 0;
0a32dca5 1235
b4491b99
PK
1236 /* Start by locating a thread that has transitioned its
1237 * 'working' flag from 1 -> 0. This indicates that it is
1238 * ready to receive more work using our work-stealing
1239 * algorithm. */
1240 git_packbuilder__progress_lock(pb);
0a32dca5
MS
1241 for (;;) {
1242 for (i = 0; !target && i < pb->nr_threads; i++)
1243 if (!p[i].working)
1244 target = &p[i];
1245 if (target)
1246 break;
b4491b99 1247 git_cond_wait(&pb->progress_cond, &pb->progress_mutex);
0a32dca5
MS
1248 }
1249
b4491b99
PK
1250 /* At this point we hold the progress lock and have located
1251 * a thread to receive more work. We still need to locate a
1252 * thread from which to steal work (the victim). */
0a32dca5
MS
1253 for (i = 0; i < pb->nr_threads; i++)
1254 if (p[i].remaining > 2*window &&
1255 (!victim || victim->remaining < p[i].remaining))
1256 victim = &p[i];
b4491b99 1257
0a32dca5
MS
1258 if (victim) {
1259 sub_size = victim->remaining / 2;
1260 list = victim->list + victim->list_size - sub_size;
1261 while (sub_size && list[0]->hash &&
1262 list[0]->hash == list[-1]->hash) {
1263 list++;
1264 sub_size--;
1265 }
1266 if (!sub_size) {
1267 /*
1268 * It is possible for some "paths" to have
1269 * so many objects that no hash boundary
1270 * might be found. Let's just steal the
1271 * exact half in that case.
1272 */
1273 sub_size = victim->remaining / 2;
1274 list -= sub_size;
1275 }
1276 target->list = list;
1277 victim->list_size -= sub_size;
1278 victim->remaining -= sub_size;
1279 }
1280 target->list_size = sub_size;
1281 target->remaining = sub_size;
1282 target->working = 1;
b4491b99 1283 git_packbuilder__progress_unlock(pb);
0a32dca5 1284
a35b3864 1285 if (git_mutex_lock(&target->mutex)) {
ac3d33df 1286 git_error_set(GIT_ERROR_THREAD, "unable to lock packfile condition mutex");
a35b3864
JSS
1287 git__free(p);
1288 return -1;
1289 }
1290
0a32dca5
MS
1291 target->data_ready = 1;
1292 git_cond_signal(&target->cond);
1293 git_mutex_unlock(&target->mutex);
1294
1295 if (!sub_size) {
fb591767 1296 git_thread_join(&target->thread, NULL);
0a32dca5
MS
1297 git_cond_free(&target->cond);
1298 git_mutex_free(&target->mutex);
1299 active_threads--;
1300 }
1301 }
1302
0a32dca5
MS
1303 git__free(p);
1304 return 0;
1305}
1306
1307#else
1308#define ll_find_deltas(pb, l, ls, w, d) find_deltas(pb, l, &ls, w, d)
1309#endif
1310
0a32dca5
MS
1311static int prepare_pack(git_packbuilder *pb)
1312{
1313 git_pobject **delta_list;
60e15ecd 1314 size_t i, n = 0;
0a32dca5
MS
1315
1316 if (pb->nr_objects == 0 || pb->done)
1317 return 0; /* nothing to do */
1318
b176eded
JM
1319 /*
1320 * Although we do not report progress during deltafication, we
1321 * at least report that we are in the deltafication stage
1322 */
1323 if (pb->progress_cb)
1324 pb->progress_cb(GIT_PACKBUILDER_DELTAFICATION, 0, pb->nr_objects, pb->progress_cb_payload);
1325
3603cb09 1326 delta_list = git__mallocarray(pb->nr_objects, sizeof(*delta_list));
ac3d33df 1327 GIT_ERROR_CHECK_ALLOC(delta_list);
0a32dca5
MS
1328
1329 for (i = 0; i < pb->nr_objects; ++i) {
1330 git_pobject *po = pb->object_list + i;
1331
b4491b99
PK
1332 /* Make sure the item is within our size limits */
1333 if (po->size < 50 || po->size > pb->big_file_threshold)
0a32dca5
MS
1334 continue;
1335
1336 delta_list[n++] = po;
1337 }
1338
1339 if (n > 1) {
1340 git__tsort((void **)delta_list, n, type_size_sort);
1341 if (ll_find_deltas(pb, delta_list, n,
1342 GIT_PACK_WINDOW + 1,
1343 GIT_PACK_DEPTH) < 0) {
1344 git__free(delta_list);
1345 return -1;
1346 }
1347 }
1348
3c337a5d
CMN
1349 report_delta_progress(pb, pb->nr_objects, true);
1350
0a32dca5
MS
1351 pb->done = true;
1352 git__free(delta_list);
1353 return 0;
1354}
1355
1356#define PREPARE_PACK if (prepare_pack(pb) < 0) { return -1; }
1357
3dfed9cb
CMN
1358int git_packbuilder_foreach(git_packbuilder *pb, int (*cb)(void *buf, size_t size, void *payload), void *payload)
1359{
1360 PREPARE_PACK;
1361 return write_pack(pb, cb, payload);
1362}
1363
0a32dca5
MS
1364int git_packbuilder_write_buf(git_buf *buf, git_packbuilder *pb)
1365{
1366 PREPARE_PACK;
1e4976cb 1367 git_buf_sanitize(buf);
0a32dca5
MS
1368 return write_pack(pb, &write_pack_buf, buf);
1369}
1370
563c19a9 1371static int write_cb(void *buf, size_t len, void *payload)
0a32dca5 1372{
563c19a9 1373 struct pack_write_context *ctx = payload;
a6154f21 1374 return git_indexer_append(ctx->indexer, buf, len, ctx->stats);
563c19a9
MS
1375}
1376
1377int git_packbuilder_write(
1378 git_packbuilder *pb,
1379 const char *path,
1e60e5f4 1380 unsigned int mode,
22a2d3d5 1381 git_indexer_progress_cb progress_cb,
563c19a9
MS
1382 void *progress_cb_payload)
1383{
22a2d3d5
UG
1384 int error = -1;
1385 git_buf object_path = GIT_BUF_INIT;
ac3d33df 1386 git_indexer_options opts = GIT_INDEXER_OPTIONS_INIT;
22a2d3d5
UG
1387 git_indexer *indexer = NULL;
1388 git_indexer_progress stats;
563c19a9 1389 struct pack_write_context ctx;
1c04a96b 1390 int t;
563c19a9 1391
0a32dca5 1392 PREPARE_PACK;
563c19a9 1393
22a2d3d5
UG
1394 if (path == NULL) {
1395 if ((error = git_repository_item_path(&object_path, pb->repo, GIT_REPOSITORY_ITEM_OBJECTS)) < 0)
1396 goto cleanup;
1397 if ((error = git_buf_joinpath(&object_path, git_buf_cstr(&object_path), "pack")) < 0)
1398 goto cleanup;
1399 path = git_buf_cstr(&object_path);
1400 }
1401
ac3d33df
JK
1402 opts.progress_cb = progress_cb;
1403 opts.progress_cb_payload = progress_cb_payload;
1404
22a2d3d5
UG
1405 if ((error = git_indexer_new(&indexer, path, mode, pb->odb, &opts)) < 0)
1406 goto cleanup;
563c19a9 1407
22a2d3d5 1408 if (!git_repository__configmap_lookup(&t, pb->repo, GIT_CONFIGMAP_FSYNCOBJECTFILES) && t)
1c04a96b
ET
1409 git_indexer__set_fsync(indexer, 1);
1410
563c19a9
MS
1411 ctx.indexer = indexer;
1412 ctx.stats = &stats;
1413
22a2d3d5
UG
1414 if ((error = git_packbuilder_foreach(pb, write_cb, &ctx)) < 0)
1415 goto cleanup;
1416
1417 if ((error = git_indexer_commit(indexer, &stats)) < 0)
1418 goto cleanup;
563c19a9 1419
cc2447da
ET
1420 git_oid_cpy(&pb->pack_oid, git_indexer_hash(indexer));
1421
22a2d3d5 1422cleanup:
a6154f21 1423 git_indexer_free(indexer);
22a2d3d5
UG
1424 git_buf_dispose(&object_path);
1425 return error;
0a32dca5
MS
1426}
1427
1428#undef PREPARE_PACK
1429
cc2447da
ET
1430const git_oid *git_packbuilder_hash(git_packbuilder *pb)
1431{
1432 return &pb->pack_oid;
1433}
1434
04a36fef 1435
dab89f9b
RB
1436static int cb_tree_walk(
1437 const char *root, const git_tree_entry *entry, void *payload)
0a32dca5 1438{
dab89f9b 1439 int error;
f4b86126 1440 struct tree_walk_context *ctx = payload;
0a32dca5 1441
613d5eb9 1442 /* A commit inside a tree represents a submodule commit and should be skipped. */
ac3d33df 1443 if (git_tree_entry_type(entry) == GIT_OBJECT_COMMIT)
613d5eb9
PK
1444 return 0;
1445
dab89f9b
RB
1446 if (!(error = git_buf_sets(&ctx->buf, root)) &&
1447 !(error = git_buf_puts(&ctx->buf, git_tree_entry_name(entry))))
1448 error = git_packbuilder_insert(
1449 ctx->pb, git_tree_entry_id(entry), git_buf_cstr(&ctx->buf));
0a32dca5 1450
25e0b157 1451 return error;
0a32dca5
MS
1452}
1453
f0e37a8b
X
1454int git_packbuilder_insert_commit(git_packbuilder *pb, const git_oid *oid)
1455{
0b90366c 1456 git_commit *commit;
f0e37a8b 1457
0b90366c
X
1458 if (git_commit_lookup(&commit, pb->repo, oid) < 0 ||
1459 git_packbuilder_insert(pb, oid, NULL) < 0)
1460 return -1;
f0e37a8b 1461
0b90366c
X
1462 if (git_packbuilder_insert_tree(pb, git_commit_tree_id(commit)) < 0)
1463 return -1;
f0e37a8b 1464
0b90366c
X
1465 git_commit_free(commit);
1466 return 0;
f0e37a8b
X
1467}
1468
0a32dca5
MS
1469int git_packbuilder_insert_tree(git_packbuilder *pb, const git_oid *oid)
1470{
dab89f9b
RB
1471 int error;
1472 git_tree *tree = NULL;
f4b86126 1473 struct tree_walk_context context = { pb, GIT_BUF_INIT };
0a32dca5 1474
dab89f9b
RB
1475 if (!(error = git_tree_lookup(&tree, pb->repo, oid)) &&
1476 !(error = git_packbuilder_insert(pb, oid, NULL)))
1477 error = git_tree_walk(tree, GIT_TREEWALK_PRE, cb_tree_walk, &context);
0a32dca5 1478
0a32dca5 1479 git_tree_free(tree);
ac3d33df 1480 git_buf_dispose(&context.buf);
dab89f9b 1481 return error;
0a32dca5
MS
1482}
1483
a61fa4c0
CMN
1484int git_packbuilder_insert_recur(git_packbuilder *pb, const git_oid *id, const char *name)
1485{
1486 git_object *obj;
1487 int error;
1488
1489 assert(pb && id);
1490
ac3d33df 1491 if ((error = git_object_lookup(&obj, pb->repo, id, GIT_OBJECT_ANY)) < 0)
a61fa4c0
CMN
1492 return error;
1493
1494 switch (git_object_type(obj)) {
ac3d33df 1495 case GIT_OBJECT_BLOB:
a61fa4c0
CMN
1496 error = git_packbuilder_insert(pb, id, name);
1497 break;
ac3d33df 1498 case GIT_OBJECT_TREE:
a61fa4c0
CMN
1499 error = git_packbuilder_insert_tree(pb, id);
1500 break;
ac3d33df 1501 case GIT_OBJECT_COMMIT:
a61fa4c0
CMN
1502 error = git_packbuilder_insert_commit(pb, id);
1503 break;
ac3d33df 1504 case GIT_OBJECT_TAG:
a61fa4c0
CMN
1505 if ((error = git_packbuilder_insert(pb, id, name)) < 0)
1506 goto cleanup;
1507 error = git_packbuilder_insert_recur(pb, git_tag_target_id((git_tag *) obj), NULL);
1508 break;
1509
1510 default:
ac3d33df 1511 git_error_set(GIT_ERROR_INVALID, "unknown object type");
a61fa4c0
CMN
1512 error = -1;
1513 }
1514
1515cleanup:
1516 git_object_free(obj);
1517 return error;
1518}
1519
60e15ecd 1520size_t git_packbuilder_object_count(git_packbuilder *pb)
b4b935d8
CMN
1521{
1522 return pb->nr_objects;
1523}
1524
60e15ecd 1525size_t git_packbuilder_written(git_packbuilder *pb)
b4b935d8
CMN
1526{
1527 return pb->nr_written;
1528}
1529
ac3d33df 1530static int lookup_walk_object(struct walk_object **out, git_packbuilder *pb, const git_oid *id)
04a36fef 1531{
ac3d33df 1532 struct walk_object *obj;
04a36fef
CMN
1533
1534 obj = git_pool_mallocz(&pb->object_pool, 1);
1535 if (!obj) {
ac3d33df 1536 git_error_set_oom();
04a36fef
CMN
1537 return -1;
1538 }
1539
1540 git_oid_cpy(&obj->id, id);
1541
1542 *out = obj;
1543 return 0;
1544}
1545
ac3d33df 1546static int retrieve_object(struct walk_object **out, git_packbuilder *pb, const git_oid *id)
04a36fef 1547{
ac3d33df 1548 struct walk_object *obj;
22a2d3d5 1549 int error;
04a36fef 1550
22a2d3d5 1551 if ((obj = git_oidmap_get(pb->walk_objects, id)) == NULL) {
04a36fef
CMN
1552 if ((error = lookup_walk_object(&obj, pb, id)) < 0)
1553 return error;
1554
22a2d3d5
UG
1555 if ((error = git_oidmap_set(pb->walk_objects, &obj->id, obj)) < 0)
1556 return error;
04a36fef
CMN
1557 }
1558
1559 *out = obj;
1560 return 0;
1561}
1562
1563static int mark_blob_uninteresting(git_packbuilder *pb, const git_oid *id)
1564{
1565 int error;
ac3d33df 1566 struct walk_object *obj;
04a36fef
CMN
1567
1568 if ((error = retrieve_object(&obj, pb, id)) < 0)
1569 return error;
1570
1571 obj->uninteresting = 1;
1572
1573 return 0;
1574}
1575
1576static int mark_tree_uninteresting(git_packbuilder *pb, const git_oid *id)
1577{
ac3d33df 1578 struct walk_object *obj;
04a36fef
CMN
1579 git_tree *tree;
1580 int error;
1581 size_t i;
1582
1583 if ((error = retrieve_object(&obj, pb, id)) < 0)
1584 return error;
1585
1586 if (obj->uninteresting)
1587 return 0;
1588
1589 obj->uninteresting = 1;
1590
1591 if ((error = git_tree_lookup(&tree, pb->repo, id)) < 0)
1592 return error;
1593
1594 for (i = 0; i < git_tree_entrycount(tree); i++) {
1595 const git_tree_entry *entry = git_tree_entry_byindex(tree, i);
1596 const git_oid *entry_id = git_tree_entry_id(entry);
1597 switch (git_tree_entry_type(entry)) {
ac3d33df 1598 case GIT_OBJECT_TREE:
04a36fef
CMN
1599 if ((error = mark_tree_uninteresting(pb, entry_id)) < 0)
1600 goto cleanup;
1601 break;
ac3d33df 1602 case GIT_OBJECT_BLOB:
04a36fef
CMN
1603 if ((error = mark_blob_uninteresting(pb, entry_id)) < 0)
1604 goto cleanup;
1605 break;
1606 default:
1607 /* it's a submodule or something unknown, we don't want it */
1608 ;
1609 }
1610 }
1611
1612cleanup:
1613 git_tree_free(tree);
1614 return error;
1615}
1616
1617/*
1618 * Mark the edges of the graph uninteresting. Since we start from a
1619 * git_revwalk, the commits are already uninteresting, but we need to
1620 * mark the trees and blobs.
1621 */
1622static int mark_edges_uninteresting(git_packbuilder *pb, git_commit_list *commits)
1623{
1624 int error;
1625 git_commit_list *list;
1626 git_commit *commit;
1627
1628 for (list = commits; list; list = list->next) {
1629 if (!list->item->uninteresting)
1630 continue;
1631
1632 if ((error = git_commit_lookup(&commit, pb->repo, &list->item->oid)) < 0)
1633 return error;
1634
1635 error = mark_tree_uninteresting(pb, git_commit_tree_id(commit));
1636 git_commit_free(commit);
1637
1638 if (error < 0)
1639 return error;
1640 }
1641
1642 return 0;
1643}
1644
22a2d3d5 1645static int pack_objects_insert_tree(git_packbuilder *pb, git_tree *tree)
04a36fef
CMN
1646{
1647 size_t i;
1648 int error;
1649 git_tree *subtree;
ac3d33df 1650 struct walk_object *obj;
04a36fef
CMN
1651 const char *name;
1652
1653 if ((error = retrieve_object(&obj, pb, git_tree_id(tree))) < 0)
1654 return error;
1655
eae0bfdc 1656 if (obj->seen || obj->uninteresting)
04a36fef
CMN
1657 return 0;
1658
1659 obj->seen = 1;
1660
1661 if ((error = git_packbuilder_insert(pb, &obj->id, NULL)))
1662 return error;
1663
1664 for (i = 0; i < git_tree_entrycount(tree); i++) {
1665 const git_tree_entry *entry = git_tree_entry_byindex(tree, i);
1666 const git_oid *entry_id = git_tree_entry_id(entry);
1667 switch (git_tree_entry_type(entry)) {
ac3d33df 1668 case GIT_OBJECT_TREE:
04a36fef
CMN
1669 if ((error = git_tree_lookup(&subtree, pb->repo, entry_id)) < 0)
1670 return error;
1671
22a2d3d5 1672 error = pack_objects_insert_tree(pb, subtree);
04a36fef
CMN
1673 git_tree_free(subtree);
1674
1675 if (error < 0)
1676 return error;
1677
1678 break;
ac3d33df 1679 case GIT_OBJECT_BLOB:
6c7cee42 1680 if ((error = retrieve_object(&obj, pb, entry_id)) < 0)
eae0bfdc
PP
1681 return error;
1682 if (obj->uninteresting)
1683 continue;
04a36fef
CMN
1684 name = git_tree_entry_name(entry);
1685 if ((error = git_packbuilder_insert(pb, entry_id, name)) < 0)
1686 return error;
1687 break;
1688 default:
1689 /* it's a submodule or something unknown, we don't want it */
1690 ;
1691 }
1692 }
1693
1694
1695 return error;
1696}
1697
22a2d3d5 1698static int pack_objects_insert_commit(git_packbuilder *pb, struct walk_object *obj)
04a36fef
CMN
1699{
1700 int error;
1701 git_commit *commit = NULL;
1702 git_tree *tree = NULL;
1703
1704 obj->seen = 1;
1705
1706 if ((error = git_packbuilder_insert(pb, &obj->id, NULL)) < 0)
1707 return error;
1708
1709 if ((error = git_commit_lookup(&commit, pb->repo, &obj->id)) < 0)
1710 return error;
1711
1712 if ((error = git_tree_lookup(&tree, pb->repo, git_commit_tree_id(commit))) < 0)
1713 goto cleanup;
1714
22a2d3d5 1715 if ((error = pack_objects_insert_tree(pb, tree)) < 0)
04a36fef
CMN
1716 goto cleanup;
1717
1718cleanup:
1719 git_commit_free(commit);
1720 git_tree_free(tree);
1721 return error;
1722}
1723
1724int git_packbuilder_insert_walk(git_packbuilder *pb, git_revwalk *walk)
1725{
1726 int error;
1727 git_oid id;
ac3d33df 1728 struct walk_object *obj;
04a36fef
CMN
1729
1730 assert(pb && walk);
1731
1732 if ((error = mark_edges_uninteresting(pb, walk->user_input)) < 0)
1733 return error;
1734
1735 /*
1736 * TODO: git marks the parents of the edges
1737 * uninteresting. This may provide a speed advantage, but does
1738 * seem to assume the remote does not have a single-commit
1739 * history on the other end.
1740 */
1741
1742 /* walk down each tree up to the blobs and insert them, stopping when uninteresting */
1743 while ((error = git_revwalk_next(&id, walk)) == 0) {
1744 if ((error = retrieve_object(&obj, pb, &id)) < 0)
1745 return error;
1746
1747 if (obj->seen || obj->uninteresting)
1748 continue;
1749
22a2d3d5 1750 if ((error = pack_objects_insert_commit(pb, obj)) < 0)
04a36fef
CMN
1751 return error;
1752 }
1753
1754 if (error == GIT_ITEROVER)
1755 error = 0;
1756
49be45a1 1757 return error;
04a36fef
CMN
1758}
1759
b176eded
JM
1760int git_packbuilder_set_callbacks(git_packbuilder *pb, git_packbuilder_progress progress_cb, void *progress_cb_payload)
1761{
1762 if (!pb)
1763 return -1;
1764
1765 pb->progress_cb = progress_cb;
1766 pb->progress_cb_payload = progress_cb_payload;
1767
1768 return 0;
1769}
1770
0a32dca5
MS
1771void git_packbuilder_free(git_packbuilder *pb)
1772{
1773 if (pb == NULL)
1774 return;
1775
b4491b99
PK
1776#ifdef GIT_THREADS
1777
1778 git_mutex_free(&pb->cache_mutex);
1779 git_mutex_free(&pb->progress_mutex);
1780 git_cond_free(&pb->progress_cond);
1781
1782#endif
1783
1784 if (pb->odb)
1785 git_odb_free(pb->odb);
1786
b4491b99
PK
1787 if (pb->object_ix)
1788 git_oidmap_free(pb->object_ix);
1789
1790 if (pb->object_list)
1791 git__free(pb->object_list);
1792
04a36fef
CMN
1793 git_oidmap_free(pb->walk_objects);
1794 git_pool_clear(&pb->object_pool);
1795
603bee07 1796 git_hash_ctx_cleanup(&pb->ctx);
52a8a130 1797 git_zstream_free(&pb->zstream);
603bee07 1798
0a32dca5
MS
1799 git__free(pb);
1800}