]> git.proxmox.com Git - libgit2.git/blame - src/pack-objects.c
Update upstream source from tag 'upstream/1.0.0+dfsg.1'
[libgit2.git] / src / pack-objects.c
CommitLineData
0a32dca5 1/*
359fc2d2 2 * Copyright (C) the libgit2 contributors. All rights reserved.
0a32dca5
MS
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8#include "pack-objects.h"
9
c6f26b48 10#include "zstream.h"
0a32dca5
MS
11#include "delta.h"
12#include "iterator.h"
13#include "netops.h"
14#include "pack.h"
15#include "thread-utils.h"
16#include "tree.h"
b176eded 17#include "util.h"
04a36fef
CMN
18#include "revwalk.h"
19#include "commit_list.h"
0a32dca5
MS
20
21#include "git2/pack.h"
22#include "git2/commit.h"
23#include "git2/tag.h"
24#include "git2/indexer.h"
25#include "git2/config.h"
26
27struct unpacked {
28 git_pobject *object;
29 void *data;
30 struct git_delta_index *index;
60e15ecd 31 size_t depth;
0a32dca5
MS
32};
33
f4b86126
PK
34struct tree_walk_context {
35 git_packbuilder *pb;
36 git_buf buf;
37};
38
563c19a9 39struct pack_write_context {
a6154f21 40 git_indexer *indexer;
0c9c969a 41 git_indexer_progress *stats;
563c19a9
MS
42};
43
ac3d33df
JK
44struct walk_object {
45 git_oid id;
46 unsigned int uninteresting:1,
47 seen:1;
48};
49
b4491b99
PK
50#ifdef GIT_THREADS
51
52#define GIT_PACKBUILDER__MUTEX_OP(pb, mtx, op) do { \
53 int result = git_mutex_##op(&(pb)->mtx); \
54 assert(!result); \
55 GIT_UNUSED(result); \
56 } while (0)
57
58#else
59
60#define GIT_PACKBUILDER__MUTEX_OP(pb,mtx,op) GIT_UNUSED(pb)
61
62#endif /* GIT_THREADS */
63
64#define git_packbuilder__cache_lock(pb) GIT_PACKBUILDER__MUTEX_OP(pb, cache_mutex, lock)
65#define git_packbuilder__cache_unlock(pb) GIT_PACKBUILDER__MUTEX_OP(pb, cache_mutex, unlock)
66#define git_packbuilder__progress_lock(pb) GIT_PACKBUILDER__MUTEX_OP(pb, progress_mutex, lock)
67#define git_packbuilder__progress_unlock(pb) GIT_PACKBUILDER__MUTEX_OP(pb, progress_mutex, unlock)
68
b176eded
JM
69/* The minimal interval between progress updates (in seconds). */
70#define MIN_PROGRESS_UPDATE_INTERVAL 0.5
71
0ade2f7a
ET
72/* Size of the buffer to feed to zlib */
73#define COMPRESS_BUFLEN (1024 * 1024)
74
0a32dca5
MS
75static unsigned name_hash(const char *name)
76{
77 unsigned c, hash = 0;
78
79 if (!name)
80 return 0;
81
82 /*
83 * This effectively just creates a sortable number from the
84 * last sixteen non-whitespace characters. Last characters
85 * count "most", so things that end in ".c" sort together.
86 */
87 while ((c = *name++) != 0) {
88 if (git__isspace(c))
89 continue;
90 hash = (hash >> 2) + (c << 24);
91 }
92 return hash;
93}
94
95static int packbuilder_config(git_packbuilder *pb)
96{
97 git_config *config;
39c9dd24 98 int ret = 0;
757b4065 99 int64_t val;
0a32dca5 100
ac99d86b 101 if ((ret = git_repository_config_snapshot(&config, pb->repo)) < 0)
29c4cb09 102 return ret;
0a32dca5 103
757b4065
RB
104#define config_get(KEY,DST,DFLT) do { \
105 ret = git_config_get_int64(&val, config, KEY); \
60e15ecd
ET
106 if (!ret) { \
107 if (!git__is_sizet(val)) { \
ac3d33df 108 git_error_set(GIT_ERROR_CONFIG, \
60e15ecd
ET
109 "configuration value '%s' is too large", KEY); \
110 ret = -1; \
111 goto out; \
112 } \
113 (DST) = (size_t)val; \
114 } else if (ret == GIT_ENOTFOUND) { \
39c9dd24
PS
115 (DST) = (DFLT); \
116 ret = 0; \
117 } else if (ret < 0) goto out; } while (0)
0a32dca5
MS
118
119 config_get("pack.deltaCacheSize", pb->max_delta_cache_size,
120 GIT_PACK_DELTA_CACHE_SIZE);
121 config_get("pack.deltaCacheLimit", pb->cache_max_small_delta_size,
122 GIT_PACK_DELTA_CACHE_LIMIT);
123 config_get("pack.deltaCacheSize", pb->big_file_threshold,
124 GIT_PACK_BIG_FILE_THRESHOLD);
125 config_get("pack.windowMemory", pb->window_memory_limit, 0);
126
127#undef config_get
128
39c9dd24 129out:
29c4cb09
CMN
130 git_config_free(config);
131
39c9dd24 132 return ret;
0a32dca5
MS
133}
134
135int git_packbuilder_new(git_packbuilder **out, git_repository *repo)
136{
137 git_packbuilder *pb;
138
139 *out = NULL;
140
603bee07 141 pb = git__calloc(1, sizeof(*pb));
ac3d33df 142 GIT_ERROR_CHECK_ALLOC(pb);
0a32dca5 143
0c9c969a 144 if (git_oidmap_new(&pb->object_ix) < 0)
b4491b99 145 goto on_error;
0cf49e10 146
0c9c969a 147 if (git_oidmap_new(&pb->walk_objects) < 0)
04a36fef
CMN
148 goto on_error;
149
ac3d33df 150 git_pool_init(&pb->object_pool, sizeof(struct walk_object));
04a36fef 151
0a32dca5
MS
152 pb->repo = repo;
153 pb->nr_threads = 1; /* do not spawn any thread by default */
0a32dca5 154
603bee07 155 if (git_hash_ctx_init(&pb->ctx) < 0 ||
b88f1713 156 git_zstream_init(&pb->zstream, GIT_ZSTREAM_DEFLATE) < 0 ||
b4491b99
PK
157 git_repository_odb(&pb->odb, repo) < 0 ||
158 packbuilder_config(pb) < 0)
0a32dca5
MS
159 goto on_error;
160
b4491b99
PK
161#ifdef GIT_THREADS
162
163 if (git_mutex_init(&pb->cache_mutex) ||
164 git_mutex_init(&pb->progress_mutex) ||
165 git_cond_init(&pb->progress_cond))
1a42dd17 166 {
ac3d33df 167 git_error_set(GIT_ERROR_OS, "failed to initialize packbuilder mutex");
0a32dca5 168 goto on_error;
1a42dd17 169 }
0a32dca5 170
b4491b99
PK
171#endif
172
0a32dca5
MS
173 *out = pb;
174 return 0;
175
176on_error:
b4491b99 177 git_packbuilder_free(pb);
0a32dca5
MS
178 return -1;
179}
180
2508cc66 181unsigned int git_packbuilder_set_threads(git_packbuilder *pb, unsigned int n)
0a32dca5
MS
182{
183 assert(pb);
b8b897bb
PK
184
185#ifdef GIT_THREADS
0a32dca5 186 pb->nr_threads = n;
b8b897bb
PK
187#else
188 GIT_UNUSED(n);
189 assert(1 == pb->nr_threads);
190#endif
191
2508cc66 192 return pb->nr_threads;
0a32dca5
MS
193}
194
0c9c969a 195static int rehash(git_packbuilder *pb)
0cf49e10
MS
196{
197 git_pobject *po;
0c9c969a 198 size_t i;
0cf49e10 199
c37b069b 200 git_oidmap_clear(pb->object_ix);
0c9c969a 201
0cf49e10 202 for (i = 0, po = pb->object_list; i < pb->nr_objects; i++, po++) {
0c9c969a
UG
203 if (git_oidmap_set(pb->object_ix, &po->id, po) < 0)
204 return -1;
0cf49e10 205 }
0c9c969a
UG
206
207 return 0;
0cf49e10
MS
208}
209
0a32dca5
MS
210int git_packbuilder_insert(git_packbuilder *pb, const git_oid *oid,
211 const char *name)
212{
213 git_pobject *po;
0c9c969a 214 size_t newsize;
0cf49e10 215 int ret;
0a32dca5
MS
216
217 assert(pb && oid);
218
b4491b99
PK
219 /* If the object already exists in the hash table, then we don't
220 * have any work to do */
036daa59 221 if (git_oidmap_exists(pb->object_ix, oid))
0a32dca5
MS
222 return 0;
223
224 if (pb->nr_objects >= pb->nr_alloc) {
ac3d33df 225 GIT_ERROR_CHECK_ALLOC_ADD(&newsize, pb->nr_alloc, 1024);
0c9c969a 226 GIT_ERROR_CHECK_ALLOC_MULTIPLY(&newsize, newsize / 2, 3);
ec3b4d35
ET
227
228 if (!git__is_uint32(newsize)) {
ac3d33df 229 git_error_set(GIT_ERROR_NOMEMORY, "packfile too large to fit in memory.");
ec3b4d35
ET
230 return -1;
231 }
232
60e15ecd 233 pb->nr_alloc = newsize;
392702ee 234
3603cb09
ET
235 pb->object_list = git__reallocarray(pb->object_list,
236 pb->nr_alloc, sizeof(*po));
ac3d33df 237 GIT_ERROR_CHECK_ALLOC(pb->object_list);
0c9c969a
UG
238
239 if (rehash(pb) < 0)
240 return -1;
0a32dca5
MS
241 }
242
b4491b99 243 po = pb->object_list + pb->nr_objects;
0a32dca5
MS
244 memset(po, 0x0, sizeof(*po));
245
25e0b157
RB
246 if ((ret = git_odb_read_header(&po->size, &po->type, pb->odb, oid)) < 0)
247 return ret;
b4491b99
PK
248
249 pb->nr_objects++;
0a32dca5 250 git_oid_cpy(&po->id, oid);
0cf49e10 251 po->hash = name_hash(name);
0a32dca5 252
0c9c969a 253 if (git_oidmap_set(pb->object_ix, &po->id, po) < 0) {
ac3d33df 254 git_error_set_oom();
0c9c969a 255 return -1;
25e0b157 256 }
0a32dca5 257
5b188225
JM
258 pb->done = false;
259
b176eded
JM
260 if (pb->progress_cb) {
261 double current_time = git__timer();
dab89f9b
RB
262 double elapsed = current_time - pb->last_progress_report_time;
263
264 if (elapsed >= MIN_PROGRESS_UPDATE_INTERVAL) {
b176eded 265 pb->last_progress_report_time = current_time;
dab89f9b 266
c7b3e1b3 267 ret = pb->progress_cb(
25e0b157 268 GIT_PACKBUILDER_ADDING_OBJECTS,
c7b3e1b3
RB
269 pb->nr_objects, 0, pb->progress_cb_payload);
270
271 if (ret)
ac3d33df 272 return git_error_set_after_callback(ret);
b176eded
JM
273 }
274 }
275
0a32dca5
MS
276 return 0;
277}
278
0a32dca5
MS
279static int get_delta(void **out, git_odb *odb, git_pobject *po)
280{
281 git_odb_object *src = NULL, *trg = NULL;
60e15ecd 282 size_t delta_size;
0a32dca5 283 void *delta_buf;
1cd65991 284 int error;
0a32dca5
MS
285
286 *out = NULL;
287
288 if (git_odb_read(&src, odb, &po->delta->id) < 0 ||
289 git_odb_read(&trg, odb, &po->id) < 0)
290 goto on_error;
291
1cd65991
ET
292 error = git_delta(&delta_buf, &delta_size,
293 git_odb_object_data(src), git_odb_object_size(src),
294 git_odb_object_data(trg), git_odb_object_size(trg),
295 0);
296
297 if (error < 0 && error != GIT_EBUFS)
298 goto on_error;
0a32dca5 299
1cd65991 300 if (error == GIT_EBUFS || delta_size != po->delta_size) {
ac3d33df 301 git_error_set(GIT_ERROR_INVALID, "delta size changed");
0a32dca5
MS
302 goto on_error;
303 }
304
305 *out = delta_buf;
306
307 git_odb_object_free(src);
308 git_odb_object_free(trg);
309 return 0;
310
311on_error:
312 git_odb_object_free(src);
313 git_odb_object_free(trg);
314 return -1;
315}
316
0ade2f7a
ET
317static int write_object(
318 git_packbuilder *pb,
319 git_pobject *po,
320 int (*write_cb)(void *buf, size_t size, void *cb_data),
321 void *cb_data)
0a32dca5
MS
322{
323 git_odb_object *obj = NULL;
ac3d33df 324 git_object_t type;
0ade2f7a 325 unsigned char hdr[10], *zbuf = NULL;
a14aa1e7 326 void *data = NULL;
0ade2f7a 327 size_t hdr_len, zbuf_len = COMPRESS_BUFLEN, data_len;
0ade2f7a 328 int error;
0a32dca5 329
a14aa1e7
CMN
330 /*
331 * If we have a delta base, let's use the delta to save space.
332 * Otherwise load the whole object. 'data' ends up pointing to
333 * whatever data we want to put into the packfile.
334 */
0a32dca5
MS
335 if (po->delta) {
336 if (po->delta_data)
a14aa1e7
CMN
337 data = po->delta_data;
338 else if ((error = get_delta(&data, pb->odb, po)) < 0)
0ade2f7a
ET
339 goto done;
340
341 data_len = po->delta_size;
ac3d33df 342 type = GIT_OBJECT_REF_DELTA;
0a32dca5 343 } else {
0ade2f7a
ET
344 if ((error = git_odb_read(&obj, pb->odb, &po->id)) < 0)
345 goto done;
0a32dca5
MS
346
347 data = (void *)git_odb_object_data(obj);
0ade2f7a 348 data_len = git_odb_object_size(obj);
0a32dca5
MS
349 type = git_odb_object_type(obj);
350 }
351
352 /* Write header */
0ade2f7a 353 hdr_len = git_packfile__object_header(hdr, data_len, type);
0a32dca5 354
0ade2f7a
ET
355 if ((error = write_cb(hdr, hdr_len, cb_data)) < 0 ||
356 (error = git_hash_update(&pb->ctx, hdr, hdr_len)) < 0)
357 goto done;
0a32dca5 358
ac3d33df 359 if (type == GIT_OBJECT_REF_DELTA) {
0ade2f7a
ET
360 if ((error = write_cb(po->delta->id.id, GIT_OID_RAWSZ, cb_data)) < 0 ||
361 (error = git_hash_update(&pb->ctx, po->delta->id.id, GIT_OID_RAWSZ)) < 0)
362 goto done;
0a32dca5
MS
363 }
364
365 /* Write data */
0ade2f7a
ET
366 if (po->z_delta_size) {
367 data_len = po->z_delta_size;
368
369 if ((error = write_cb(data, data_len, cb_data)) < 0 ||
370 (error = git_hash_update(&pb->ctx, data, data_len)) < 0)
371 goto done;
372 } else {
373 zbuf = git__malloc(zbuf_len);
ac3d33df 374 GIT_ERROR_CHECK_ALLOC(zbuf);
0ade2f7a 375
52a8a130 376 git_zstream_reset(&pb->zstream);
66a70851
UG
377
378 if ((error = git_zstream_set_input(&pb->zstream, data, data_len)) < 0)
379 goto done;
0ade2f7a 380
d9b04d78
RB
381 while (!git_zstream_done(&pb->zstream)) {
382 if ((error = git_zstream_get_output(zbuf, &zbuf_len, &pb->zstream)) < 0 ||
383 (error = write_cb(zbuf, zbuf_len, cb_data)) < 0 ||
384 (error = git_hash_update(&pb->ctx, zbuf, zbuf_len)) < 0)
0ade2f7a
ET
385 goto done;
386
d9b04d78 387 zbuf_len = COMPRESS_BUFLEN; /* reuse buffer */
0ade2f7a 388 }
0a32dca5
MS
389 }
390
a14aa1e7
CMN
391 /*
392 * If po->delta is true, data is a delta and it is our
393 * responsibility to free it (otherwise it's a git_object's
394 * data). We set po->delta_data to NULL in case we got the
395 * data from there instead of get_delta(). If we didn't,
396 * there's no harm.
397 */
398 if (po->delta) {
399 git__free(data);
7e3ed419
RB
400 po->delta_data = NULL;
401 }
0a32dca5 402
0a32dca5 403 pb->nr_written++;
0a32dca5 404
0ade2f7a
ET
405done:
406 git__free(zbuf);
0a32dca5 407 git_odb_object_free(obj);
0ade2f7a 408 return error;
0a32dca5
MS
409}
410
411enum write_one_status {
412 WRITE_ONE_SKIP = -1, /* already written */
413 WRITE_ONE_BREAK = 0, /* writing this will bust the limit; not written */
414 WRITE_ONE_WRITTEN = 1, /* normal */
415 WRITE_ONE_RECURSIVE = 2 /* already scheduled to be written */
416};
417
0ade2f7a
ET
418static int write_one(
419 enum write_one_status *status,
420 git_packbuilder *pb,
421 git_pobject *po,
422 int (*write_cb)(void *buf, size_t size, void *cb_data),
423 void *cb_data)
0a32dca5 424{
0ade2f7a
ET
425 int error;
426
0a32dca5
MS
427 if (po->recursing) {
428 *status = WRITE_ONE_RECURSIVE;
429 return 0;
430 } else if (po->written) {
431 *status = WRITE_ONE_SKIP;
432 return 0;
433 }
434
435 if (po->delta) {
436 po->recursing = 1;
0ade2f7a
ET
437
438 if ((error = write_one(status, pb, po->delta, write_cb, cb_data)) < 0)
439 return error;
440
441 /* we cannot depend on this one */
442 if (*status == WRITE_ONE_RECURSIVE)
0a32dca5 443 po->delta = NULL;
0a32dca5
MS
444 }
445
38d338b2 446 *status = WRITE_ONE_WRITTEN;
0a32dca5
MS
447 po->written = 1;
448 po->recursing = 0;
0ade2f7a
ET
449
450 return write_object(pb, po, write_cb, cb_data);
0a32dca5
MS
451}
452
60e15ecd
ET
453GIT_INLINE(void) add_to_write_order(git_pobject **wo, size_t *endp,
454 git_pobject *po)
0a32dca5
MS
455{
456 if (po->filled)
457 return;
458 wo[(*endp)++] = po;
459 po->filled = 1;
460}
461
60e15ecd
ET
462static void add_descendants_to_write_order(git_pobject **wo, size_t *endp,
463 git_pobject *po)
0a32dca5
MS
464{
465 int add_to_order = 1;
466 while (po) {
467 if (add_to_order) {
468 git_pobject *s;
469 /* add this node... */
470 add_to_write_order(wo, endp, po);
471 /* all its siblings... */
472 for (s = po->delta_sibling; s; s = s->delta_sibling) {
473 add_to_write_order(wo, endp, s);
474 }
475 }
476 /* drop down a level to add left subtree nodes if possible */
477 if (po->delta_child) {
478 add_to_order = 1;
479 po = po->delta_child;
480 } else {
481 add_to_order = 0;
482 /* our sibling might have some children, it is next */
483 if (po->delta_sibling) {
484 po = po->delta_sibling;
485 continue;
486 }
487 /* go back to our parent node */
488 po = po->delta;
489 while (po && !po->delta_sibling) {
490 /* we're on the right side of a subtree, keep
491 * going up until we can go right again */
492 po = po->delta;
493 }
494 if (!po) {
495 /* done- we hit our original root node */
496 return;
497 }
498 /* pass it off to sibling at this level */
499 po = po->delta_sibling;
500 }
501 };
502}
503
60e15ecd
ET
504static void add_family_to_write_order(git_pobject **wo, size_t *endp,
505 git_pobject *po)
0a32dca5
MS
506{
507 git_pobject *root;
508
509 for (root = po; root->delta; root = root->delta)
510 ; /* nothing */
511 add_descendants_to_write_order(wo, endp, root);
512}
513
514static int cb_tag_foreach(const char *name, git_oid *oid, void *data)
515{
516 git_packbuilder *pb = data;
0cf49e10 517 git_pobject *po;
0a32dca5
MS
518
519 GIT_UNUSED(name);
520
0c9c969a 521 if ((po = git_oidmap_get(pb->object_ix, oid)) == NULL)
0cf49e10
MS
522 return 0;
523
0cf49e10
MS
524 po->tagged = 1;
525
0a32dca5 526 /* TODO: peel objects */
0cf49e10 527
0a32dca5
MS
528 return 0;
529}
530
531static git_pobject **compute_write_order(git_packbuilder *pb)
532{
60e15ecd 533 size_t i, wo_end, last_untagged;
392702ee 534 git_pobject **wo;
0a32dca5 535
2884cc42 536 if ((wo = git__mallocarray(pb->nr_objects, sizeof(*wo))) == NULL)
392702ee 537 return NULL;
0a32dca5
MS
538
539 for (i = 0; i < pb->nr_objects; i++) {
540 git_pobject *po = pb->object_list + i;
541 po->tagged = 0;
542 po->filled = 0;
543 po->delta_child = NULL;
544 po->delta_sibling = NULL;
545 }
546
547 /*
548 * Fully connect delta_child/delta_sibling network.
549 * Make sure delta_sibling is sorted in the original
550 * recency order.
551 */
552 for (i = pb->nr_objects; i > 0;) {
553 git_pobject *po = &pb->object_list[--i];
554 if (!po->delta)
555 continue;
556 /* Mark me as the first child */
557 po->delta_sibling = po->delta->delta_child;
558 po->delta->delta_child = po;
559 }
560
561 /*
562 * Mark objects that are at the tip of tags.
563 */
050af8bb
RD
564 if (git_tag_foreach(pb->repo, &cb_tag_foreach, pb) < 0) {
565 git__free(wo);
0a32dca5 566 return NULL;
050af8bb 567 }
0a32dca5
MS
568
569 /*
570 * Give the objects in the original recency order until
571 * we see a tagged tip.
572 */
573 for (i = wo_end = 0; i < pb->nr_objects; i++) {
574 git_pobject *po = pb->object_list + i;
575 if (po->tagged)
576 break;
577 add_to_write_order(wo, &wo_end, po);
578 }
579 last_untagged = i;
580
581 /*
582 * Then fill all the tagged tips.
583 */
584 for (; i < pb->nr_objects; i++) {
585 git_pobject *po = pb->object_list + i;
586 if (po->tagged)
587 add_to_write_order(wo, &wo_end, po);
588 }
589
590 /*
591 * And then all remaining commits and tags.
592 */
593 for (i = last_untagged; i < pb->nr_objects; i++) {
594 git_pobject *po = pb->object_list + i;
ac3d33df
JK
595 if (po->type != GIT_OBJECT_COMMIT &&
596 po->type != GIT_OBJECT_TAG)
0a32dca5
MS
597 continue;
598 add_to_write_order(wo, &wo_end, po);
599 }
600
601 /*
602 * And then all the trees.
603 */
604 for (i = last_untagged; i < pb->nr_objects; i++) {
605 git_pobject *po = pb->object_list + i;
ac3d33df 606 if (po->type != GIT_OBJECT_TREE)
0a32dca5
MS
607 continue;
608 add_to_write_order(wo, &wo_end, po);
609 }
610
611 /*
612 * Finally all the rest in really tight order
613 */
614 for (i = last_untagged; i < pb->nr_objects; i++) {
615 git_pobject *po = pb->object_list + i;
616 if (!po->filled)
617 add_family_to_write_order(wo, &wo_end, po);
618 }
619
620 if (wo_end != pb->nr_objects) {
0b2437bb 621 git__free(wo);
ac3d33df 622 git_error_set(GIT_ERROR_INVALID, "invalid write order");
0a32dca5
MS
623 return NULL;
624 }
625
626 return wo;
627}
628
629static int write_pack(git_packbuilder *pb,
0ade2f7a
ET
630 int (*write_cb)(void *buf, size_t size, void *cb_data),
631 void *cb_data)
0a32dca5
MS
632{
633 git_pobject **write_order;
634 git_pobject *po;
0a32dca5
MS
635 enum write_one_status status;
636 struct git_pack_header ph;
cc2447da 637 git_oid entry_oid;
60e15ecd 638 size_t i = 0;
5b188225 639 int error = 0;
0a32dca5
MS
640
641 write_order = compute_write_order(pb);
bac52ab0
PS
642 if (write_order == NULL)
643 return -1;
0a32dca5 644
60e15ecd 645 if (!git__is_uint32(pb->nr_objects)) {
ac3d33df 646 git_error_set(GIT_ERROR_INVALID, "too many objects");
60e15ecd
ET
647 return -1;
648 }
649
0a32dca5
MS
650 /* Write pack header */
651 ph.hdr_signature = htonl(PACK_SIGNATURE);
652 ph.hdr_version = htonl(PACK_VERSION);
653 ph.hdr_entries = htonl(pb->nr_objects);
654
0ade2f7a
ET
655 if ((error = write_cb(&ph, sizeof(ph), cb_data)) < 0 ||
656 (error = git_hash_update(&pb->ctx, &ph, sizeof(ph))) < 0)
5b188225 657 goto done;
0a32dca5
MS
658
659 pb->nr_remaining = pb->nr_objects;
660 do {
661 pb->nr_written = 0;
662 for ( ; i < pb->nr_objects; ++i) {
663 po = write_order[i];
0ade2f7a
ET
664
665 if ((error = write_one(&status, pb, po, write_cb, cb_data)) < 0)
5b188225 666 goto done;
0a32dca5
MS
667 }
668
669 pb->nr_remaining -= pb->nr_written;
670 } while (pb->nr_remaining && i < pb->nr_objects);
671
cc2447da 672 if ((error = git_hash_final(&entry_oid, &pb->ctx)) < 0)
5b188225 673 goto done;
0a32dca5 674
0ade2f7a 675 error = write_cb(entry_oid.id, GIT_OID_RAWSZ, cb_data);
0a32dca5 676
5b188225 677done:
7e3ed419
RB
678 /* if callback cancelled writing, we must still free delta_data */
679 for ( ; i < pb->nr_objects; ++i) {
680 po = write_order[i];
681 if (po->delta_data) {
682 git__free(po->delta_data);
683 po->delta_data = NULL;
684 }
685 }
686
0a32dca5 687 git__free(write_order);
5b188225 688 return error;
0a32dca5
MS
689}
690
0a32dca5
MS
691static int write_pack_buf(void *buf, size_t size, void *data)
692{
693 git_buf *b = (git_buf *)data;
694 return git_buf_put(b, buf, size);
695}
696
0a32dca5
MS
697static int type_size_sort(const void *_a, const void *_b)
698{
699 const git_pobject *a = (git_pobject *)_a;
700 const git_pobject *b = (git_pobject *)_b;
701
702 if (a->type > b->type)
703 return -1;
704 if (a->type < b->type)
705 return 1;
706 if (a->hash > b->hash)
707 return -1;
708 if (a->hash < b->hash)
709 return 1;
710 /*
711 * TODO
712 *
713 if (a->preferred_base > b->preferred_base)
714 return -1;
715 if (a->preferred_base < b->preferred_base)
716 return 1;
717 */
718 if (a->size > b->size)
719 return -1;
720 if (a->size < b->size)
721 return 1;
722 return a < b ? -1 : (a > b); /* newest first */
723}
724
60e15ecd
ET
725static int delta_cacheable(
726 git_packbuilder *pb,
727 size_t src_size,
728 size_t trg_size,
729 size_t delta_size)
0a32dca5 730{
60e15ecd
ET
731 size_t new_size;
732
733 if (git__add_sizet_overflow(&new_size, pb->delta_cache_size, delta_size))
734 return 0;
735
736 if (pb->max_delta_cache_size && new_size > pb->max_delta_cache_size)
0a32dca5
MS
737 return 0;
738
739 if (delta_size < pb->cache_max_small_delta_size)
740 return 1;
741
742 /* cache delta, if objects are large enough compared to delta size */
743 if ((src_size >> 20) + (trg_size >> 21) > (delta_size >> 10))
744 return 1;
745
746 return 0;
747}
748
0a32dca5 749static int try_delta(git_packbuilder *pb, struct unpacked *trg,
60e15ecd
ET
750 struct unpacked *src, size_t max_depth,
751 size_t *mem_usage, int *ret)
0a32dca5
MS
752{
753 git_pobject *trg_object = trg->object;
754 git_pobject *src_object = src->object;
755 git_odb_object *obj;
60e15ecd
ET
756 size_t trg_size, src_size, delta_size, sizediff, max_size, sz;
757 size_t ref_depth;
0a32dca5
MS
758 void *delta_buf;
759
760 /* Don't bother doing diffs between different types */
761 if (trg_object->type != src_object->type) {
762 *ret = -1;
763 return 0;
764 }
765
766 *ret = 0;
767
768 /* TODO: support reuse-delta */
769
770 /* Let's not bust the allowed depth. */
771 if (src->depth >= max_depth)
772 return 0;
773
774 /* Now some size filtering heuristics. */
60e15ecd 775 trg_size = trg_object->size;
0a32dca5
MS
776 if (!trg_object->delta) {
777 max_size = trg_size/2 - 20;
778 ref_depth = 1;
779 } else {
780 max_size = trg_object->delta_size;
781 ref_depth = trg->depth;
782 }
783
784 max_size = (uint64_t)max_size * (max_depth - src->depth) /
785 (max_depth - ref_depth + 1);
786 if (max_size == 0)
787 return 0;
788
60e15ecd 789 src_size = src_object->size;
0a32dca5
MS
790 sizediff = src_size < trg_size ? trg_size - src_size : 0;
791 if (sizediff >= max_size)
792 return 0;
793 if (trg_size < src_size / 32)
794 return 0;
795
796 /* Load data if not already done */
797 if (!trg->data) {
798 if (git_odb_read(&obj, pb->odb, &trg_object->id) < 0)
799 return -1;
800
60e15ecd 801 sz = git_odb_object_size(obj);
0a32dca5 802 trg->data = git__malloc(sz);
ac3d33df 803 GIT_ERROR_CHECK_ALLOC(trg->data);
0a32dca5
MS
804 memcpy(trg->data, git_odb_object_data(obj), sz);
805
806 git_odb_object_free(obj);
807
808 if (sz != trg_size) {
ac3d33df 809 git_error_set(GIT_ERROR_INVALID,
909d5494 810 "inconsistent target object length");
0a32dca5
MS
811 return -1;
812 }
813
814 *mem_usage += sz;
815 }
816 if (!src->data) {
392702ee
ET
817 size_t obj_sz;
818
819 if (git_odb_read(&obj, pb->odb, &src_object->id) < 0 ||
820 !git__is_ulong(obj_sz = git_odb_object_size(obj)))
0a32dca5
MS
821 return -1;
822
60e15ecd 823 sz = obj_sz;
0a32dca5 824 src->data = git__malloc(sz);
ac3d33df 825 GIT_ERROR_CHECK_ALLOC(src->data);
0a32dca5
MS
826 memcpy(src->data, git_odb_object_data(obj), sz);
827
828 git_odb_object_free(obj);
829
830 if (sz != src_size) {
ac3d33df 831 git_error_set(GIT_ERROR_INVALID,
909d5494 832 "inconsistent source object length");
0a32dca5
MS
833 return -1;
834 }
835
836 *mem_usage += sz;
837 }
838 if (!src->index) {
1cd65991 839 if (git_delta_index_init(&src->index, src->data, src_size) < 0)
0a32dca5
MS
840 return 0; /* suboptimal pack - out of memory */
841
1cd65991 842 *mem_usage += git_delta_index_size(src->index);
0a32dca5
MS
843 }
844
1cd65991
ET
845 if (git_delta_create_from_index(&delta_buf, &delta_size, src->index, trg->data, trg_size,
846 max_size) < 0)
0a32dca5
MS
847 return 0;
848
849 if (trg_object->delta) {
850 /* Prefer only shallower same-sized deltas. */
851 if (delta_size == trg_object->delta_size &&
852 src->depth + 1 >= trg->depth) {
853 git__free(delta_buf);
854 return 0;
855 }
856 }
857
b4491b99 858 git_packbuilder__cache_lock(pb);
0a32dca5
MS
859 if (trg_object->delta_data) {
860 git__free(trg_object->delta_data);
60e15ecd 861 assert(pb->delta_cache_size >= trg_object->delta_size);
0a32dca5
MS
862 pb->delta_cache_size -= trg_object->delta_size;
863 trg_object->delta_data = NULL;
864 }
865 if (delta_cacheable(pb, src_size, trg_size, delta_size)) {
60e15ecd 866 bool overflow = git__add_sizet_overflow(
0f07d54b 867 &pb->delta_cache_size, pb->delta_cache_size, delta_size);
392702ee 868
b4491b99 869 git_packbuilder__cache_unlock(pb);
0a32dca5 870
3fe5768b
PS
871 if (overflow) {
872 git__free(delta_buf);
0f07d54b 873 return -1;
3fe5768b 874 }
d1c9a48d
PS
875
876 trg_object->delta_data = git__realloc(delta_buf, delta_size);
ac3d33df 877 GIT_ERROR_CHECK_ALLOC(trg_object->delta_data);
0a32dca5
MS
878 } else {
879 /* create delta when writing the pack */
b4491b99 880 git_packbuilder__cache_unlock(pb);
0a32dca5
MS
881 git__free(delta_buf);
882 }
883
884 trg_object->delta = src_object;
885 trg_object->delta_size = delta_size;
886 trg->depth = src->depth + 1;
887
888 *ret = 1;
889 return 0;
890}
891
60e15ecd 892static size_t check_delta_limit(git_pobject *me, size_t n)
0a32dca5
MS
893{
894 git_pobject *child = me->delta_child;
60e15ecd 895 size_t m = n;
0a32dca5
MS
896
897 while (child) {
60e15ecd 898 size_t c = check_delta_limit(child, n + 1);
0a32dca5
MS
899 if (m < c)
900 m = c;
901 child = child->delta_sibling;
902 }
903 return m;
904}
905
60e15ecd 906static size_t free_unpacked(struct unpacked *n)
0a32dca5 907{
60e15ecd 908 size_t freed_mem = 0;
a03952f0 909
1cd65991
ET
910 if (n->index) {
911 freed_mem += git_delta_index_size(n->index);
912 git_delta_index_free(n->index);
913 }
0a32dca5 914 n->index = NULL;
1cd65991 915
0a32dca5 916 if (n->data) {
60e15ecd 917 freed_mem += n->object->size;
0a32dca5
MS
918 git__free(n->data);
919 n->data = NULL;
920 }
921 n->object = NULL;
922 n->depth = 0;
923 return freed_mem;
924}
925
60e15ecd
ET
926static int report_delta_progress(
927 git_packbuilder *pb, uint32_t count, bool force)
3c337a5d
CMN
928{
929 int ret;
930
931 if (pb->progress_cb) {
932 double current_time = git__timer();
933 double elapsed = current_time - pb->last_progress_report_time;
934
935 if (force || elapsed >= MIN_PROGRESS_UPDATE_INTERVAL) {
936 pb->last_progress_report_time = current_time;
937
938 ret = pb->progress_cb(
939 GIT_PACKBUILDER_DELTAFICATION,
940 count, pb->nr_objects, pb->progress_cb_payload);
941
942 if (ret)
ac3d33df 943 return git_error_set_after_callback(ret);
3c337a5d
CMN
944 }
945 }
946
947 return 0;
948}
949
0a32dca5 950static int find_deltas(git_packbuilder *pb, git_pobject **list,
60e15ecd 951 size_t *list_size, size_t window, size_t depth)
0a32dca5
MS
952{
953 git_pobject *po;
954 git_buf zbuf = GIT_BUF_INIT;
955 struct unpacked *array;
60e15ecd
ET
956 size_t idx = 0, count = 0;
957 size_t mem_usage = 0;
958 size_t i;
0a32dca5
MS
959 int error = -1;
960
961 array = git__calloc(window, sizeof(struct unpacked));
ac3d33df 962 GIT_ERROR_CHECK_ALLOC(array);
0a32dca5
MS
963
964 for (;;) {
965 struct unpacked *n = array + idx;
60e15ecd 966 size_t max_depth, j, best_base = SIZE_MAX;
0a32dca5 967
b4491b99 968 git_packbuilder__progress_lock(pb);
0a32dca5 969 if (!*list_size) {
b4491b99 970 git_packbuilder__progress_unlock(pb);
0a32dca5
MS
971 break;
972 }
973
3c337a5d
CMN
974 pb->nr_deltified += 1;
975 report_delta_progress(pb, pb->nr_deltified, false);
976
0a32dca5
MS
977 po = *list++;
978 (*list_size)--;
b4491b99 979 git_packbuilder__progress_unlock(pb);
0a32dca5
MS
980
981 mem_usage -= free_unpacked(n);
982 n->object = po;
983
984 while (pb->window_memory_limit &&
985 mem_usage > pb->window_memory_limit &&
986 count > 1) {
60e15ecd 987 size_t tail = (idx + window - count) % window;
0a32dca5
MS
988 mem_usage -= free_unpacked(array + tail);
989 count--;
990 }
991
992 /*
993 * If the current object is at pack edge, take the depth the
994 * objects that depend on the current object into account
995 * otherwise they would become too deep.
996 */
997 max_depth = depth;
998 if (po->delta_child) {
60e15ecd
ET
999 size_t delta_limit = check_delta_limit(po, 0);
1000
1001 if (delta_limit > max_depth)
0a32dca5 1002 goto next;
60e15ecd
ET
1003
1004 max_depth -= delta_limit;
0a32dca5
MS
1005 }
1006
1007 j = window;
1008 while (--j > 0) {
1009 int ret;
60e15ecd 1010 size_t other_idx = idx + j;
0a32dca5
MS
1011 struct unpacked *m;
1012
1013 if (other_idx >= window)
1014 other_idx -= window;
1015
1016 m = array + other_idx;
1017 if (!m->object)
1018 break;
1019
1020 if (try_delta(pb, n, m, max_depth, &mem_usage, &ret) < 0)
1021 goto on_error;
1022 if (ret < 0)
1023 break;
1024 else if (ret > 0)
1025 best_base = other_idx;
1026 }
1027
1028 /*
1029 * If we decided to cache the delta data, then it is best
1030 * to compress it right away. First because we have to do
1031 * it anyway, and doing it here while we're threaded will
1032 * save a lot of time in the non threaded write phase,
1033 * as well as allow for caching more deltas within
1034 * the same cache size limit.
1035 * ...
1036 * But only if not writing to stdout, since in that case
1037 * the network is most likely throttling writes anyway,
1038 * and therefore it is best to go to the write phase ASAP
1039 * instead, as we can afford spending more time compressing
1040 * between writes at that moment.
1041 */
1042 if (po->delta_data) {
c6f26b48 1043 if (git_zstream_deflatebuf(&zbuf, po->delta_data, po->delta_size) < 0)
0a32dca5
MS
1044 goto on_error;
1045
1046 git__free(po->delta_data);
1047 po->delta_data = git__malloc(zbuf.size);
ac3d33df 1048 GIT_ERROR_CHECK_ALLOC(po->delta_data);
0a32dca5
MS
1049
1050 memcpy(po->delta_data, zbuf.ptr, zbuf.size);
60e15ecd 1051 po->z_delta_size = zbuf.size;
0a32dca5
MS
1052 git_buf_clear(&zbuf);
1053
b4491b99 1054 git_packbuilder__cache_lock(pb);
0a32dca5
MS
1055 pb->delta_cache_size -= po->delta_size;
1056 pb->delta_cache_size += po->z_delta_size;
b4491b99 1057 git_packbuilder__cache_unlock(pb);
0a32dca5
MS
1058 }
1059
1060 /*
1061 * If we made n a delta, and if n is already at max
1062 * depth, leaving it in the window is pointless. we
1063 * should evict it first.
1064 */
1065 if (po->delta && max_depth <= n->depth)
1066 continue;
1067
1068 /*
1069 * Move the best delta base up in the window, after the
1070 * currently deltified object, to keep it longer. It will
1071 * be the first base object to be attempted next.
1072 */
1073 if (po->delta) {
1074 struct unpacked swap = array[best_base];
60e15ecd
ET
1075 size_t dist = (window + idx - best_base) % window;
1076 size_t dst = best_base;
0a32dca5 1077 while (dist--) {
60e15ecd 1078 size_t src = (dst + 1) % window;
0a32dca5
MS
1079 array[dst] = array[src];
1080 dst = src;
1081 }
1082 array[dst] = swap;
1083 }
1084
1085 next:
1086 idx++;
1087 if (count + 1 < window)
1088 count++;
1089 if (idx >= window)
1090 idx = 0;
1091 }
1092 error = 0;
1093
1094on_error:
1095 for (i = 0; i < window; ++i) {
1096 git__free(array[i].index);
1097 git__free(array[i].data);
1098 }
1099 git__free(array);
ac3d33df 1100 git_buf_dispose(&zbuf);
0a32dca5
MS
1101
1102 return error;
1103}
1104
1105#ifdef GIT_THREADS
1106
1107struct thread_params {
1108 git_thread thread;
1109 git_packbuilder *pb;
1110
1111 git_pobject **list;
1112
1113 git_cond cond;
1114 git_mutex mutex;
1115
60e15ecd
ET
1116 size_t list_size;
1117 size_t remaining;
0a32dca5 1118
60e15ecd
ET
1119 size_t window;
1120 size_t depth;
1121 size_t working;
1122 size_t data_ready;
0a32dca5
MS
1123};
1124
0a32dca5
MS
1125static void *threaded_find_deltas(void *arg)
1126{
1127 struct thread_params *me = arg;
1128
1129 while (me->remaining) {
1130 if (find_deltas(me->pb, me->list, &me->remaining,
1131 me->window, me->depth) < 0) {
1132 ; /* TODO */
1133 }
1134
b4491b99 1135 git_packbuilder__progress_lock(me->pb);
0a32dca5 1136 me->working = 0;
b4491b99
PK
1137 git_cond_signal(&me->pb->progress_cond);
1138 git_packbuilder__progress_unlock(me->pb);
0a32dca5 1139
a35b3864 1140 if (git_mutex_lock(&me->mutex)) {
ac3d33df 1141 git_error_set(GIT_ERROR_THREAD, "unable to lock packfile condition mutex");
a35b3864
JSS
1142 return NULL;
1143 }
1144
1145 while (!me->data_ready)
1146 git_cond_wait(&me->cond, &me->mutex);
1147
0a32dca5
MS
1148 /*
1149 * We must not set ->data_ready before we wait on the
1150 * condition because the main thread may have set it to 1
1151 * before we get here. In order to be sure that new
1152 * work is available if we see 1 in ->data_ready, it
1153 * was initialized to 0 before this thread was spawned
1154 * and we reset it to 0 right away.
1155 */
0a32dca5
MS
1156 me->data_ready = 0;
1157 git_mutex_unlock(&me->mutex);
1158 }
1159 /* leave ->working 1 so that this doesn't get more work assigned */
1160 return NULL;
1161}
1162
1163static int ll_find_deltas(git_packbuilder *pb, git_pobject **list,
60e15ecd 1164 size_t list_size, size_t window, size_t depth)
0a32dca5
MS
1165{
1166 struct thread_params *p;
60e15ecd
ET
1167 size_t i;
1168 int ret, active_threads = 0;
0a32dca5 1169
0a32dca5
MS
1170 if (!pb->nr_threads)
1171 pb->nr_threads = git_online_cpus();
b4491b99 1172
0a32dca5
MS
1173 if (pb->nr_threads <= 1) {
1174 find_deltas(pb, list, &list_size, window, depth);
0a32dca5
MS
1175 return 0;
1176 }
1177
3603cb09 1178 p = git__mallocarray(pb->nr_threads, sizeof(*p));
ac3d33df 1179 GIT_ERROR_CHECK_ALLOC(p);
0a32dca5
MS
1180
1181 /* Partition the work among the threads */
1182 for (i = 0; i < pb->nr_threads; ++i) {
60e15ecd 1183 size_t sub_size = list_size / (pb->nr_threads - i);
0a32dca5
MS
1184
1185 /* don't use too small segments or no deltas will be found */
1186 if (sub_size < 2*window && i+1 < pb->nr_threads)
1187 sub_size = 0;
1188
1189 p[i].pb = pb;
1190 p[i].window = window;
1191 p[i].depth = depth;
1192 p[i].working = 1;
1193 p[i].data_ready = 0;
1194
1195 /* try to split chunks on "path" boundaries */
1196 while (sub_size && sub_size < list_size &&
1197 list[sub_size]->hash &&
1198 list[sub_size]->hash == list[sub_size-1]->hash)
1199 sub_size++;
1200
1201 p[i].list = list;
1202 p[i].list_size = sub_size;
1203 p[i].remaining = sub_size;
1204
1205 list += sub_size;
1206 list_size -= sub_size;
1207 }
1208
1209 /* Start work threads */
1210 for (i = 0; i < pb->nr_threads; ++i) {
1211 if (!p[i].list_size)
1212 continue;
1213
1214 git_mutex_init(&p[i].mutex);
1215 git_cond_init(&p[i].cond);
1216
faebc1c6 1217 ret = git_thread_create(&p[i].thread,
0a32dca5
MS
1218 threaded_find_deltas, &p[i]);
1219 if (ret) {
ac3d33df 1220 git_error_set(GIT_ERROR_THREAD, "unable to create thread");
0a32dca5
MS
1221 return -1;
1222 }
1223 active_threads++;
1224 }
1225
1226 /*
1227 * Now let's wait for work completion. Each time a thread is done
1228 * with its work, we steal half of the remaining work from the
1229 * thread with the largest number of unprocessed objects and give
1230 * it to that newly idle thread. This ensure good load balancing
1231 * until the remaining object list segments are simply too short
1232 * to be worth splitting anymore.
1233 */
1234 while (active_threads) {
1235 struct thread_params *target = NULL;
1236 struct thread_params *victim = NULL;
60e15ecd 1237 size_t sub_size = 0;
0a32dca5 1238
b4491b99
PK
1239 /* Start by locating a thread that has transitioned its
1240 * 'working' flag from 1 -> 0. This indicates that it is
1241 * ready to receive more work using our work-stealing
1242 * algorithm. */
1243 git_packbuilder__progress_lock(pb);
0a32dca5
MS
1244 for (;;) {
1245 for (i = 0; !target && i < pb->nr_threads; i++)
1246 if (!p[i].working)
1247 target = &p[i];
1248 if (target)
1249 break;
b4491b99 1250 git_cond_wait(&pb->progress_cond, &pb->progress_mutex);
0a32dca5
MS
1251 }
1252
b4491b99
PK
1253 /* At this point we hold the progress lock and have located
1254 * a thread to receive more work. We still need to locate a
1255 * thread from which to steal work (the victim). */
0a32dca5
MS
1256 for (i = 0; i < pb->nr_threads; i++)
1257 if (p[i].remaining > 2*window &&
1258 (!victim || victim->remaining < p[i].remaining))
1259 victim = &p[i];
b4491b99 1260
0a32dca5
MS
1261 if (victim) {
1262 sub_size = victim->remaining / 2;
1263 list = victim->list + victim->list_size - sub_size;
1264 while (sub_size && list[0]->hash &&
1265 list[0]->hash == list[-1]->hash) {
1266 list++;
1267 sub_size--;
1268 }
1269 if (!sub_size) {
1270 /*
1271 * It is possible for some "paths" to have
1272 * so many objects that no hash boundary
1273 * might be found. Let's just steal the
1274 * exact half in that case.
1275 */
1276 sub_size = victim->remaining / 2;
1277 list -= sub_size;
1278 }
1279 target->list = list;
1280 victim->list_size -= sub_size;
1281 victim->remaining -= sub_size;
1282 }
1283 target->list_size = sub_size;
1284 target->remaining = sub_size;
1285 target->working = 1;
b4491b99 1286 git_packbuilder__progress_unlock(pb);
0a32dca5 1287
a35b3864 1288 if (git_mutex_lock(&target->mutex)) {
ac3d33df 1289 git_error_set(GIT_ERROR_THREAD, "unable to lock packfile condition mutex");
a35b3864
JSS
1290 git__free(p);
1291 return -1;
1292 }
1293
0a32dca5
MS
1294 target->data_ready = 1;
1295 git_cond_signal(&target->cond);
1296 git_mutex_unlock(&target->mutex);
1297
1298 if (!sub_size) {
fb591767 1299 git_thread_join(&target->thread, NULL);
0a32dca5
MS
1300 git_cond_free(&target->cond);
1301 git_mutex_free(&target->mutex);
1302 active_threads--;
1303 }
1304 }
1305
0a32dca5
MS
1306 git__free(p);
1307 return 0;
1308}
1309
1310#else
1311#define ll_find_deltas(pb, l, ls, w, d) find_deltas(pb, l, &ls, w, d)
1312#endif
1313
0a32dca5
MS
1314static int prepare_pack(git_packbuilder *pb)
1315{
1316 git_pobject **delta_list;
60e15ecd 1317 size_t i, n = 0;
0a32dca5
MS
1318
1319 if (pb->nr_objects == 0 || pb->done)
1320 return 0; /* nothing to do */
1321
b176eded
JM
1322 /*
1323 * Although we do not report progress during deltafication, we
1324 * at least report that we are in the deltafication stage
1325 */
1326 if (pb->progress_cb)
1327 pb->progress_cb(GIT_PACKBUILDER_DELTAFICATION, 0, pb->nr_objects, pb->progress_cb_payload);
1328
3603cb09 1329 delta_list = git__mallocarray(pb->nr_objects, sizeof(*delta_list));
ac3d33df 1330 GIT_ERROR_CHECK_ALLOC(delta_list);
0a32dca5
MS
1331
1332 for (i = 0; i < pb->nr_objects; ++i) {
1333 git_pobject *po = pb->object_list + i;
1334
b4491b99
PK
1335 /* Make sure the item is within our size limits */
1336 if (po->size < 50 || po->size > pb->big_file_threshold)
0a32dca5
MS
1337 continue;
1338
1339 delta_list[n++] = po;
1340 }
1341
1342 if (n > 1) {
1343 git__tsort((void **)delta_list, n, type_size_sort);
1344 if (ll_find_deltas(pb, delta_list, n,
1345 GIT_PACK_WINDOW + 1,
1346 GIT_PACK_DEPTH) < 0) {
1347 git__free(delta_list);
1348 return -1;
1349 }
1350 }
1351
3c337a5d
CMN
1352 report_delta_progress(pb, pb->nr_objects, true);
1353
0a32dca5
MS
1354 pb->done = true;
1355 git__free(delta_list);
1356 return 0;
1357}
1358
1359#define PREPARE_PACK if (prepare_pack(pb) < 0) { return -1; }
1360
3dfed9cb
CMN
1361int git_packbuilder_foreach(git_packbuilder *pb, int (*cb)(void *buf, size_t size, void *payload), void *payload)
1362{
1363 PREPARE_PACK;
1364 return write_pack(pb, cb, payload);
1365}
1366
0a32dca5
MS
1367int git_packbuilder_write_buf(git_buf *buf, git_packbuilder *pb)
1368{
1369 PREPARE_PACK;
1e4976cb 1370 git_buf_sanitize(buf);
0a32dca5
MS
1371 return write_pack(pb, &write_pack_buf, buf);
1372}
1373
563c19a9 1374static int write_cb(void *buf, size_t len, void *payload)
0a32dca5 1375{
563c19a9 1376 struct pack_write_context *ctx = payload;
a6154f21 1377 return git_indexer_append(ctx->indexer, buf, len, ctx->stats);
563c19a9
MS
1378}
1379
1380int git_packbuilder_write(
1381 git_packbuilder *pb,
1382 const char *path,
1e60e5f4 1383 unsigned int mode,
0c9c969a 1384 git_indexer_progress_cb progress_cb,
563c19a9
MS
1385 void *progress_cb_payload)
1386{
ac3d33df 1387 git_indexer_options opts = GIT_INDEXER_OPTIONS_INIT;
a6154f21 1388 git_indexer *indexer;
0c9c969a 1389 git_indexer_progress stats;
563c19a9 1390 struct pack_write_context ctx;
1c04a96b 1391 int t;
563c19a9 1392
0a32dca5 1393 PREPARE_PACK;
563c19a9 1394
ac3d33df
JK
1395 opts.progress_cb = progress_cb;
1396 opts.progress_cb_payload = progress_cb_payload;
1397
a6154f21 1398 if (git_indexer_new(
ac3d33df 1399 &indexer, path, mode, pb->odb, &opts) < 0)
563c19a9
MS
1400 return -1;
1401
0c9c969a 1402 if (!git_repository__configmap_lookup(&t, pb->repo, GIT_CONFIGMAP_FSYNCOBJECTFILES) && t)
1c04a96b
ET
1403 git_indexer__set_fsync(indexer, 1);
1404
563c19a9
MS
1405 ctx.indexer = indexer;
1406 ctx.stats = &stats;
1407
1408 if (git_packbuilder_foreach(pb, write_cb, &ctx) < 0 ||
a6154f21
CMN
1409 git_indexer_commit(indexer, &stats) < 0) {
1410 git_indexer_free(indexer);
563c19a9
MS
1411 return -1;
1412 }
1413
cc2447da
ET
1414 git_oid_cpy(&pb->pack_oid, git_indexer_hash(indexer));
1415
a6154f21 1416 git_indexer_free(indexer);
563c19a9 1417 return 0;
0a32dca5
MS
1418}
1419
1420#undef PREPARE_PACK
1421
cc2447da
ET
1422const git_oid *git_packbuilder_hash(git_packbuilder *pb)
1423{
1424 return &pb->pack_oid;
1425}
1426
04a36fef 1427
dab89f9b
RB
1428static int cb_tree_walk(
1429 const char *root, const git_tree_entry *entry, void *payload)
0a32dca5 1430{
dab89f9b 1431 int error;
f4b86126 1432 struct tree_walk_context *ctx = payload;
0a32dca5 1433
613d5eb9 1434 /* A commit inside a tree represents a submodule commit and should be skipped. */
ac3d33df 1435 if (git_tree_entry_type(entry) == GIT_OBJECT_COMMIT)
613d5eb9
PK
1436 return 0;
1437
dab89f9b
RB
1438 if (!(error = git_buf_sets(&ctx->buf, root)) &&
1439 !(error = git_buf_puts(&ctx->buf, git_tree_entry_name(entry))))
1440 error = git_packbuilder_insert(
1441 ctx->pb, git_tree_entry_id(entry), git_buf_cstr(&ctx->buf));
0a32dca5 1442
25e0b157 1443 return error;
0a32dca5
MS
1444}
1445
f0e37a8b
X
1446int git_packbuilder_insert_commit(git_packbuilder *pb, const git_oid *oid)
1447{
0b90366c 1448 git_commit *commit;
f0e37a8b 1449
0b90366c
X
1450 if (git_commit_lookup(&commit, pb->repo, oid) < 0 ||
1451 git_packbuilder_insert(pb, oid, NULL) < 0)
1452 return -1;
f0e37a8b 1453
0b90366c
X
1454 if (git_packbuilder_insert_tree(pb, git_commit_tree_id(commit)) < 0)
1455 return -1;
f0e37a8b 1456
0b90366c
X
1457 git_commit_free(commit);
1458 return 0;
f0e37a8b
X
1459}
1460
0a32dca5
MS
1461int git_packbuilder_insert_tree(git_packbuilder *pb, const git_oid *oid)
1462{
dab89f9b
RB
1463 int error;
1464 git_tree *tree = NULL;
f4b86126 1465 struct tree_walk_context context = { pb, GIT_BUF_INIT };
0a32dca5 1466
dab89f9b
RB
1467 if (!(error = git_tree_lookup(&tree, pb->repo, oid)) &&
1468 !(error = git_packbuilder_insert(pb, oid, NULL)))
1469 error = git_tree_walk(tree, GIT_TREEWALK_PRE, cb_tree_walk, &context);
0a32dca5 1470
0a32dca5 1471 git_tree_free(tree);
ac3d33df 1472 git_buf_dispose(&context.buf);
dab89f9b 1473 return error;
0a32dca5
MS
1474}
1475
a61fa4c0
CMN
1476int git_packbuilder_insert_recur(git_packbuilder *pb, const git_oid *id, const char *name)
1477{
1478 git_object *obj;
1479 int error;
1480
1481 assert(pb && id);
1482
ac3d33df 1483 if ((error = git_object_lookup(&obj, pb->repo, id, GIT_OBJECT_ANY)) < 0)
a61fa4c0
CMN
1484 return error;
1485
1486 switch (git_object_type(obj)) {
ac3d33df 1487 case GIT_OBJECT_BLOB:
a61fa4c0
CMN
1488 error = git_packbuilder_insert(pb, id, name);
1489 break;
ac3d33df 1490 case GIT_OBJECT_TREE:
a61fa4c0
CMN
1491 error = git_packbuilder_insert_tree(pb, id);
1492 break;
ac3d33df 1493 case GIT_OBJECT_COMMIT:
a61fa4c0
CMN
1494 error = git_packbuilder_insert_commit(pb, id);
1495 break;
ac3d33df 1496 case GIT_OBJECT_TAG:
a61fa4c0
CMN
1497 if ((error = git_packbuilder_insert(pb, id, name)) < 0)
1498 goto cleanup;
1499 error = git_packbuilder_insert_recur(pb, git_tag_target_id((git_tag *) obj), NULL);
1500 break;
1501
1502 default:
ac3d33df 1503 git_error_set(GIT_ERROR_INVALID, "unknown object type");
a61fa4c0
CMN
1504 error = -1;
1505 }
1506
1507cleanup:
1508 git_object_free(obj);
1509 return error;
1510}
1511
60e15ecd 1512size_t git_packbuilder_object_count(git_packbuilder *pb)
b4b935d8
CMN
1513{
1514 return pb->nr_objects;
1515}
1516
60e15ecd 1517size_t git_packbuilder_written(git_packbuilder *pb)
b4b935d8
CMN
1518{
1519 return pb->nr_written;
1520}
1521
ac3d33df 1522static int lookup_walk_object(struct walk_object **out, git_packbuilder *pb, const git_oid *id)
04a36fef 1523{
ac3d33df 1524 struct walk_object *obj;
04a36fef
CMN
1525
1526 obj = git_pool_mallocz(&pb->object_pool, 1);
1527 if (!obj) {
ac3d33df 1528 git_error_set_oom();
04a36fef
CMN
1529 return -1;
1530 }
1531
1532 git_oid_cpy(&obj->id, id);
1533
1534 *out = obj;
1535 return 0;
1536}
1537
ac3d33df 1538static int retrieve_object(struct walk_object **out, git_packbuilder *pb, const git_oid *id)
04a36fef 1539{
ac3d33df 1540 struct walk_object *obj;
0c9c969a 1541 int error;
04a36fef 1542
0c9c969a 1543 if ((obj = git_oidmap_get(pb->walk_objects, id)) == NULL) {
04a36fef
CMN
1544 if ((error = lookup_walk_object(&obj, pb, id)) < 0)
1545 return error;
1546
0c9c969a
UG
1547 if ((error = git_oidmap_set(pb->walk_objects, &obj->id, obj)) < 0)
1548 return error;
04a36fef
CMN
1549 }
1550
1551 *out = obj;
1552 return 0;
1553}
1554
1555static int mark_blob_uninteresting(git_packbuilder *pb, const git_oid *id)
1556{
1557 int error;
ac3d33df 1558 struct walk_object *obj;
04a36fef
CMN
1559
1560 if ((error = retrieve_object(&obj, pb, id)) < 0)
1561 return error;
1562
1563 obj->uninteresting = 1;
1564
1565 return 0;
1566}
1567
1568static int mark_tree_uninteresting(git_packbuilder *pb, const git_oid *id)
1569{
ac3d33df 1570 struct walk_object *obj;
04a36fef
CMN
1571 git_tree *tree;
1572 int error;
1573 size_t i;
1574
1575 if ((error = retrieve_object(&obj, pb, id)) < 0)
1576 return error;
1577
1578 if (obj->uninteresting)
1579 return 0;
1580
1581 obj->uninteresting = 1;
1582
1583 if ((error = git_tree_lookup(&tree, pb->repo, id)) < 0)
1584 return error;
1585
1586 for (i = 0; i < git_tree_entrycount(tree); i++) {
1587 const git_tree_entry *entry = git_tree_entry_byindex(tree, i);
1588 const git_oid *entry_id = git_tree_entry_id(entry);
1589 switch (git_tree_entry_type(entry)) {
ac3d33df 1590 case GIT_OBJECT_TREE:
04a36fef
CMN
1591 if ((error = mark_tree_uninteresting(pb, entry_id)) < 0)
1592 goto cleanup;
1593 break;
ac3d33df 1594 case GIT_OBJECT_BLOB:
04a36fef
CMN
1595 if ((error = mark_blob_uninteresting(pb, entry_id)) < 0)
1596 goto cleanup;
1597 break;
1598 default:
1599 /* it's a submodule or something unknown, we don't want it */
1600 ;
1601 }
1602 }
1603
1604cleanup:
1605 git_tree_free(tree);
1606 return error;
1607}
1608
1609/*
1610 * Mark the edges of the graph uninteresting. Since we start from a
1611 * git_revwalk, the commits are already uninteresting, but we need to
1612 * mark the trees and blobs.
1613 */
1614static int mark_edges_uninteresting(git_packbuilder *pb, git_commit_list *commits)
1615{
1616 int error;
1617 git_commit_list *list;
1618 git_commit *commit;
1619
1620 for (list = commits; list; list = list->next) {
1621 if (!list->item->uninteresting)
1622 continue;
1623
1624 if ((error = git_commit_lookup(&commit, pb->repo, &list->item->oid)) < 0)
1625 return error;
1626
1627 error = mark_tree_uninteresting(pb, git_commit_tree_id(commit));
1628 git_commit_free(commit);
1629
1630 if (error < 0)
1631 return error;
1632 }
1633
1634 return 0;
1635}
1636
1637int insert_tree(git_packbuilder *pb, git_tree *tree)
1638{
1639 size_t i;
1640 int error;
1641 git_tree *subtree;
ac3d33df 1642 struct walk_object *obj;
04a36fef
CMN
1643 const char *name;
1644
1645 if ((error = retrieve_object(&obj, pb, git_tree_id(tree))) < 0)
1646 return error;
1647
eae0bfdc 1648 if (obj->seen || obj->uninteresting)
04a36fef
CMN
1649 return 0;
1650
1651 obj->seen = 1;
1652
1653 if ((error = git_packbuilder_insert(pb, &obj->id, NULL)))
1654 return error;
1655
1656 for (i = 0; i < git_tree_entrycount(tree); i++) {
1657 const git_tree_entry *entry = git_tree_entry_byindex(tree, i);
1658 const git_oid *entry_id = git_tree_entry_id(entry);
1659 switch (git_tree_entry_type(entry)) {
ac3d33df 1660 case GIT_OBJECT_TREE:
04a36fef
CMN
1661 if ((error = git_tree_lookup(&subtree, pb->repo, entry_id)) < 0)
1662 return error;
1663
1664 error = insert_tree(pb, subtree);
1665 git_tree_free(subtree);
1666
1667 if (error < 0)
1668 return error;
1669
1670 break;
ac3d33df 1671 case GIT_OBJECT_BLOB:
6c7cee42 1672 if ((error = retrieve_object(&obj, pb, entry_id)) < 0)
eae0bfdc
PP
1673 return error;
1674 if (obj->uninteresting)
1675 continue;
04a36fef
CMN
1676 name = git_tree_entry_name(entry);
1677 if ((error = git_packbuilder_insert(pb, entry_id, name)) < 0)
1678 return error;
1679 break;
1680 default:
1681 /* it's a submodule or something unknown, we don't want it */
1682 ;
1683 }
1684 }
1685
1686
1687 return error;
1688}
1689
ac3d33df 1690int insert_commit(git_packbuilder *pb, struct walk_object *obj)
04a36fef
CMN
1691{
1692 int error;
1693 git_commit *commit = NULL;
1694 git_tree *tree = NULL;
1695
1696 obj->seen = 1;
1697
1698 if ((error = git_packbuilder_insert(pb, &obj->id, NULL)) < 0)
1699 return error;
1700
1701 if ((error = git_commit_lookup(&commit, pb->repo, &obj->id)) < 0)
1702 return error;
1703
1704 if ((error = git_tree_lookup(&tree, pb->repo, git_commit_tree_id(commit))) < 0)
1705 goto cleanup;
1706
1707 if ((error = insert_tree(pb, tree)) < 0)
1708 goto cleanup;
1709
1710cleanup:
1711 git_commit_free(commit);
1712 git_tree_free(tree);
1713 return error;
1714}
1715
1716int git_packbuilder_insert_walk(git_packbuilder *pb, git_revwalk *walk)
1717{
1718 int error;
1719 git_oid id;
ac3d33df 1720 struct walk_object *obj;
04a36fef
CMN
1721
1722 assert(pb && walk);
1723
1724 if ((error = mark_edges_uninteresting(pb, walk->user_input)) < 0)
1725 return error;
1726
1727 /*
1728 * TODO: git marks the parents of the edges
1729 * uninteresting. This may provide a speed advantage, but does
1730 * seem to assume the remote does not have a single-commit
1731 * history on the other end.
1732 */
1733
1734 /* walk down each tree up to the blobs and insert them, stopping when uninteresting */
1735 while ((error = git_revwalk_next(&id, walk)) == 0) {
1736 if ((error = retrieve_object(&obj, pb, &id)) < 0)
1737 return error;
1738
1739 if (obj->seen || obj->uninteresting)
1740 continue;
1741
1742 if ((error = insert_commit(pb, obj)) < 0)
1743 return error;
1744 }
1745
1746 if (error == GIT_ITEROVER)
1747 error = 0;
1748
49be45a1 1749 return error;
04a36fef
CMN
1750}
1751
b176eded
JM
1752int git_packbuilder_set_callbacks(git_packbuilder *pb, git_packbuilder_progress progress_cb, void *progress_cb_payload)
1753{
1754 if (!pb)
1755 return -1;
1756
1757 pb->progress_cb = progress_cb;
1758 pb->progress_cb_payload = progress_cb_payload;
1759
1760 return 0;
1761}
1762
0a32dca5
MS
1763void git_packbuilder_free(git_packbuilder *pb)
1764{
1765 if (pb == NULL)
1766 return;
1767
b4491b99
PK
1768#ifdef GIT_THREADS
1769
1770 git_mutex_free(&pb->cache_mutex);
1771 git_mutex_free(&pb->progress_mutex);
1772 git_cond_free(&pb->progress_cond);
1773
1774#endif
1775
1776 if (pb->odb)
1777 git_odb_free(pb->odb);
1778
b4491b99
PK
1779 if (pb->object_ix)
1780 git_oidmap_free(pb->object_ix);
1781
1782 if (pb->object_list)
1783 git__free(pb->object_list);
1784
04a36fef
CMN
1785 git_oidmap_free(pb->walk_objects);
1786 git_pool_clear(&pb->object_pool);
1787
603bee07 1788 git_hash_ctx_cleanup(&pb->ctx);
52a8a130 1789 git_zstream_free(&pb->zstream);
603bee07 1790
0a32dca5
MS
1791 git__free(pb);
1792}