]>
Commit | Line | Data |
---|---|---|
3412391d | 1 | /* |
359fc2d2 | 2 | * Copyright (C) the libgit2 contributors. All rights reserved. |
3412391d | 3 | * |
bb742ede VM |
4 | * This file is part of libgit2, distributed under the GNU GPL v2 with |
5 | * a Linking Exception. For full terms see the included COPYING file. | |
3412391d CMN |
6 | */ |
7 | ||
eae0bfdc PP |
8 | #include "indexer.h" |
9 | ||
f23c4a66 | 10 | #include "git2/indexer.h" |
7d0cdf82 | 11 | #include "git2/object.h" |
f23c4a66 | 12 | |
ac3d33df JK |
13 | #include "commit.h" |
14 | #include "tree.h" | |
15 | #include "tag.h" | |
3412391d | 16 | #include "pack.h" |
f23c4a66 | 17 | #include "mwindow.h" |
3412391d | 18 | #include "posix.h" |
b7c44096 CMN |
19 | #include "pack.h" |
20 | #include "filebuf.h" | |
b7f167da | 21 | #include "oid.h" |
22a2d3d5 | 22 | #include "oidarray.h" |
0e040c03 | 23 | #include "oidmap.h" |
c6f26b48 | 24 | #include "zstream.h" |
1c2c0ae2 | 25 | #include "object.h" |
b7c44096 | 26 | |
ac3d33df JK |
27 | size_t git_indexer__max_objects = UINT32_MAX; |
28 | ||
b7c44096 | 29 | #define UINT31_MAX (0x7FFFFFFF) |
3412391d | 30 | |
b5b474dd | 31 | struct entry { |
b7c44096 | 32 | git_oid oid; |
b5b474dd CMN |
33 | uint32_t crc; |
34 | uint32_t offset; | |
35 | uint64_t offset_long; | |
36 | }; | |
37 | ||
a6154f21 | 38 | struct git_indexer { |
3f93e16c | 39 | unsigned int parsed_header :1, |
d030bba9 | 40 | pack_committed :1, |
5a3ad89d | 41 | have_stream :1, |
1c04a96b | 42 | have_delta :1, |
ac3d33df JK |
43 | do_fsync :1, |
44 | do_verify :1; | |
0b33fca0 | 45 | struct git_pack_header hdr; |
3f93e16c | 46 | struct git_pack_file *pack; |
1e60e5f4 | 47 | unsigned int mode; |
22a2d3d5 UG |
48 | off64_t off; |
49 | off64_t entry_start; | |
ac3d33df JK |
50 | git_object_t entry_type; |
51 | git_buf entry_data; | |
f56f8585 | 52 | git_packfile_stream stream; |
3f93e16c CMN |
53 | size_t nr_objects; |
54 | git_vector objects; | |
55 | git_vector deltas; | |
56 | unsigned int fanout[256]; | |
f56f8585 | 57 | git_hash_ctx hash_ctx; |
3f93e16c | 58 | git_oid hash; |
22a2d3d5 | 59 | git_indexer_progress_cb progress_cb; |
216863c4 | 60 | void *progress_payload; |
6481a68d | 61 | char objbuf[8*1024]; |
98eb2c59 | 62 | |
ac3d33df JK |
63 | /* OIDs referenced from pack objects. Used for verification. */ |
64 | git_oidmap *expected_oids; | |
65 | ||
0b33fca0 CMN |
66 | /* Needed to look up objects which we want to inject to fix a thin pack */ |
67 | git_odb *odb; | |
68 | ||
98eb2c59 CMN |
69 | /* Fields for calculating the packfile trailer (hash of everything before it) */ |
70 | char inbuf[GIT_OID_RAWSZ]; | |
af302aca | 71 | size_t inbuf_len; |
98eb2c59 | 72 | git_hash_ctx trailer; |
3f93e16c CMN |
73 | }; |
74 | ||
75 | struct delta_info { | |
22a2d3d5 | 76 | off64_t delta_off; |
3f93e16c CMN |
77 | }; |
78 | ||
a6154f21 | 79 | const git_oid *git_indexer_hash(const git_indexer *idx) |
1c9c081a CMN |
80 | { |
81 | return &idx->hash; | |
82 | } | |
83 | ||
3f93e16c | 84 | static int parse_header(struct git_pack_header *hdr, struct git_pack_file *pack) |
3412391d | 85 | { |
3412391d | 86 | int error; |
f7310540 | 87 | git_map map; |
3412391d | 88 | |
f7310540 | 89 | if ((error = p_mmap(&map, sizeof(*hdr), GIT_PROT_READ, GIT_MAP_SHARED, pack->mwf.fd, 0)) < 0) |
4aa7de15 | 90 | return error; |
3412391d | 91 | |
f7310540 CMN |
92 | memcpy(hdr, map.data, sizeof(*hdr)); |
93 | p_munmap(&map); | |
94 | ||
95 | /* Verify we recognize this pack file format. */ | |
3f93e16c | 96 | if (hdr->hdr_signature != ntohl(PACK_SIGNATURE)) { |
ac3d33df | 97 | git_error_set(GIT_ERROR_INDEXER, "wrong pack signature"); |
4aa7de15 RB |
98 | return -1; |
99 | } | |
3412391d | 100 | |
3f93e16c | 101 | if (!pack_version_ok(hdr->hdr_version)) { |
ac3d33df | 102 | git_error_set(GIT_ERROR_INDEXER, "wrong pack version"); |
4aa7de15 RB |
103 | return -1; |
104 | } | |
f23c4a66 | 105 | |
4aa7de15 | 106 | return 0; |
3412391d CMN |
107 | } |
108 | ||
c1af5a39 | 109 | static int objects_cmp(const void *a, const void *b) |
b7c44096 CMN |
110 | { |
111 | const struct entry *entrya = a; | |
112 | const struct entry *entryb = b; | |
113 | ||
b7f167da | 114 | return git_oid__cmp(&entrya->oid, &entryb->oid); |
b7c44096 CMN |
115 | } |
116 | ||
22a2d3d5 | 117 | int git_indexer_options_init(git_indexer_options *opts, unsigned int version) |
ac3d33df JK |
118 | { |
119 | GIT_INIT_STRUCTURE_FROM_TEMPLATE( | |
120 | opts, version, git_indexer_options, GIT_INDEXER_OPTIONS_INIT); | |
121 | return 0; | |
122 | } | |
123 | ||
22a2d3d5 UG |
124 | #ifndef GIT_DEPRECATE_HARD |
125 | int git_indexer_init_options(git_indexer_options *opts, unsigned int version) | |
126 | { | |
127 | return git_indexer_options_init(opts, version); | |
128 | } | |
129 | #endif | |
130 | ||
a6154f21 CMN |
131 | int git_indexer_new( |
132 | git_indexer **out, | |
216863c4 | 133 | const char *prefix, |
1e60e5f4 | 134 | unsigned int mode, |
0b33fca0 | 135 | git_odb *odb, |
ac3d33df | 136 | git_indexer_options *in_opts) |
3f93e16c | 137 | { |
ac3d33df | 138 | git_indexer_options opts = GIT_INDEXER_OPTIONS_INIT; |
a6154f21 | 139 | git_indexer *idx; |
f7310540 | 140 | git_buf path = GIT_BUF_INIT, tmp_path = GIT_BUF_INIT; |
37159957 | 141 | static const char suff[] = "/pack"; |
9c4feef9 | 142 | int error, fd = -1; |
c1af5a39 | 143 | |
ac3d33df JK |
144 | if (in_opts) |
145 | memcpy(&opts, in_opts, sizeof(opts)); | |
146 | ||
a6154f21 | 147 | idx = git__calloc(1, sizeof(git_indexer)); |
ac3d33df | 148 | GIT_ERROR_CHECK_ALLOC(idx); |
0b33fca0 | 149 | idx->odb = odb; |
ac3d33df JK |
150 | idx->progress_cb = opts.progress_cb; |
151 | idx->progress_payload = opts.progress_cb_payload; | |
1e60e5f4 | 152 | idx->mode = mode ? mode : GIT_PACK_FILE_MODE; |
ac3d33df | 153 | git_buf_init(&idx->entry_data, 0); |
22a2d3d5 UG |
154 | |
155 | if ((error = git_hash_ctx_init(&idx->hash_ctx)) < 0 || | |
156 | (error = git_hash_ctx_init(&idx->trailer)) < 0 || | |
157 | (error = git_oidmap_new(&idx->expected_oids)) < 0) | |
158 | goto cleanup; | |
ac3d33df JK |
159 | |
160 | idx->do_verify = opts.verify; | |
3f93e16c | 161 | |
6c23704d | 162 | if (git_repository__fsync_gitdir) |
1c04a96b ET |
163 | idx->do_fsync = 1; |
164 | ||
3f93e16c CMN |
165 | error = git_buf_joinpath(&path, prefix, suff); |
166 | if (error < 0) | |
167 | goto cleanup; | |
168 | ||
f7310540 | 169 | fd = git_futils_mktmp(&tmp_path, git_buf_cstr(&path), idx->mode); |
ac3d33df | 170 | git_buf_dispose(&path); |
f7310540 CMN |
171 | if (fd < 0) |
172 | goto cleanup; | |
173 | ||
174 | error = git_packfile_alloc(&idx->pack, git_buf_cstr(&tmp_path)); | |
ac3d33df | 175 | git_buf_dispose(&tmp_path); |
f7310540 | 176 | |
3f93e16c CMN |
177 | if (error < 0) |
178 | goto cleanup; | |
179 | ||
f7310540 CMN |
180 | idx->pack->mwf.fd = fd; |
181 | if ((error = git_mwindow_file_register(&idx->pack->mwf)) < 0) | |
182 | goto cleanup; | |
183 | ||
3f93e16c CMN |
184 | *out = idx; |
185 | return 0; | |
186 | ||
187 | cleanup: | |
f7310540 CMN |
188 | if (fd != -1) |
189 | p_close(fd); | |
190 | ||
f5586f5c | 191 | if (git_buf_len(&tmp_path) > 0) |
192 | p_unlink(git_buf_cstr(&tmp_path)); | |
db535d0a | 193 | |
194 | if (idx->pack != NULL) | |
f5586f5c | 195 | p_unlink(idx->pack->pack_name); |
db535d0a | 196 | |
ac3d33df JK |
197 | git_buf_dispose(&path); |
198 | git_buf_dispose(&tmp_path); | |
3f93e16c CMN |
199 | git__free(idx); |
200 | return -1; | |
201 | } | |
202 | ||
1c04a96b ET |
203 | void git_indexer__set_fsync(git_indexer *idx, int do_fsync) |
204 | { | |
205 | idx->do_fsync = !!do_fsync; | |
206 | } | |
207 | ||
3f93e16c | 208 | /* Try to store the delta so we can try to resolve it later */ |
a6154f21 | 209 | static int store_delta(git_indexer *idx) |
3412391d | 210 | { |
3f93e16c | 211 | struct delta_info *delta; |
3f93e16c | 212 | |
453ab98d | 213 | delta = git__calloc(1, sizeof(struct delta_info)); |
ac3d33df | 214 | GIT_ERROR_CHECK_ALLOC(delta); |
5a3ad89d | 215 | delta->delta_off = idx->entry_start; |
453ab98d | 216 | |
3f93e16c | 217 | if (git_vector_insert(&idx->deltas, delta) < 0) |
453ab98d CMN |
218 | return -1; |
219 | ||
220 | return 0; | |
221 | } | |
222 | ||
22a2d3d5 | 223 | static int hash_header(git_hash_ctx *ctx, off64_t len, git_object_t type) |
f56f8585 CMN |
224 | { |
225 | char buffer[64]; | |
226 | size_t hdrlen; | |
eae0bfdc PP |
227 | int error; |
228 | ||
229 | if ((error = git_odb__format_object_header(&hdrlen, | |
230 | buffer, sizeof(buffer), (size_t)len, type)) < 0) | |
231 | return error; | |
f56f8585 | 232 | |
eae0bfdc | 233 | return git_hash_update(ctx, buffer, hdrlen); |
f56f8585 CMN |
234 | } |
235 | ||
a6154f21 | 236 | static int hash_object_stream(git_indexer*idx, git_packfile_stream *stream) |
f56f8585 | 237 | { |
f56f8585 CMN |
238 | ssize_t read; |
239 | ||
c25aa7cd PP |
240 | GIT_ASSERT_ARG(idx); |
241 | GIT_ASSERT_ARG(stream); | |
f56f8585 CMN |
242 | |
243 | do { | |
6481a68d | 244 | if ((read = git_packfile_stream_read(stream, idx->objbuf, sizeof(idx->objbuf))) < 0) |
f56f8585 CMN |
245 | break; |
246 | ||
ac3d33df JK |
247 | if (idx->do_verify) |
248 | git_buf_put(&idx->entry_data, idx->objbuf, read); | |
249 | ||
6481a68d | 250 | git_hash_update(&idx->hash_ctx, idx->objbuf, read); |
f56f8585 CMN |
251 | } while (read > 0); |
252 | ||
253 | if (read < 0) | |
254 | return (int)read; | |
255 | ||
256 | return 0; | |
257 | } | |
258 | ||
5a3ad89d | 259 | /* In order to create the packfile stream, we need to skip over the delta base description */ |
ac3d33df | 260 | static int advance_delta_offset(git_indexer *idx, git_object_t type) |
5a3ad89d CMN |
261 | { |
262 | git_mwindow *w = NULL; | |
263 | ||
c25aa7cd | 264 | GIT_ASSERT_ARG(type == GIT_OBJECT_REF_DELTA || type == GIT_OBJECT_OFS_DELTA); |
5a3ad89d | 265 | |
ac3d33df | 266 | if (type == GIT_OBJECT_REF_DELTA) { |
5a3ad89d CMN |
267 | idx->off += GIT_OID_RAWSZ; |
268 | } else { | |
22a2d3d5 UG |
269 | off64_t base_off; |
270 | int error = get_delta_base(&base_off, idx->pack, &w, &idx->off, type, idx->entry_start); | |
5a3ad89d | 271 | git_mwindow_close(&w); |
22a2d3d5 UG |
272 | if (error < 0) |
273 | return error; | |
5a3ad89d CMN |
274 | } |
275 | ||
276 | return 0; | |
277 | } | |
278 | ||
279 | /* Read from the stream and discard any output */ | |
a6154f21 | 280 | static int read_object_stream(git_indexer *idx, git_packfile_stream *stream) |
5a3ad89d | 281 | { |
5a3ad89d CMN |
282 | ssize_t read; |
283 | ||
c25aa7cd | 284 | GIT_ASSERT_ARG(stream); |
5a3ad89d CMN |
285 | |
286 | do { | |
6481a68d | 287 | read = git_packfile_stream_read(stream, idx->objbuf, sizeof(idx->objbuf)); |
5a3ad89d CMN |
288 | } while (read > 0); |
289 | ||
290 | if (read < 0) | |
291 | return (int)read; | |
292 | ||
293 | return 0; | |
294 | } | |
295 | ||
22a2d3d5 | 296 | static int crc_object(uint32_t *crc_out, git_mwindow_file *mwf, off64_t start, off64_t size) |
3908c254 CMN |
297 | { |
298 | void *ptr; | |
299 | uint32_t crc; | |
300 | unsigned int left, len; | |
301 | git_mwindow *w = NULL; | |
302 | ||
303 | crc = crc32(0L, Z_NULL, 0); | |
304 | while (size) { | |
bdb94c21 | 305 | ptr = git_mwindow_open(mwf, &w, start, (size_t)size, &left); |
3908c254 CMN |
306 | if (ptr == NULL) |
307 | return -1; | |
308 | ||
090d5e1f | 309 | len = min(left, (unsigned int)size); |
3908c254 CMN |
310 | crc = crc32(crc, ptr, len); |
311 | size -= len; | |
312 | start += len; | |
313 | git_mwindow_close(&w); | |
314 | } | |
315 | ||
316 | *crc_out = htonl(crc); | |
317 | return 0; | |
318 | } | |
319 | ||
22a2d3d5 | 320 | static int add_expected_oid(git_indexer *idx, const git_oid *oid) |
ac3d33df | 321 | { |
ac3d33df JK |
322 | /* |
323 | * If we know about that object because it is stored in our ODB or | |
324 | * because we have already processed it as part of our pack file, we do | |
325 | * not have to expect it. | |
326 | */ | |
327 | if ((!idx->odb || !git_odb_exists(idx->odb, oid)) && | |
328 | !git_oidmap_exists(idx->pack->idx_cache, oid) && | |
329 | !git_oidmap_exists(idx->expected_oids, oid)) { | |
330 | git_oid *dup = git__malloc(sizeof(*oid)); | |
22a2d3d5 | 331 | GIT_ERROR_CHECK_ALLOC(dup); |
ac3d33df | 332 | git_oid_cpy(dup, oid); |
22a2d3d5 | 333 | return git_oidmap_set(idx->expected_oids, dup, dup); |
ac3d33df | 334 | } |
22a2d3d5 UG |
335 | |
336 | return 0; | |
ac3d33df JK |
337 | } |
338 | ||
339 | static int check_object_connectivity(git_indexer *idx, const git_rawobj *obj) | |
340 | { | |
341 | git_object *object; | |
22a2d3d5 | 342 | git_oid *expected; |
ac3d33df JK |
343 | int error; |
344 | ||
345 | if (obj->type != GIT_OBJECT_BLOB && | |
346 | obj->type != GIT_OBJECT_TREE && | |
347 | obj->type != GIT_OBJECT_COMMIT && | |
348 | obj->type != GIT_OBJECT_TAG) | |
349 | return 0; | |
350 | ||
351 | if ((error = git_object__from_raw(&object, obj->data, obj->len, obj->type)) < 0) | |
352 | goto out; | |
353 | ||
22a2d3d5 UG |
354 | if ((expected = git_oidmap_get(idx->expected_oids, &object->cached.oid)) != NULL) { |
355 | git_oidmap_delete(idx->expected_oids, &object->cached.oid); | |
356 | git__free(expected); | |
ac3d33df JK |
357 | } |
358 | ||
359 | /* | |
360 | * Check whether this is a known object. If so, we can just continue as | |
361 | * we assume that the ODB has a complete graph. | |
362 | */ | |
363 | if (idx->odb && git_odb_exists(idx->odb, &object->cached.oid)) | |
364 | return 0; | |
365 | ||
366 | switch (obj->type) { | |
367 | case GIT_OBJECT_TREE: | |
368 | { | |
369 | git_tree *tree = (git_tree *) object; | |
370 | git_tree_entry *entry; | |
371 | size_t i; | |
372 | ||
373 | git_array_foreach(tree->entries, i, entry) | |
22a2d3d5 UG |
374 | if (add_expected_oid(idx, entry->oid) < 0) |
375 | goto out; | |
ac3d33df JK |
376 | |
377 | break; | |
378 | } | |
379 | case GIT_OBJECT_COMMIT: | |
380 | { | |
381 | git_commit *commit = (git_commit *) object; | |
382 | git_oid *parent_oid; | |
383 | size_t i; | |
384 | ||
385 | git_array_foreach(commit->parent_ids, i, parent_oid) | |
22a2d3d5 UG |
386 | if (add_expected_oid(idx, parent_oid) < 0) |
387 | goto out; | |
ac3d33df | 388 | |
22a2d3d5 UG |
389 | if (add_expected_oid(idx, &commit->tree_id) < 0) |
390 | goto out; | |
ac3d33df JK |
391 | |
392 | break; | |
393 | } | |
394 | case GIT_OBJECT_TAG: | |
395 | { | |
396 | git_tag *tag = (git_tag *) object; | |
397 | ||
22a2d3d5 UG |
398 | if (add_expected_oid(idx, &tag->target) < 0) |
399 | goto out; | |
ac3d33df JK |
400 | |
401 | break; | |
402 | } | |
403 | case GIT_OBJECT_BLOB: | |
404 | default: | |
405 | break; | |
406 | } | |
407 | ||
408 | out: | |
409 | git_object_free(object); | |
410 | ||
411 | return error; | |
412 | } | |
413 | ||
a6154f21 | 414 | static int store_object(git_indexer *idx) |
f56f8585 | 415 | { |
0e040c03 | 416 | int i, error; |
f56f8585 | 417 | git_oid oid; |
f56f8585 | 418 | struct entry *entry; |
22a2d3d5 | 419 | off64_t entry_size; |
f56f8585 | 420 | struct git_pack_entry *pentry; |
22a2d3d5 | 421 | off64_t entry_start = idx->entry_start; |
f56f8585 CMN |
422 | |
423 | entry = git__calloc(1, sizeof(*entry)); | |
ac3d33df | 424 | GIT_ERROR_CHECK_ALLOC(entry); |
f56f8585 | 425 | |
7026ad89 | 426 | pentry = git__calloc(1, sizeof(struct git_pack_entry)); |
ac3d33df | 427 | GIT_ERROR_CHECK_ALLOC(pentry); |
f56f8585 | 428 | |
22a2d3d5 UG |
429 | if (git_hash_final(&oid, &idx->hash_ctx)) { |
430 | git__free(pentry); | |
431 | goto on_error; | |
432 | } | |
f56f8585 CMN |
433 | entry_size = idx->off - entry_start; |
434 | if (entry_start > UINT31_MAX) { | |
435 | entry->offset = UINT32_MAX; | |
436 | entry->offset_long = entry_start; | |
437 | } else { | |
438 | entry->offset = (uint32_t)entry_start; | |
439 | } | |
440 | ||
ac3d33df JK |
441 | if (idx->do_verify) { |
442 | git_rawobj rawobj = { | |
443 | idx->entry_data.ptr, | |
444 | idx->entry_data.size, | |
445 | idx->entry_type | |
446 | }; | |
447 | ||
448 | if ((error = check_object_connectivity(idx, &rawobj)) < 0) | |
449 | goto on_error; | |
450 | } | |
451 | ||
f56f8585 CMN |
452 | git_oid_cpy(&pentry->sha1, &oid); |
453 | pentry->offset = entry_start; | |
0e040c03 | 454 | |
22a2d3d5 UG |
455 | if (git_oidmap_exists(idx->pack->idx_cache, &pentry->sha1)) { |
456 | git_error_set(GIT_ERROR_INDEXER, "duplicate object %s found in pack", git_oid_tostr_s(&pentry->sha1)); | |
5a3ad89d | 457 | git__free(pentry); |
f56f8585 | 458 | goto on_error; |
5a3ad89d | 459 | } |
f56f8585 | 460 | |
22a2d3d5 | 461 | if ((error = git_oidmap_set(idx->pack->idx_cache, &pentry->sha1, pentry)) < 0) { |
7c63a33f | 462 | git__free(pentry); |
22a2d3d5 | 463 | git_error_set_oom(); |
7c63a33f CMN |
464 | goto on_error; |
465 | } | |
466 | ||
f56f8585 | 467 | git_oid_cpy(&entry->oid, &oid); |
f56f8585 | 468 | |
3908c254 | 469 | if (crc_object(&entry->crc, &idx->pack->mwf, entry_start, entry_size) < 0) |
f56f8585 CMN |
470 | goto on_error; |
471 | ||
f56f8585 CMN |
472 | /* Add the object to the list */ |
473 | if (git_vector_insert(&idx->objects, entry) < 0) | |
474 | goto on_error; | |
475 | ||
476 | for (i = oid.id[0]; i < 256; ++i) { | |
477 | idx->fanout[i]++; | |
478 | } | |
479 | ||
480 | return 0; | |
481 | ||
482 | on_error: | |
f56f8585 CMN |
483 | git__free(entry); |
484 | ||
485 | return -1; | |
486 | } | |
487 | ||
e2dd3735 ET |
488 | GIT_INLINE(bool) has_entry(git_indexer *idx, git_oid *id) |
489 | { | |
036daa59 | 490 | return git_oidmap_exists(idx->pack->idx_cache, id); |
e2dd3735 ET |
491 | } |
492 | ||
22a2d3d5 | 493 | static int save_entry(git_indexer *idx, struct entry *entry, struct git_pack_entry *pentry, off64_t entry_start) |
453ab98d | 494 | { |
22a2d3d5 | 495 | int i; |
453ab98d CMN |
496 | |
497 | if (entry_start > UINT31_MAX) { | |
498 | entry->offset = UINT32_MAX; | |
499 | entry->offset_long = entry_start; | |
500 | } else { | |
501 | entry->offset = (uint32_t)entry_start; | |
502 | } | |
503 | ||
0b33fca0 | 504 | pentry->offset = entry_start; |
e2dd3735 | 505 | |
22a2d3d5 UG |
506 | if (git_oidmap_exists(idx->pack->idx_cache, &pentry->sha1) || |
507 | git_oidmap_set(idx->pack->idx_cache, &pentry->sha1, pentry) < 0) { | |
ac3d33df | 508 | git_error_set(GIT_ERROR_INDEXER, "cannot insert object into pack"); |
0b33fca0 | 509 | return -1; |
e2dd3735 | 510 | } |
0b33fca0 | 511 | |
0b33fca0 CMN |
512 | /* Add the object to the list */ |
513 | if (git_vector_insert(&idx->objects, entry) < 0) | |
514 | return -1; | |
515 | ||
516 | for (i = entry->oid.id[0]; i < 256; ++i) { | |
517 | idx->fanout[i]++; | |
518 | } | |
519 | ||
520 | return 0; | |
521 | } | |
522 | ||
22a2d3d5 | 523 | static int hash_and_save(git_indexer *idx, git_rawobj *obj, off64_t entry_start) |
0b33fca0 CMN |
524 | { |
525 | git_oid oid; | |
526 | size_t entry_size; | |
527 | struct entry *entry; | |
ac44b3d2 | 528 | struct git_pack_entry *pentry = NULL; |
0b33fca0 CMN |
529 | |
530 | entry = git__calloc(1, sizeof(*entry)); | |
ac3d33df | 531 | GIT_ERROR_CHECK_ALLOC(entry); |
0b33fca0 | 532 | |
453ab98d | 533 | if (git_odb__hashobj(&oid, obj) < 0) { |
ac3d33df | 534 | git_error_set(GIT_ERROR_INDEXER, "failed to hash object"); |
8d6ef4bf | 535 | goto on_error; |
453ab98d CMN |
536 | } |
537 | ||
7026ad89 | 538 | pentry = git__calloc(1, sizeof(struct git_pack_entry)); |
ac3d33df | 539 | GIT_ERROR_CHECK_ALLOC(pentry); |
453ab98d CMN |
540 | |
541 | git_oid_cpy(&pentry->sha1, &oid); | |
453ab98d CMN |
542 | git_oid_cpy(&entry->oid, &oid); |
543 | entry->crc = crc32(0L, Z_NULL, 0); | |
544 | ||
545 | entry_size = (size_t)(idx->off - entry_start); | |
3908c254 | 546 | if (crc_object(&entry->crc, &idx->pack->mwf, entry_start, entry_size) < 0) |
453ab98d CMN |
547 | goto on_error; |
548 | ||
0b33fca0 | 549 | return save_entry(idx, entry, pentry, entry_start); |
3412391d | 550 | |
3f93e16c | 551 | on_error: |
ac44b3d2 | 552 | git__free(pentry); |
453ab98d | 553 | git__free(entry); |
453ab98d | 554 | git__free(obj->data); |
3f93e16c CMN |
555 | return -1; |
556 | } | |
3412391d | 557 | |
22a2d3d5 | 558 | static int do_progress_callback(git_indexer *idx, git_indexer_progress *stats) |
216863c4 | 559 | { |
25e0b157 | 560 | if (idx->progress_cb) |
ac3d33df | 561 | return git_error_set_after_callback_function( |
25e0b157 RB |
562 | idx->progress_cb(stats, idx->progress_payload), |
563 | "indexer progress"); | |
db4cbfe5 | 564 | return 0; |
216863c4 BS |
565 | } |
566 | ||
98eb2c59 | 567 | /* Hash everything but the last 20B of input */ |
a6154f21 | 568 | static void hash_partially(git_indexer *idx, const uint8_t *data, size_t size) |
98eb2c59 | 569 | { |
af302aca | 570 | size_t to_expell, to_keep; |
98eb2c59 CMN |
571 | |
572 | if (size == 0) | |
573 | return; | |
574 | ||
575 | /* Easy case, dump the buffer and the data minus the last 20 bytes */ | |
af302aca | 576 | if (size >= GIT_OID_RAWSZ) { |
98eb2c59 CMN |
577 | git_hash_update(&idx->trailer, idx->inbuf, idx->inbuf_len); |
578 | git_hash_update(&idx->trailer, data, size - GIT_OID_RAWSZ); | |
579 | ||
580 | data += size - GIT_OID_RAWSZ; | |
581 | memcpy(idx->inbuf, data, GIT_OID_RAWSZ); | |
582 | idx->inbuf_len = GIT_OID_RAWSZ; | |
583 | return; | |
584 | } | |
585 | ||
586 | /* We can just append */ | |
587 | if (idx->inbuf_len + size <= GIT_OID_RAWSZ) { | |
588 | memcpy(idx->inbuf + idx->inbuf_len, data, size); | |
589 | idx->inbuf_len += size; | |
590 | return; | |
591 | } | |
592 | ||
593 | /* We need to partially drain the buffer and then append */ | |
af302aca RB |
594 | to_keep = GIT_OID_RAWSZ - size; |
595 | to_expell = idx->inbuf_len - to_keep; | |
98eb2c59 CMN |
596 | |
597 | git_hash_update(&idx->trailer, idx->inbuf, to_expell); | |
598 | ||
599 | memmove(idx->inbuf, idx->inbuf + to_expell, to_keep); | |
600 | memcpy(idx->inbuf + to_keep, data, size); | |
601 | idx->inbuf_len += size - to_expell; | |
602 | } | |
603 | ||
c25aa7cd PP |
604 | #if defined(NO_MMAP) || !defined(GIT_WIN32) |
605 | ||
606 | static int write_at(git_indexer *idx, const void *data, off64_t offset, size_t size) | |
607 | { | |
608 | size_t remaining_size = size; | |
609 | const char *ptr = (const char *)data; | |
610 | ||
611 | /* Handle data size larger that ssize_t */ | |
612 | while (remaining_size > 0) { | |
613 | ssize_t nb; | |
614 | HANDLE_EINTR(nb, p_pwrite(idx->pack->mwf.fd, (void *)ptr, | |
615 | remaining_size, offset)); | |
616 | if (nb <= 0) | |
617 | return -1; | |
618 | ||
619 | ptr += nb; | |
620 | offset += nb; | |
621 | remaining_size -= nb; | |
622 | } | |
623 | ||
624 | return 0; | |
625 | } | |
626 | ||
627 | static int append_to_pack(git_indexer *idx, const void *data, size_t size) | |
628 | { | |
629 | if (write_at(idx, data, idx->pack->mwf.size, size) < 0) { | |
630 | git_error_set(GIT_ERROR_OS, "cannot extend packfile '%s'", idx->pack->pack_name); | |
631 | return -1; | |
632 | } | |
633 | ||
634 | return 0; | |
635 | } | |
636 | ||
637 | #else | |
638 | ||
639 | /* | |
640 | * Windows may keep different views to a networked file for the mmap- and | |
641 | * open-accessed versions of a file, so any writes done through | |
642 | * `write(2)`/`pwrite(2)` may not be reflected on the data that `mmap(2)` is | |
643 | * able to read. | |
644 | */ | |
645 | ||
22a2d3d5 | 646 | static int write_at(git_indexer *idx, const void *data, off64_t offset, size_t size) |
f7310540 CMN |
647 | { |
648 | git_file fd = idx->pack->mwf.fd; | |
87c18197 | 649 | size_t mmap_alignment; |
62e562f9 | 650 | size_t page_offset; |
22a2d3d5 | 651 | off64_t page_start; |
0731a5b4 | 652 | unsigned char *map_data; |
f7310540 CMN |
653 | git_map map; |
654 | int error; | |
655 | ||
c25aa7cd PP |
656 | GIT_ASSERT_ARG(data); |
657 | GIT_ASSERT_ARG(size); | |
bc8a0886 | 658 | |
87c18197 | 659 | if ((error = git__mmap_alignment(&mmap_alignment)) < 0) |
62e562f9 AM |
660 | return error; |
661 | ||
87c18197 CMN |
662 | /* the offset needs to be at the mmap boundary for the platform */ |
663 | page_offset = offset % mmap_alignment; | |
62e562f9 | 664 | page_start = offset - page_offset; |
f7310540 CMN |
665 | |
666 | if ((error = p_mmap(&map, page_offset + size, GIT_PROT_WRITE, GIT_MAP_SHARED, fd, page_start)) < 0) | |
667 | return error; | |
668 | ||
0731a5b4 CMN |
669 | map_data = (unsigned char *)map.data; |
670 | memcpy(map_data + page_offset, data, size); | |
f7310540 CMN |
671 | p_munmap(&map); |
672 | ||
673 | return 0; | |
674 | } | |
675 | ||
676 | static int append_to_pack(git_indexer *idx, const void *data, size_t size) | |
677 | { | |
22a2d3d5 | 678 | off64_t new_size; |
c7a1535f CH |
679 | size_t mmap_alignment; |
680 | size_t page_offset; | |
22a2d3d5 UG |
681 | off64_t page_start; |
682 | off64_t current_size = idx->pack->mwf.size; | |
c7a1535f | 683 | int error; |
f7310540 | 684 | |
bc8a0886 PK |
685 | if (!size) |
686 | return 0; | |
687 | ||
c7a1535f CH |
688 | if ((error = git__mmap_alignment(&mmap_alignment)) < 0) |
689 | return error; | |
690 | ||
691 | /* Write a single byte to force the file system to allocate space now or | |
692 | * report an error, since we can't report errors when writing using mmap. | |
693 | * Round the size up to the nearest page so that we only need to perform file | |
694 | * I/O when we add a page, instead of whenever we write even a single byte. */ | |
695 | new_size = current_size + size; | |
696 | page_offset = new_size % mmap_alignment; | |
697 | page_start = new_size - page_offset; | |
698 | ||
c25aa7cd | 699 | if (p_pwrite(idx->pack->mwf.fd, data, 1, page_start + mmap_alignment - 1) < 0) { |
ac3d33df | 700 | git_error_set(GIT_ERROR_OS, "cannot extend packfile '%s'", idx->pack->pack_name); |
f7310540 CMN |
701 | return -1; |
702 | } | |
703 | ||
704 | return write_at(idx, data, idx->pack->mwf.size, size); | |
705 | } | |
706 | ||
c25aa7cd PP |
707 | #endif |
708 | ||
22a2d3d5 | 709 | static int read_stream_object(git_indexer *idx, git_indexer_progress *stats) |
ac3d33df JK |
710 | { |
711 | git_packfile_stream *stream = &idx->stream; | |
22a2d3d5 | 712 | off64_t entry_start = idx->off; |
ac3d33df JK |
713 | size_t entry_size; |
714 | git_object_t type; | |
715 | git_mwindow *w = NULL; | |
716 | int error; | |
717 | ||
718 | if (idx->pack->mwf.size <= idx->off + 20) | |
719 | return GIT_EBUFS; | |
720 | ||
721 | if (!idx->have_stream) { | |
c25aa7cd | 722 | error = git_packfile_unpack_header(&entry_size, &type, idx->pack, &w, &idx->off); |
ac3d33df JK |
723 | if (error == GIT_EBUFS) { |
724 | idx->off = entry_start; | |
725 | return error; | |
726 | } | |
727 | if (error < 0) | |
728 | return error; | |
729 | ||
730 | git_mwindow_close(&w); | |
731 | idx->entry_start = entry_start; | |
732 | git_hash_init(&idx->hash_ctx); | |
733 | git_buf_clear(&idx->entry_data); | |
734 | ||
735 | if (type == GIT_OBJECT_REF_DELTA || type == GIT_OBJECT_OFS_DELTA) { | |
736 | error = advance_delta_offset(idx, type); | |
737 | if (error == GIT_EBUFS) { | |
738 | idx->off = entry_start; | |
739 | return error; | |
740 | } | |
741 | if (error < 0) | |
742 | return error; | |
743 | ||
744 | idx->have_delta = 1; | |
745 | } else { | |
746 | idx->have_delta = 0; | |
747 | ||
748 | error = hash_header(&idx->hash_ctx, entry_size, type); | |
749 | if (error < 0) | |
750 | return error; | |
751 | } | |
752 | ||
753 | idx->have_stream = 1; | |
754 | idx->entry_type = type; | |
755 | ||
756 | error = git_packfile_stream_open(stream, idx->pack, idx->off); | |
757 | if (error < 0) | |
758 | return error; | |
759 | } | |
760 | ||
761 | if (idx->have_delta) { | |
762 | error = read_object_stream(idx, stream); | |
763 | } else { | |
764 | error = hash_object_stream(idx, stream); | |
765 | } | |
766 | ||
767 | idx->off = stream->curpos; | |
768 | if (error == GIT_EBUFS) | |
769 | return error; | |
770 | ||
771 | /* We want to free the stream reasorces no matter what here */ | |
772 | idx->have_stream = 0; | |
773 | git_packfile_stream_dispose(stream); | |
774 | ||
775 | if (error < 0) | |
776 | return error; | |
777 | ||
778 | if (idx->have_delta) { | |
779 | error = store_delta(idx); | |
780 | } else { | |
781 | error = store_object(idx); | |
782 | } | |
783 | ||
784 | if (error < 0) | |
785 | return error; | |
786 | ||
787 | if (!idx->have_delta) { | |
788 | stats->indexed_objects++; | |
789 | } | |
790 | stats->received_objects++; | |
791 | ||
792 | if ((error = do_progress_callback(idx, stats)) != 0) | |
793 | return error; | |
794 | ||
795 | return 0; | |
796 | } | |
797 | ||
22a2d3d5 | 798 | int git_indexer_append(git_indexer *idx, const void *data, size_t size, git_indexer_progress *stats) |
3f93e16c | 799 | { |
fe95ac1b | 800 | int error = -1; |
0b33fca0 | 801 | struct git_pack_header *hdr = &idx->hdr; |
3f93e16c | 802 | git_mwindow_file *mwf = &idx->pack->mwf; |
3412391d | 803 | |
c25aa7cd PP |
804 | GIT_ASSERT_ARG(idx); |
805 | GIT_ASSERT_ARG(data); | |
806 | GIT_ASSERT_ARG(stats); | |
3f93e16c | 807 | |
f7310540 | 808 | if ((error = append_to_pack(idx, data, size)) < 0) |
7697e541 | 809 | return error; |
3f93e16c | 810 | |
8a1e925d | 811 | hash_partially(idx, data, (int)size); |
98eb2c59 | 812 | |
3f93e16c | 813 | /* Make sure we set the new size of the pack */ |
f7310540 | 814 | idx->pack->mwf.size += size; |
3f93e16c CMN |
815 | |
816 | if (!idx->parsed_header) { | |
10c06114 AS |
817 | unsigned int total_objects; |
818 | ||
0b33fca0 | 819 | if ((unsigned)idx->pack->mwf.size < sizeof(struct git_pack_header)) |
3f93e16c CMN |
820 | return 0; |
821 | ||
7697e541 RB |
822 | if ((error = parse_header(&idx->hdr, idx->pack)) < 0) |
823 | return error; | |
3f93e16c CMN |
824 | |
825 | idx->parsed_header = 1; | |
0b33fca0 | 826 | idx->nr_objects = ntohl(hdr->hdr_entries); |
3f93e16c CMN |
827 | idx->off = sizeof(struct git_pack_header); |
828 | ||
ac3d33df | 829 | if (idx->nr_objects <= git_indexer__max_objects) { |
10c06114 | 830 | total_objects = (unsigned int)idx->nr_objects; |
ac3d33df JK |
831 | } else { |
832 | git_error_set(GIT_ERROR_INDEXER, "too many objects"); | |
833 | return -1; | |
834 | } | |
3f93e16c | 835 | |
22a2d3d5 UG |
836 | if (git_oidmap_new(&idx->pack->idx_cache) < 0) |
837 | return -1; | |
3f93e16c CMN |
838 | |
839 | idx->pack->has_cache = 1; | |
10c06114 | 840 | if (git_vector_init(&idx->objects, total_objects, objects_cmp) < 0) |
3f93e16c CMN |
841 | return -1; |
842 | ||
10c06114 | 843 | if (git_vector_init(&idx->deltas, total_objects / 2, NULL) < 0) |
3f93e16c CMN |
844 | return -1; |
845 | ||
81eecc34 | 846 | stats->received_objects = 0; |
0b33fca0 | 847 | stats->local_objects = 0; |
ab46b1d8 CMN |
848 | stats->total_deltas = 0; |
849 | stats->indexed_deltas = 0; | |
ac3d33df | 850 | stats->indexed_objects = 0; |
10c06114 | 851 | stats->total_objects = total_objects; |
db4cbfe5 | 852 | |
25e0b157 | 853 | if ((error = do_progress_callback(idx, stats)) != 0) |
db4cbfe5 | 854 | return error; |
3f93e16c CMN |
855 | } |
856 | ||
857 | /* Now that we have data in the pack, let's try to parse it */ | |
858 | ||
859 | /* As the file grows any windows we try to use will be out of date */ | |
c25aa7cd PP |
860 | if ((error = git_mwindow_free_all(mwf)) < 0) |
861 | goto on_error; | |
7697e541 | 862 | |
ac3d33df JK |
863 | while (stats->indexed_objects < idx->nr_objects) { |
864 | if ((error = read_stream_object(idx, stats)) != 0) { | |
865 | if (error == GIT_EBUFS) | |
866 | break; | |
867 | else | |
7697e541 | 868 | goto on_error; |
5a3ad89d | 869 | } |
453ab98d | 870 | } |
3f93e16c | 871 | |
453ab98d | 872 | return 0; |
3f93e16c | 873 | |
453ab98d CMN |
874 | on_error: |
875 | git_mwindow_free_all(mwf); | |
fe95ac1b | 876 | return error; |
453ab98d | 877 | } |
3f93e16c | 878 | |
a6154f21 | 879 | static int index_path(git_buf *path, git_indexer *idx, const char *suffix) |
453ab98d CMN |
880 | { |
881 | const char prefix[] = "pack-"; | |
882 | size_t slash = (size_t)path->size; | |
3f93e16c | 883 | |
453ab98d CMN |
884 | /* search backwards for '/' */ |
885 | while (slash > 0 && path->ptr[slash - 1] != '/') | |
886 | slash--; | |
3f93e16c | 887 | |
453ab98d CMN |
888 | if (git_buf_grow(path, slash + 1 + strlen(prefix) + |
889 | GIT_OID_HEXSZ + strlen(suffix) + 1) < 0) | |
890 | return -1; | |
891 | ||
892 | git_buf_truncate(path, slash); | |
893 | git_buf_puts(path, prefix); | |
fa6420f7 | 894 | git_oid_fmt(path->ptr + git_buf_len(path), &idx->hash); |
453ab98d CMN |
895 | path->size += GIT_OID_HEXSZ; |
896 | git_buf_puts(path, suffix); | |
897 | ||
898 | return git_buf_oom(path) ? -1 : 0; | |
899 | } | |
900 | ||
0b33fca0 CMN |
901 | /** |
902 | * Rewind the packfile by the trailer, as we might need to fix the | |
903 | * packfile by injecting objects at the tail and must overwrite it. | |
904 | */ | |
c25aa7cd | 905 | static int seek_back_trailer(git_indexer *idx) |
0b33fca0 | 906 | { |
0b33fca0 | 907 | idx->pack->mwf.size -= GIT_OID_RAWSZ; |
c25aa7cd | 908 | return git_mwindow_free_all(&idx->pack->mwf); |
0b33fca0 CMN |
909 | } |
910 | ||
a6154f21 | 911 | static int inject_object(git_indexer *idx, git_oid *id) |
453ab98d | 912 | { |
c25aa7cd PP |
913 | git_odb_object *obj = NULL; |
914 | struct entry *entry = NULL; | |
ac44b3d2 | 915 | struct git_pack_entry *pentry = NULL; |
0b33fca0 CMN |
916 | git_oid foo = {{0}}; |
917 | unsigned char hdr[64]; | |
918 | git_buf buf = GIT_BUF_INIT; | |
22a2d3d5 | 919 | off64_t entry_start; |
0b33fca0 CMN |
920 | const void *data; |
921 | size_t len, hdr_len; | |
922 | int error; | |
923 | ||
c25aa7cd PP |
924 | if ((error = seek_back_trailer(idx)) < 0) |
925 | goto cleanup; | |
926 | ||
f7310540 | 927 | entry_start = idx->pack->mwf.size; |
0b33fca0 | 928 | |
c25aa7cd | 929 | if ((error = git_odb_read(&obj, idx->odb, id)) < 0) { |
ac3d33df | 930 | git_error_set(GIT_ERROR_INDEXER, "missing delta bases"); |
c25aa7cd | 931 | goto cleanup; |
ec7e680c | 932 | } |
0b33fca0 CMN |
933 | |
934 | data = git_odb_object_data(obj); | |
935 | len = git_odb_object_size(obj); | |
936 | ||
24953757 | 937 | entry = git__calloc(1, sizeof(*entry)); |
ac3d33df | 938 | GIT_ERROR_CHECK_ALLOC(entry); |
24953757 | 939 | |
0b33fca0 CMN |
940 | entry->crc = crc32(0L, Z_NULL, 0); |
941 | ||
942 | /* Write out the object header */ | |
c25aa7cd PP |
943 | if ((error = git_packfile__object_header(&hdr_len, hdr, len, git_odb_object_type(obj))) < 0 || |
944 | (error = append_to_pack(idx, hdr, hdr_len)) < 0) | |
f7310540 CMN |
945 | goto cleanup; |
946 | ||
0b33fca0 | 947 | idx->pack->mwf.size += hdr_len; |
e9d5e5f3 | 948 | entry->crc = crc32(entry->crc, hdr, (uInt)hdr_len); |
0b33fca0 | 949 | |
c6f26b48 | 950 | if ((error = git_zstream_deflatebuf(&buf, data, len)) < 0) |
0b33fca0 CMN |
951 | goto cleanup; |
952 | ||
953 | /* And then the compressed object */ | |
f7310540 CMN |
954 | if ((error = append_to_pack(idx, buf.ptr, buf.size)) < 0) |
955 | goto cleanup; | |
956 | ||
0b33fca0 | 957 | idx->pack->mwf.size += buf.size; |
3343b5ff | 958 | entry->crc = htonl(crc32(entry->crc, (unsigned char *)buf.ptr, (uInt)buf.size)); |
ac3d33df | 959 | git_buf_dispose(&buf); |
0b33fca0 CMN |
960 | |
961 | /* Write a fake trailer so the pack functions play ball */ | |
f7310540 CMN |
962 | |
963 | if ((error = append_to_pack(idx, &foo, GIT_OID_RAWSZ)) < 0) | |
0b33fca0 CMN |
964 | goto cleanup; |
965 | ||
966 | idx->pack->mwf.size += GIT_OID_RAWSZ; | |
967 | ||
968 | pentry = git__calloc(1, sizeof(struct git_pack_entry)); | |
ac3d33df | 969 | GIT_ERROR_CHECK_ALLOC(pentry); |
0b33fca0 CMN |
970 | |
971 | git_oid_cpy(&pentry->sha1, id); | |
972 | git_oid_cpy(&entry->oid, id); | |
973 | idx->off = entry_start + hdr_len + len; | |
974 | ||
24953757 | 975 | error = save_entry(idx, entry, pentry, entry_start); |
0b33fca0 CMN |
976 | |
977 | cleanup: | |
24953757 AS |
978 | if (error) { |
979 | git__free(entry); | |
980 | git__free(pentry); | |
981 | } | |
ac44b3d2 | 982 | |
0b33fca0 CMN |
983 | git_odb_object_free(obj); |
984 | return error; | |
985 | } | |
986 | ||
22a2d3d5 | 987 | static int fix_thin_pack(git_indexer *idx, git_indexer_progress *stats) |
0b33fca0 | 988 | { |
7fb6eb27 | 989 | int error, found_ref_delta = 0; |
453ab98d CMN |
990 | unsigned int i; |
991 | struct delta_info *delta; | |
7fb6eb27 | 992 | size_t size; |
ac3d33df | 993 | git_object_t type; |
7fb6eb27 | 994 | git_mwindow *w = NULL; |
22a2d3d5 | 995 | off64_t curpos = 0; |
7fb6eb27 CMN |
996 | unsigned char *base_info; |
997 | unsigned int left = 0; | |
998 | git_oid base; | |
999 | ||
c25aa7cd | 1000 | GIT_ASSERT(git_vector_length(&idx->deltas) > 0); |
0b33fca0 CMN |
1001 | |
1002 | if (idx->odb == NULL) { | |
ac3d33df | 1003 | git_error_set(GIT_ERROR_INDEXER, "cannot fix a thin pack without an ODB"); |
0b33fca0 CMN |
1004 | return -1; |
1005 | } | |
453ab98d | 1006 | |
7fb6eb27 | 1007 | /* Loop until we find the first REF delta */ |
453ab98d | 1008 | git_vector_foreach(&idx->deltas, i, delta) { |
2dde1e0c CMN |
1009 | if (!delta) |
1010 | continue; | |
1011 | ||
7fb6eb27 | 1012 | curpos = delta->delta_off; |
c25aa7cd | 1013 | error = git_packfile_unpack_header(&size, &type, idx->pack, &w, &curpos); |
0b33fca0 CMN |
1014 | if (error < 0) |
1015 | return error; | |
1016 | ||
ac3d33df | 1017 | if (type == GIT_OBJECT_REF_DELTA) { |
7fb6eb27 CMN |
1018 | found_ref_delta = 1; |
1019 | break; | |
0b33fca0 | 1020 | } |
7fb6eb27 | 1021 | } |
0b33fca0 | 1022 | |
7fb6eb27 | 1023 | if (!found_ref_delta) { |
ac3d33df | 1024 | git_error_set(GIT_ERROR_INDEXER, "no REF_DELTA found, cannot inject object"); |
7fb6eb27 CMN |
1025 | return -1; |
1026 | } | |
0b33fca0 | 1027 | |
7fb6eb27 CMN |
1028 | /* curpos now points to the base information, which is an OID */ |
1029 | base_info = git_mwindow_open(&idx->pack->mwf, &w, curpos, GIT_OID_RAWSZ, &left); | |
1030 | if (base_info == NULL) { | |
ac3d33df | 1031 | git_error_set(GIT_ERROR_INDEXER, "failed to map delta information"); |
7fb6eb27 CMN |
1032 | return -1; |
1033 | } | |
0b33fca0 | 1034 | |
7fb6eb27 CMN |
1035 | git_oid_fromraw(&base, base_info); |
1036 | git_mwindow_close(&w); | |
0b33fca0 | 1037 | |
e2dd3735 ET |
1038 | if (has_entry(idx, &base)) |
1039 | return 0; | |
1040 | ||
7fb6eb27 CMN |
1041 | if (inject_object(idx, &base) < 0) |
1042 | return -1; | |
1043 | ||
7fb6eb27 | 1044 | stats->local_objects++; |
0b33fca0 CMN |
1045 | |
1046 | return 0; | |
1047 | } | |
1048 | ||
22a2d3d5 | 1049 | static int resolve_deltas(git_indexer *idx, git_indexer_progress *stats) |
453ab98d CMN |
1050 | { |
1051 | unsigned int i; | |
eae0bfdc | 1052 | int error; |
453ab98d | 1053 | struct delta_info *delta; |
2dde1e0c | 1054 | int progressed = 0, non_null = 0, progress_cb_result; |
cf0582b4 CMN |
1055 | |
1056 | while (idx->deltas.length > 0) { | |
1057 | progressed = 0; | |
2dde1e0c | 1058 | non_null = 0; |
cf0582b4 | 1059 | git_vector_foreach(&idx->deltas, i, delta) { |
ac3d33df | 1060 | git_rawobj obj = {0}; |
cf0582b4 | 1061 | |
2dde1e0c CMN |
1062 | if (!delta) |
1063 | continue; | |
1064 | ||
1065 | non_null = 1; | |
cf0582b4 | 1066 | idx->off = delta->delta_off; |
eae0bfdc PP |
1067 | if ((error = git_packfile_unpack(&obj, idx->pack, &idx->off)) < 0) { |
1068 | if (error == GIT_PASSTHROUGH) { | |
1069 | /* We have not seen the base object, we'll try again later. */ | |
1070 | continue; | |
1071 | } | |
1072 | return -1; | |
1073 | } | |
cf0582b4 | 1074 | |
ac3d33df JK |
1075 | if (idx->do_verify && check_object_connectivity(idx, &obj) < 0) |
1076 | /* TODO: error? continue? */ | |
1077 | continue; | |
1078 | ||
cf0582b4 CMN |
1079 | if (hash_and_save(idx, &obj, delta->delta_off) < 0) |
1080 | continue; | |
1081 | ||
1082 | git__free(obj.data); | |
1083 | stats->indexed_objects++; | |
ab46b1d8 | 1084 | stats->indexed_deltas++; |
cf0582b4 | 1085 | progressed = 1; |
db4cbfe5 JM |
1086 | if ((progress_cb_result = do_progress_callback(idx, stats)) < 0) |
1087 | return progress_cb_result; | |
cf0582b4 | 1088 | |
2dde1e0c CMN |
1089 | /* remove from the list */ |
1090 | git_vector_set(NULL, &idx->deltas, i, NULL); | |
0b33fca0 | 1091 | git__free(delta); |
cf0582b4 | 1092 | } |
453ab98d | 1093 | |
2dde1e0c CMN |
1094 | /* if none were actually set, we're done */ |
1095 | if (!non_null) | |
1096 | break; | |
1097 | ||
7fb6eb27 | 1098 | if (!progressed && (fix_thin_pack(idx, stats) < 0)) { |
453ab98d | 1099 | return -1; |
7fb6eb27 | 1100 | } |
0b33fca0 CMN |
1101 | } |
1102 | ||
1103 | return 0; | |
1104 | } | |
1105 | ||
22a2d3d5 | 1106 | static int update_header_and_rehash(git_indexer *idx, git_indexer_progress *stats) |
0b33fca0 CMN |
1107 | { |
1108 | void *ptr; | |
1109 | size_t chunk = 1024*1024; | |
22a2d3d5 | 1110 | off64_t hashed = 0; |
0b33fca0 CMN |
1111 | git_mwindow *w = NULL; |
1112 | git_mwindow_file *mwf; | |
1113 | unsigned int left; | |
0b33fca0 CMN |
1114 | |
1115 | mwf = &idx->pack->mwf; | |
0b33fca0 | 1116 | |
7561f98d | 1117 | git_hash_init(&idx->trailer); |
f7310540 | 1118 | |
0b33fca0 CMN |
1119 | |
1120 | /* Update the header to include the numer of local objects we injected */ | |
893055f2 | 1121 | idx->hdr.hdr_entries = htonl(stats->total_objects + stats->local_objects); |
f7310540 | 1122 | if (write_at(idx, &idx->hdr, 0, sizeof(struct git_pack_header)) < 0) |
0b33fca0 | 1123 | return -1; |
453ab98d | 1124 | |
0b33fca0 CMN |
1125 | /* |
1126 | * We now use the same technique as before to determine the | |
1127 | * hash. We keep reading up to the end and let | |
1128 | * hash_partially() keep the existing trailer out of the | |
1129 | * calculation. | |
1130 | */ | |
c25aa7cd PP |
1131 | if (git_mwindow_free_all(mwf) < 0) |
1132 | return -1; | |
1133 | ||
0b33fca0 CMN |
1134 | idx->inbuf_len = 0; |
1135 | while (hashed < mwf->size) { | |
1136 | ptr = git_mwindow_open(mwf, &w, hashed, chunk, &left); | |
1137 | if (ptr == NULL) | |
453ab98d | 1138 | return -1; |
3f93e16c | 1139 | |
0b33fca0 CMN |
1140 | hash_partially(idx, ptr, left); |
1141 | hashed += left; | |
1142 | ||
1143 | git_mwindow_close(&w); | |
453ab98d | 1144 | } |
3f93e16c | 1145 | |
453ab98d CMN |
1146 | return 0; |
1147 | } | |
1148 | ||
22a2d3d5 | 1149 | int git_indexer_commit(git_indexer *idx, git_indexer_progress *stats) |
453ab98d CMN |
1150 | { |
1151 | git_mwindow *w = NULL; | |
1152 | unsigned int i, long_offsets = 0, left; | |
db4cbfe5 | 1153 | int error; |
453ab98d CMN |
1154 | struct git_pack_idx_header hdr; |
1155 | git_buf filename = GIT_BUF_INIT; | |
1156 | struct entry *entry; | |
98eb2c59 | 1157 | git_oid trailer_hash, file_hash; |
2fe67aeb | 1158 | git_filebuf index_file = {0}; |
98eb2c59 | 1159 | void *packfile_trailer; |
d6fb0924 | 1160 | |
b3eb2cde | 1161 | if (!idx->parsed_header) { |
ac3d33df | 1162 | git_error_set(GIT_ERROR_INDEXER, "incomplete pack header"); |
b3eb2cde YL |
1163 | return -1; |
1164 | } | |
1165 | ||
6a9d61ef | 1166 | /* Test for this before resolve_deltas(), as it plays with idx->off */ |
b3eb2cde | 1167 | if (idx->off + 20 < idx->pack->mwf.size) { |
ac3d33df | 1168 | git_error_set(GIT_ERROR_INDEXER, "unexpected data at the end of the pack"); |
6a9d61ef CMN |
1169 | return -1; |
1170 | } | |
eae0bfdc | 1171 | if (idx->off + 20 > idx->pack->mwf.size) { |
ac3d33df | 1172 | git_error_set(GIT_ERROR_INDEXER, "missing trailer at the end of the pack"); |
eae0bfdc PP |
1173 | return -1; |
1174 | } | |
6a9d61ef | 1175 | |
98eb2c59 CMN |
1176 | packfile_trailer = git_mwindow_open(&idx->pack->mwf, &w, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ, &left); |
1177 | if (packfile_trailer == NULL) { | |
1178 | git_mwindow_close(&w); | |
1179 | goto on_error; | |
1180 | } | |
1181 | ||
1182 | /* Compare the packfile trailer as it was sent to us and what we calculated */ | |
1183 | git_oid_fromraw(&file_hash, packfile_trailer); | |
1184 | git_mwindow_close(&w); | |
1185 | ||
1186 | git_hash_final(&trailer_hash, &idx->trailer); | |
1187 | if (git_oid_cmp(&file_hash, &trailer_hash)) { | |
ac3d33df | 1188 | git_error_set(GIT_ERROR_INDEXER, "packfile trailer mismatch"); |
98eb2c59 CMN |
1189 | return -1; |
1190 | } | |
1191 | ||
ab46b1d8 CMN |
1192 | /* Freeze the number of deltas */ |
1193 | stats->total_deltas = stats->total_objects - stats->indexed_objects; | |
1194 | ||
db4cbfe5 JM |
1195 | if ((error = resolve_deltas(idx, stats)) < 0) |
1196 | return error; | |
453ab98d | 1197 | |
7d222e13 | 1198 | if (stats->indexed_objects != stats->total_objects) { |
ac3d33df | 1199 | git_error_set(GIT_ERROR_INDEXER, "early EOF"); |
6a9d61ef CMN |
1200 | return -1; |
1201 | } | |
1202 | ||
0b33fca0 CMN |
1203 | if (stats->local_objects > 0) { |
1204 | if (update_header_and_rehash(idx, stats) < 0) | |
1205 | return -1; | |
1206 | ||
1207 | git_hash_final(&trailer_hash, &idx->trailer); | |
f7310540 | 1208 | write_at(idx, &trailer_hash, idx->pack->mwf.size - GIT_OID_RAWSZ, GIT_OID_RAWSZ); |
0b33fca0 CMN |
1209 | } |
1210 | ||
ac3d33df JK |
1211 | /* |
1212 | * Is the resulting graph fully connected or are we still | |
1213 | * missing some objects? In the second case, we can | |
1214 | * bail out due to an incomplete and thus corrupt | |
1215 | * packfile. | |
1216 | */ | |
1217 | if (git_oidmap_size(idx->expected_oids) > 0) { | |
1218 | git_error_set(GIT_ERROR_INDEXER, "packfile is missing %"PRIuZ" objects", | |
1219 | git_oidmap_size(idx->expected_oids)); | |
1220 | return -1; | |
1221 | } | |
1222 | ||
453ab98d CMN |
1223 | git_vector_sort(&idx->objects); |
1224 | ||
c0e54155 CH |
1225 | /* Use the trailer hash as the pack file name to ensure |
1226 | * files with different contents have different names */ | |
1227 | git_oid_cpy(&idx->hash, &trailer_hash); | |
1228 | ||
453ab98d | 1229 | git_buf_sets(&filename, idx->pack->pack_name); |
278ce746 | 1230 | git_buf_shorten(&filename, strlen("pack")); |
453ab98d CMN |
1231 | git_buf_puts(&filename, "idx"); |
1232 | if (git_buf_oom(&filename)) | |
1233 | return -1; | |
1234 | ||
1d3a8aeb | 1235 | if (git_filebuf_open(&index_file, filename.ptr, |
1c2c0ae2 | 1236 | GIT_FILEBUF_HASH_CONTENTS | |
1c04a96b | 1237 | (idx->do_fsync ? GIT_FILEBUF_FSYNC : 0), |
1c2c0ae2 | 1238 | idx->mode) < 0) |
453ab98d CMN |
1239 | goto on_error; |
1240 | ||
1241 | /* Write out the header */ | |
1242 | hdr.idx_signature = htonl(PACK_IDX_SIGNATURE); | |
1243 | hdr.idx_version = htonl(2); | |
2fe67aeb | 1244 | git_filebuf_write(&index_file, &hdr, sizeof(hdr)); |
453ab98d CMN |
1245 | |
1246 | /* Write out the fanout table */ | |
1247 | for (i = 0; i < 256; ++i) { | |
1248 | uint32_t n = htonl(idx->fanout[i]); | |
2fe67aeb | 1249 | git_filebuf_write(&index_file, &n, sizeof(n)); |
3412391d CMN |
1250 | } |
1251 | ||
453ab98d | 1252 | /* Write out the object names (SHA-1 hashes) */ |
453ab98d | 1253 | git_vector_foreach(&idx->objects, i, entry) { |
2fe67aeb | 1254 | git_filebuf_write(&index_file, &entry->oid, sizeof(git_oid)); |
453ab98d | 1255 | } |
453ab98d CMN |
1256 | |
1257 | /* Write out the CRC32 values */ | |
1258 | git_vector_foreach(&idx->objects, i, entry) { | |
2fe67aeb | 1259 | git_filebuf_write(&index_file, &entry->crc, sizeof(uint32_t)); |
453ab98d CMN |
1260 | } |
1261 | ||
1262 | /* Write out the offsets */ | |
1263 | git_vector_foreach(&idx->objects, i, entry) { | |
1264 | uint32_t n; | |
1265 | ||
1266 | if (entry->offset == UINT32_MAX) | |
1267 | n = htonl(0x80000000 | long_offsets++); | |
1268 | else | |
1269 | n = htonl(entry->offset); | |
1270 | ||
2fe67aeb | 1271 | git_filebuf_write(&index_file, &n, sizeof(uint32_t)); |
453ab98d CMN |
1272 | } |
1273 | ||
1274 | /* Write out the long offsets */ | |
1275 | git_vector_foreach(&idx->objects, i, entry) { | |
1276 | uint32_t split[2]; | |
1277 | ||
1278 | if (entry->offset != UINT32_MAX) | |
1279 | continue; | |
1280 | ||
1281 | split[0] = htonl(entry->offset_long >> 32); | |
1282 | split[1] = htonl(entry->offset_long & 0xffffffff); | |
1283 | ||
2fe67aeb | 1284 | git_filebuf_write(&index_file, &split, sizeof(uint32_t) * 2); |
453ab98d CMN |
1285 | } |
1286 | ||
98eb2c59 CMN |
1287 | /* Write out the packfile trailer to the index */ |
1288 | if (git_filebuf_write(&index_file, &trailer_hash, GIT_OID_RAWSZ) < 0) | |
453ab98d | 1289 | goto on_error; |
453ab98d | 1290 | |
98eb2c59 CMN |
1291 | /* Write out the hash of the idx */ |
1292 | if (git_filebuf_hash(&trailer_hash, &index_file) < 0) | |
453ab98d CMN |
1293 | goto on_error; |
1294 | ||
98eb2c59 | 1295 | git_filebuf_write(&index_file, &trailer_hash, sizeof(git_oid)); |
453ab98d CMN |
1296 | |
1297 | /* Figure out what the final name should be */ | |
a6154f21 | 1298 | if (index_path(&filename, idx, ".idx") < 0) |
453ab98d CMN |
1299 | goto on_error; |
1300 | ||
1301 | /* Commit file */ | |
1d3a8aeb | 1302 | if (git_filebuf_commit_at(&index_file, filename.ptr) < 0) |
453ab98d CMN |
1303 | goto on_error; |
1304 | ||
c25aa7cd PP |
1305 | if (git_mwindow_free_all(&idx->pack->mwf) < 0) |
1306 | goto on_error; | |
c7a1535f | 1307 | |
c25aa7cd PP |
1308 | #if !defined(NO_MMAP) && defined(GIT_WIN32) |
1309 | /* | |
1310 | * Some non-Windows remote filesystems fail when truncating files if the | |
1311 | * file permissions change after opening the file (done by p_mkstemp). | |
1312 | * | |
1313 | * Truncation is only needed when mmap is used to undo rounding up to next | |
1314 | * page_size in append_to_pack. | |
1315 | */ | |
c7a1535f | 1316 | if (p_ftruncate(idx->pack->mwf.fd, idx->pack->mwf.size) < 0) { |
ac3d33df | 1317 | git_error_set(GIT_ERROR_OS, "failed to truncate pack file '%s'", idx->pack->pack_name); |
c7a1535f CH |
1318 | return -1; |
1319 | } | |
c25aa7cd | 1320 | #endif |
c7a1535f | 1321 | |
1c04a96b | 1322 | if (idx->do_fsync && p_fsync(idx->pack->mwf.fd) < 0) { |
ac3d33df | 1323 | git_error_set(GIT_ERROR_OS, "failed to fsync packfile"); |
1c2c0ae2 ET |
1324 | goto on_error; |
1325 | } | |
1326 | ||
96c9b9f0 | 1327 | /* We need to close the descriptor here so Windows doesn't choke on commit_at */ |
f7310540 | 1328 | if (p_close(idx->pack->mwf.fd) < 0) { |
ac3d33df | 1329 | git_error_set(GIT_ERROR_OS, "failed to close packfile"); |
f7310540 CMN |
1330 | goto on_error; |
1331 | } | |
1332 | ||
96c9b9f0 | 1333 | idx->pack->mwf.fd = -1; |
453ab98d | 1334 | |
a6154f21 | 1335 | if (index_path(&filename, idx, ".pack") < 0) |
453ab98d | 1336 | goto on_error; |
f7310540 | 1337 | |
453ab98d | 1338 | /* And don't forget to rename the packfile to its new place. */ |
1229e1c4 ET |
1339 | if (p_rename(idx->pack->pack_name, git_buf_cstr(&filename)) < 0) |
1340 | goto on_error; | |
1341 | ||
1342 | /* And fsync the parent directory if we're asked to. */ | |
1c04a96b | 1343 | if (idx->do_fsync && |
1229e1c4 ET |
1344 | git_futils_fsync_parent(git_buf_cstr(&filename)) < 0) |
1345 | goto on_error; | |
1346 | ||
d030bba9 | 1347 | idx->pack_committed = 1; |
453ab98d | 1348 | |
ac3d33df | 1349 | git_buf_dispose(&filename); |
3f93e16c CMN |
1350 | return 0; |
1351 | ||
1352 | on_error: | |
453ab98d | 1353 | git_mwindow_free_all(&idx->pack->mwf); |
2fe67aeb | 1354 | git_filebuf_cleanup(&index_file); |
ac3d33df | 1355 | git_buf_dispose(&filename); |
3f93e16c CMN |
1356 | return -1; |
1357 | } | |
1358 | ||
a6154f21 | 1359 | void git_indexer_free(git_indexer *idx) |
1c9c081a | 1360 | { |
22a2d3d5 UG |
1361 | const git_oid *key; |
1362 | git_oid *value; | |
1363 | size_t iter; | |
ac3d33df | 1364 | |
1c9c081a CMN |
1365 | if (idx == NULL) |
1366 | return; | |
1367 | ||
eae0bfdc | 1368 | if (idx->have_stream) |
ac3d33df | 1369 | git_packfile_stream_dispose(&idx->stream); |
eae0bfdc | 1370 | |
9cfce273 | 1371 | git_vector_free_deep(&idx->objects); |
0e040c03 | 1372 | |
87b7a705 | 1373 | if (idx->pack->idx_cache) { |
fcd324c6 | 1374 | struct git_pack_entry *pentry; |
9694d9ba PS |
1375 | git_oidmap_foreach_value(idx->pack->idx_cache, pentry, { |
1376 | git__free(pentry); | |
1377 | }); | |
0e040c03 CMN |
1378 | |
1379 | git_oidmap_free(idx->pack->idx_cache); | |
7a57ae54 | 1380 | } |
0e040c03 | 1381 | |
9cfce273 | 1382 | git_vector_free_deep(&idx->deltas); |
b3b66c57 | 1383 | |
c25aa7cd | 1384 | git_packfile_free(idx->pack, !idx->pack_committed); |
b3b66c57 | 1385 | |
22a2d3d5 UG |
1386 | iter = 0; |
1387 | while (git_oidmap_iterate((void **) &value, idx->expected_oids, &iter, &key) == 0) | |
1388 | git__free(value); | |
ac3d33df | 1389 | |
7561f98d RP |
1390 | git_hash_ctx_cleanup(&idx->trailer); |
1391 | git_hash_ctx_cleanup(&idx->hash_ctx); | |
ac3d33df JK |
1392 | git_buf_dispose(&idx->entry_data); |
1393 | git_oidmap_free(idx->expected_oids); | |
1c9c081a CMN |
1394 | git__free(idx); |
1395 | } |