]>
Commit | Line | Data |
---|---|---|
3412391d | 1 | /* |
bb742ede | 2 | * Copyright (C) 2009-2011 the libgit2 contributors |
3412391d | 3 | * |
bb742ede VM |
4 | * This file is part of libgit2, distributed under the GNU GPL v2 with |
5 | * a Linking Exception. For full terms see the included COPYING file. | |
3412391d CMN |
6 | */ |
7 | ||
f23c4a66 | 8 | #include "git2/indexer.h" |
7d0cdf82 | 9 | #include "git2/object.h" |
ab525a74 | 10 | #include "git2/zlib.h" |
b7c44096 | 11 | #include "git2/oid.h" |
f23c4a66 | 12 | |
3412391d CMN |
13 | #include "common.h" |
14 | #include "pack.h" | |
f23c4a66 | 15 | #include "mwindow.h" |
3412391d | 16 | #include "posix.h" |
b7c44096 CMN |
17 | #include "pack.h" |
18 | #include "filebuf.h" | |
19 | #include "sha1.h" | |
20 | ||
21 | #define UINT31_MAX (0x7FFFFFFF) | |
3412391d | 22 | |
b5b474dd | 23 | struct entry { |
b7c44096 | 24 | git_oid oid; |
b5b474dd CMN |
25 | uint32_t crc; |
26 | uint32_t offset; | |
27 | uint64_t offset_long; | |
28 | }; | |
29 | ||
2d3e417e | 30 | struct git_indexer { |
a070f152 | 31 | struct git_pack_file *pack; |
f23c4a66 | 32 | struct stat st; |
b5b474dd | 33 | struct git_pack_header hdr; |
b7c44096 CMN |
34 | size_t nr_objects; |
35 | git_vector objects; | |
36 | git_filebuf file; | |
37 | unsigned int fanout[256]; | |
38 | git_oid hash; | |
2d3e417e | 39 | }; |
f23c4a66 | 40 | |
b7c44096 CMN |
41 | const git_oid *git_indexer_hash(git_indexer *idx) |
42 | { | |
43 | return &idx->hash; | |
44 | } | |
45 | ||
ab525a74 | 46 | static int parse_header(git_indexer *idx) |
3412391d | 47 | { |
3412391d CMN |
48 | int error; |
49 | ||
50 | /* Verify we recognize this pack file format. */ | |
b5b474dd CMN |
51 | if ((error = p_read(idx->pack->mwf.fd, &idx->hdr, sizeof(idx->hdr))) < GIT_SUCCESS) |
52 | return git__rethrow(error, "Failed to read in pack header"); | |
3412391d | 53 | |
b7c44096 | 54 | if (idx->hdr.hdr_signature != ntohl(PACK_SIGNATURE)) |
b5b474dd | 55 | return git__throw(GIT_EOBJCORRUPTED, "Wrong pack signature"); |
3412391d | 56 | |
b5b474dd CMN |
57 | if (!pack_version_ok(idx->hdr.hdr_version)) |
58 | return git__throw(GIT_EOBJCORRUPTED, "Wrong pack version"); | |
3412391d | 59 | |
f23c4a66 | 60 | |
3412391d | 61 | return GIT_SUCCESS; |
3412391d CMN |
62 | } |
63 | ||
c1af5a39 | 64 | static int objects_cmp(const void *a, const void *b) |
b7c44096 CMN |
65 | { |
66 | const struct entry *entrya = a; | |
67 | const struct entry *entryb = b; | |
68 | ||
69 | return git_oid_cmp(&entrya->oid, &entryb->oid); | |
70 | } | |
71 | ||
c1af5a39 CMN |
72 | static int cache_cmp(const void *a, const void *b) |
73 | { | |
74 | const struct git_pack_entry *ea = a; | |
75 | const struct git_pack_entry *eb = b; | |
76 | ||
77 | return git_oid_cmp(&ea->sha1, &eb->sha1); | |
78 | } | |
79 | ||
80 | ||
ab525a74 | 81 | int git_indexer_new(git_indexer **out, const char *packname) |
3412391d | 82 | { |
ab525a74 | 83 | git_indexer *idx; |
26e74c6a | 84 | size_t namelen; |
3412391d CMN |
85 | int ret, error; |
86 | ||
ade3c9bb CMN |
87 | assert(out && packname); |
88 | ||
b7c44096 CMN |
89 | if (git_path_root(packname) < 0) |
90 | return git__throw(GIT_EINVALIDPATH, "Path is not absolute"); | |
91 | ||
ab525a74 | 92 | idx = git__malloc(sizeof(git_indexer)); |
3412391d CMN |
93 | if (idx == NULL) |
94 | return GIT_ENOMEM; | |
95 | ||
96 | memset(idx, 0x0, sizeof(*idx)); | |
97 | ||
98 | namelen = strlen(packname); | |
a070f152 | 99 | idx->pack = git__malloc(sizeof(struct git_pack_file) + namelen + 1); |
f6867e63 VM |
100 | if (idx->pack == NULL) { |
101 | error = GIT_ENOMEM; | |
3412391d | 102 | goto cleanup; |
f6867e63 | 103 | } |
3412391d | 104 | |
a070f152 | 105 | memset(idx->pack, 0x0, sizeof(struct git_pack_file)); |
b7c44096 | 106 | memcpy(idx->pack->pack_name, packname, namelen + 1); |
3412391d CMN |
107 | |
108 | ret = p_stat(packname, &idx->st); | |
109 | if (ret < 0) { | |
110 | if (errno == ENOENT) | |
111 | error = git__throw(GIT_ENOTFOUND, "Failed to stat packfile. File not found"); | |
112 | else | |
113 | error = git__throw(GIT_EOSERR, "Failed to stat packfile."); | |
114 | ||
115 | goto cleanup; | |
116 | } | |
117 | ||
118 | ret = p_open(idx->pack->pack_name, O_RDONLY); | |
119 | if (ret < 0) { | |
120 | error = git__throw(GIT_EOSERR, "Failed to open packfile"); | |
121 | goto cleanup; | |
122 | } | |
123 | ||
7d0cdf82 | 124 | idx->pack->mwf.fd = ret; |
f6867e63 | 125 | idx->pack->mwf.size = (git_off_t)idx->st.st_size; |
3412391d CMN |
126 | |
127 | error = parse_header(idx); | |
128 | if (error < GIT_SUCCESS) { | |
129 | error = git__rethrow(error, "Failed to parse packfile header"); | |
130 | goto cleanup; | |
131 | } | |
132 | ||
b7c44096 CMN |
133 | idx->nr_objects = ntohl(idx->hdr.hdr_entries); |
134 | ||
c1af5a39 CMN |
135 | error = git_vector_init(&idx->pack->cache, idx->nr_objects, cache_cmp); |
136 | if (error < GIT_SUCCESS) | |
137 | goto cleanup; | |
138 | ||
139 | idx->pack->has_cache = 1; | |
b7c44096 | 140 | error = git_vector_init(&idx->objects, idx->nr_objects, objects_cmp); |
c1af5a39 | 141 | if (error < GIT_SUCCESS) |
b5b474dd | 142 | goto cleanup; |
b5b474dd | 143 | |
3412391d CMN |
144 | *out = idx; |
145 | ||
146 | return GIT_SUCCESS; | |
147 | ||
148 | cleanup: | |
b7c44096 | 149 | git_indexer_free(idx); |
3412391d CMN |
150 | |
151 | return error; | |
152 | } | |
153 | ||
97769280 | 154 | static int index_path(git_buf *path, git_indexer *idx) |
f23c4a66 | 155 | { |
72d6a20b | 156 | const char prefix[] = "pack-", suffix[] = ".idx"; |
97769280 | 157 | size_t slash = (size_t)path->size; |
b7c44096 | 158 | |
97769280 RB |
159 | /* search backwards for '/' */ |
160 | while (slash > 0 && path->ptr[slash - 1] != '/') | |
161 | slash--; | |
b7c44096 | 162 | |
97769280 RB |
163 | if (git_buf_grow(path, slash + 1 + strlen(prefix) + |
164 | GIT_OID_HEXSZ + strlen(suffix) + 1) < GIT_SUCCESS) | |
165 | return GIT_ENOMEM; | |
166 | ||
d0ec3fb8 | 167 | git_buf_truncate(path, slash); |
97769280 RB |
168 | git_buf_puts(path, prefix); |
169 | git_oid_fmt(path->ptr + path->size, &idx->hash); | |
170 | path->size += GIT_OID_HEXSZ; | |
171 | git_buf_puts(path, suffix); | |
172 | ||
173 | return git_buf_lasterror(path); | |
b7c44096 CMN |
174 | } |
175 | ||
48b3ad4f | 176 | int git_indexer_write(git_indexer *idx) |
b7c44096 CMN |
177 | { |
178 | git_mwindow *w = NULL; | |
26e74c6a | 179 | int error; |
f6867e63 | 180 | unsigned int i, long_offsets = 0, left; |
b7c44096 | 181 | struct git_pack_idx_header hdr; |
97769280 | 182 | git_buf filename = GIT_BUF_INIT; |
b7c44096 CMN |
183 | struct entry *entry; |
184 | void *packfile_hash; | |
185 | git_oid file_hash; | |
186 | SHA_CTX ctx; | |
187 | ||
188 | git_vector_sort(&idx->objects); | |
189 | ||
97769280 RB |
190 | git_buf_sets(&filename, idx->pack->pack_name); |
191 | git_buf_truncate(&filename, filename.size - strlen("pack")); | |
192 | git_buf_puts(&filename, "idx"); | |
b7c44096 | 193 | |
97769280 RB |
194 | if ((error = git_buf_lasterror(&filename)) < GIT_SUCCESS) |
195 | goto cleanup; | |
196 | ||
197 | error = git_filebuf_open(&idx->file, filename.ptr, GIT_FILEBUF_HASH_CONTENTS); | |
198 | if (error < GIT_SUCCESS) | |
199 | goto cleanup; | |
b7c44096 CMN |
200 | |
201 | /* Write out the header */ | |
202 | hdr.idx_signature = htonl(PACK_IDX_SIGNATURE); | |
203 | hdr.idx_version = htonl(2); | |
204 | error = git_filebuf_write(&idx->file, &hdr, sizeof(hdr)); | |
97769280 RB |
205 | if (error < GIT_SUCCESS) |
206 | goto cleanup; | |
b7c44096 CMN |
207 | |
208 | /* Write out the fanout table */ | |
209 | for (i = 0; i < 256; ++i) { | |
210 | uint32_t n = htonl(idx->fanout[i]); | |
211 | error = git_filebuf_write(&idx->file, &n, sizeof(n)); | |
212 | if (error < GIT_SUCCESS) | |
213 | goto cleanup; | |
214 | } | |
215 | ||
216 | /* Write out the object names (SHA-1 hashes) */ | |
217 | SHA1_Init(&ctx); | |
218 | git_vector_foreach(&idx->objects, i, entry) { | |
219 | error = git_filebuf_write(&idx->file, &entry->oid, sizeof(git_oid)); | |
220 | SHA1_Update(&ctx, &entry->oid, GIT_OID_RAWSZ); | |
221 | if (error < GIT_SUCCESS) | |
222 | goto cleanup; | |
223 | } | |
224 | SHA1_Final(idx->hash.id, &ctx); | |
225 | ||
226 | /* Write out the CRC32 values */ | |
227 | git_vector_foreach(&idx->objects, i, entry) { | |
228 | error = git_filebuf_write(&idx->file, &entry->crc, sizeof(uint32_t)); | |
229 | if (error < GIT_SUCCESS) | |
230 | goto cleanup; | |
231 | } | |
232 | ||
233 | /* Write out the offsets */ | |
234 | git_vector_foreach(&idx->objects, i, entry) { | |
235 | uint32_t n; | |
236 | ||
237 | if (entry->offset == UINT32_MAX) | |
238 | n = htonl(0x80000000 | long_offsets++); | |
239 | else | |
240 | n = htonl(entry->offset); | |
241 | ||
242 | error = git_filebuf_write(&idx->file, &n, sizeof(uint32_t)); | |
243 | if (error < GIT_SUCCESS) | |
244 | goto cleanup; | |
245 | } | |
246 | ||
247 | /* Write out the long offsets */ | |
248 | git_vector_foreach(&idx->objects, i, entry) { | |
249 | uint32_t split[2]; | |
250 | ||
251 | if (entry->offset != UINT32_MAX) | |
252 | continue; | |
253 | ||
254 | split[0] = htonl(entry->offset_long >> 32); | |
255 | split[1] = htonl(entry->offset_long & 0xffffffff); | |
256 | ||
257 | error = git_filebuf_write(&idx->file, &split, sizeof(uint32_t) * 2); | |
258 | if (error < GIT_SUCCESS) | |
259 | goto cleanup; | |
260 | } | |
261 | ||
262 | /* Write out the packfile trailer */ | |
263 | ||
264 | packfile_hash = git_mwindow_open(&idx->pack->mwf, &w, idx->st.st_size - GIT_OID_RAWSZ, GIT_OID_RAWSZ, &left); | |
c1af5a39 | 265 | git_mwindow_close(&w); |
b7c44096 CMN |
266 | if (packfile_hash == NULL) { |
267 | error = git__rethrow(GIT_ENOMEM, "Failed to open window to packfile hash"); | |
268 | goto cleanup; | |
269 | } | |
270 | ||
271 | memcpy(&file_hash, packfile_hash, GIT_OID_RAWSZ); | |
272 | ||
273 | git_mwindow_close(&w); | |
274 | ||
275 | error = git_filebuf_write(&idx->file, &file_hash, sizeof(git_oid)); | |
276 | ||
277 | /* Write out the index sha */ | |
278 | error = git_filebuf_hash(&file_hash, &idx->file); | |
279 | if (error < GIT_SUCCESS) | |
280 | goto cleanup; | |
281 | ||
282 | error = git_filebuf_write(&idx->file, &file_hash, sizeof(git_oid)); | |
283 | if (error < GIT_SUCCESS) | |
284 | goto cleanup; | |
285 | ||
286 | /* Figure out what the final name should be */ | |
97769280 RB |
287 | error = index_path(&filename, idx); |
288 | if (error < GIT_SUCCESS) | |
289 | goto cleanup; | |
290 | ||
b7c44096 | 291 | /* Commit file */ |
97769280 | 292 | error = git_filebuf_commit_at(&idx->file, filename.ptr, GIT_PACK_FILE_MODE); |
b7c44096 CMN |
293 | |
294 | cleanup: | |
c1af5a39 | 295 | git_mwindow_free_all(&idx->pack->mwf); |
b7c44096 CMN |
296 | if (error < GIT_SUCCESS) |
297 | git_filebuf_cleanup(&idx->file); | |
97769280 | 298 | git_buf_free(&filename); |
b7c44096 CMN |
299 | |
300 | return error; | |
301 | } | |
302 | ||
303 | int git_indexer_run(git_indexer *idx, git_indexer_stats *stats) | |
304 | { | |
305 | git_mwindow_file *mwf; | |
306 | off_t off = sizeof(struct git_pack_header); | |
f23c4a66 | 307 | int error; |
b7c44096 CMN |
308 | struct entry *entry; |
309 | unsigned int left, processed; | |
f23c4a66 | 310 | |
b7c44096 | 311 | assert(idx && stats); |
b5b474dd | 312 | |
b7c44096 | 313 | mwf = &idx->pack->mwf; |
f23c4a66 CMN |
314 | error = git_mwindow_file_register(mwf); |
315 | if (error < GIT_SUCCESS) | |
316 | return git__rethrow(error, "Failed to register mwindow file"); | |
317 | ||
b7c44096 CMN |
318 | stats->total = idx->nr_objects; |
319 | stats->processed = processed = 0; | |
f23c4a66 | 320 | |
b7c44096 | 321 | while (processed < idx->nr_objects) { |
b5b474dd CMN |
322 | git_rawobj obj; |
323 | git_oid oid; | |
c1af5a39 | 324 | struct git_pack_entry *pentry; |
b7c44096 | 325 | git_mwindow *w = NULL; |
c85e08b1 | 326 | int i; |
b7c44096 CMN |
327 | off_t entry_start = off; |
328 | void *packed; | |
329 | size_t entry_size; | |
b5b474dd | 330 | |
b7c44096 CMN |
331 | entry = git__malloc(sizeof(struct entry)); |
332 | memset(entry, 0x0, sizeof(struct entry)); | |
b5b474dd CMN |
333 | |
334 | if (off > UINT31_MAX) { | |
b7c44096 CMN |
335 | entry->offset = UINT32_MAX; |
336 | entry->offset_long = off; | |
b5b474dd | 337 | } else { |
b7c44096 | 338 | entry->offset = off; |
b5b474dd CMN |
339 | } |
340 | ||
341 | error = git_packfile_unpack(&obj, idx->pack, &off); | |
342 | if (error < GIT_SUCCESS) { | |
343 | error = git__rethrow(error, "Failed to unpack object"); | |
344 | goto cleanup; | |
345 | } | |
346 | ||
c1af5a39 | 347 | /* FIXME: Parse the object instead of hashing it */ |
c85e08b1 | 348 | error = git_odb__hash_obj(&oid, &obj); |
b5b474dd CMN |
349 | if (error < GIT_SUCCESS) { |
350 | error = git__rethrow(error, "Failed to hash object"); | |
ab525a74 CMN |
351 | goto cleanup; |
352 | } | |
353 | ||
c1af5a39 CMN |
354 | pentry = git__malloc(sizeof(struct git_pack_entry)); |
355 | if (pentry == NULL) { | |
356 | error = GIT_ENOMEM; | |
357 | goto cleanup; | |
358 | } | |
359 | git_oid_cpy(&pentry->sha1, &oid); | |
360 | pentry->offset = entry_start; | |
361 | error = git_vector_insert(&idx->pack->cache, pentry); | |
362 | if (error < GIT_SUCCESS) | |
363 | goto cleanup; | |
364 | ||
b7c44096 CMN |
365 | git_oid_cpy(&entry->oid, &oid); |
366 | entry->crc = crc32(0L, Z_NULL, 0); | |
367 | ||
368 | entry_size = off - entry_start; | |
369 | packed = git_mwindow_open(mwf, &w, entry_start, entry_size, &left); | |
370 | if (packed == NULL) { | |
371 | error = git__rethrow(error, "Failed to open window to read packed data"); | |
372 | goto cleanup; | |
373 | } | |
374 | entry->crc = htonl(crc32(entry->crc, packed, entry_size)); | |
375 | git_mwindow_close(&w); | |
ab525a74 | 376 | |
b5b474dd | 377 | /* Add the object to the list */ |
b7c44096 CMN |
378 | error = git_vector_insert(&idx->objects, entry); |
379 | if (error < GIT_SUCCESS) { | |
380 | error = git__rethrow(error, "Failed to add entry to list"); | |
381 | goto cleanup; | |
382 | } | |
b5b474dd CMN |
383 | |
384 | for (i = oid.id[0]; i < 256; ++i) { | |
b7c44096 | 385 | idx->fanout[i]++; |
b5b474dd CMN |
386 | } |
387 | ||
3286c408 | 388 | git__free(obj.data); |
ab525a74 | 389 | |
b7c44096 | 390 | stats->processed = ++processed; |
ab525a74 CMN |
391 | } |
392 | ||
393 | cleanup: | |
394 | git_mwindow_free_all(mwf); | |
395 | ||
f23c4a66 | 396 | return error; |
ab525a74 | 397 | |
f23c4a66 CMN |
398 | } |
399 | ||
ab525a74 | 400 | void git_indexer_free(git_indexer *idx) |
3412391d | 401 | { |
b7c44096 CMN |
402 | unsigned int i; |
403 | struct entry *e; | |
c1af5a39 | 404 | struct git_pack_entry *pe; |
b7c44096 | 405 | |
92be7908 CMN |
406 | if (idx == NULL) |
407 | return; | |
408 | ||
7d0cdf82 | 409 | p_close(idx->pack->mwf.fd); |
b7c44096 | 410 | git_vector_foreach(&idx->objects, i, e) |
3286c408 | 411 | git__free(e); |
b7c44096 | 412 | git_vector_free(&idx->objects); |
c1af5a39 | 413 | git_vector_foreach(&idx->pack->cache, i, pe) |
3286c408 | 414 | git__free(pe); |
c1af5a39 | 415 | git_vector_free(&idx->pack->cache); |
3286c408 VM |
416 | git__free(idx->pack); |
417 | git__free(idx); | |
3412391d | 418 | } |
ab525a74 | 419 |