]>
Commit | Line | Data |
---|---|---|
3412391d | 1 | /* |
bb742ede | 2 | * Copyright (C) 2009-2011 the libgit2 contributors |
3412391d | 3 | * |
bb742ede VM |
4 | * This file is part of libgit2, distributed under the GNU GPL v2 with |
5 | * a Linking Exception. For full terms see the included COPYING file. | |
3412391d CMN |
6 | */ |
7 | ||
f23c4a66 | 8 | #include "git2/indexer.h" |
7d0cdf82 | 9 | #include "git2/object.h" |
ab525a74 | 10 | #include "git2/zlib.h" |
b7c44096 | 11 | #include "git2/oid.h" |
f23c4a66 | 12 | |
3412391d CMN |
13 | #include "common.h" |
14 | #include "pack.h" | |
f23c4a66 | 15 | #include "mwindow.h" |
3412391d | 16 | #include "posix.h" |
b7c44096 CMN |
17 | #include "pack.h" |
18 | #include "filebuf.h" | |
19 | #include "sha1.h" | |
20 | ||
21 | #define UINT31_MAX (0x7FFFFFFF) | |
3412391d | 22 | |
b5b474dd | 23 | struct entry { |
b7c44096 | 24 | git_oid oid; |
b5b474dd CMN |
25 | uint32_t crc; |
26 | uint32_t offset; | |
27 | uint64_t offset_long; | |
28 | }; | |
29 | ||
2d3e417e | 30 | struct git_indexer { |
a070f152 | 31 | struct git_pack_file *pack; |
f23c4a66 | 32 | struct stat st; |
b5b474dd | 33 | struct git_pack_header hdr; |
b7c44096 CMN |
34 | size_t nr_objects; |
35 | git_vector objects; | |
36 | git_filebuf file; | |
37 | unsigned int fanout[256]; | |
38 | git_oid hash; | |
2d3e417e | 39 | }; |
f23c4a66 | 40 | |
b7c44096 CMN |
41 | const git_oid *git_indexer_hash(git_indexer *idx) |
42 | { | |
43 | return &idx->hash; | |
44 | } | |
45 | ||
ab525a74 | 46 | static int parse_header(git_indexer *idx) |
3412391d | 47 | { |
3412391d CMN |
48 | int error; |
49 | ||
50 | /* Verify we recognize this pack file format. */ | |
b5b474dd CMN |
51 | if ((error = p_read(idx->pack->mwf.fd, &idx->hdr, sizeof(idx->hdr))) < GIT_SUCCESS) |
52 | return git__rethrow(error, "Failed to read in pack header"); | |
3412391d | 53 | |
b7c44096 | 54 | if (idx->hdr.hdr_signature != ntohl(PACK_SIGNATURE)) |
b5b474dd | 55 | return git__throw(GIT_EOBJCORRUPTED, "Wrong pack signature"); |
3412391d | 56 | |
b5b474dd CMN |
57 | if (!pack_version_ok(idx->hdr.hdr_version)) |
58 | return git__throw(GIT_EOBJCORRUPTED, "Wrong pack version"); | |
3412391d | 59 | |
f23c4a66 | 60 | |
3412391d | 61 | return GIT_SUCCESS; |
3412391d CMN |
62 | } |
63 | ||
c1af5a39 | 64 | static int objects_cmp(const void *a, const void *b) |
b7c44096 CMN |
65 | { |
66 | const struct entry *entrya = a; | |
67 | const struct entry *entryb = b; | |
68 | ||
69 | return git_oid_cmp(&entrya->oid, &entryb->oid); | |
70 | } | |
71 | ||
c1af5a39 CMN |
72 | static int cache_cmp(const void *a, const void *b) |
73 | { | |
74 | const struct git_pack_entry *ea = a; | |
75 | const struct git_pack_entry *eb = b; | |
76 | ||
77 | return git_oid_cmp(&ea->sha1, &eb->sha1); | |
78 | } | |
79 | ||
80 | ||
ab525a74 | 81 | int git_indexer_new(git_indexer **out, const char *packname) |
3412391d | 82 | { |
ab525a74 | 83 | git_indexer *idx; |
26e74c6a | 84 | size_t namelen; |
3412391d CMN |
85 | int ret, error; |
86 | ||
ade3c9bb CMN |
87 | assert(out && packname); |
88 | ||
b7c44096 CMN |
89 | if (git_path_root(packname) < 0) |
90 | return git__throw(GIT_EINVALIDPATH, "Path is not absolute"); | |
91 | ||
ab525a74 | 92 | idx = git__malloc(sizeof(git_indexer)); |
3412391d CMN |
93 | if (idx == NULL) |
94 | return GIT_ENOMEM; | |
95 | ||
96 | memset(idx, 0x0, sizeof(*idx)); | |
97 | ||
98 | namelen = strlen(packname); | |
a070f152 | 99 | idx->pack = git__malloc(sizeof(struct git_pack_file) + namelen + 1); |
f6867e63 VM |
100 | if (idx->pack == NULL) { |
101 | error = GIT_ENOMEM; | |
3412391d | 102 | goto cleanup; |
f6867e63 | 103 | } |
3412391d | 104 | |
a070f152 | 105 | memset(idx->pack, 0x0, sizeof(struct git_pack_file)); |
b7c44096 | 106 | memcpy(idx->pack->pack_name, packname, namelen + 1); |
3412391d CMN |
107 | |
108 | ret = p_stat(packname, &idx->st); | |
109 | if (ret < 0) { | |
110 | if (errno == ENOENT) | |
111 | error = git__throw(GIT_ENOTFOUND, "Failed to stat packfile. File not found"); | |
112 | else | |
113 | error = git__throw(GIT_EOSERR, "Failed to stat packfile."); | |
114 | ||
115 | goto cleanup; | |
116 | } | |
117 | ||
118 | ret = p_open(idx->pack->pack_name, O_RDONLY); | |
119 | if (ret < 0) { | |
120 | error = git__throw(GIT_EOSERR, "Failed to open packfile"); | |
121 | goto cleanup; | |
122 | } | |
123 | ||
7d0cdf82 | 124 | idx->pack->mwf.fd = ret; |
f6867e63 | 125 | idx->pack->mwf.size = (git_off_t)idx->st.st_size; |
3412391d CMN |
126 | |
127 | error = parse_header(idx); | |
128 | if (error < GIT_SUCCESS) { | |
129 | error = git__rethrow(error, "Failed to parse packfile header"); | |
130 | goto cleanup; | |
131 | } | |
132 | ||
b7c44096 CMN |
133 | idx->nr_objects = ntohl(idx->hdr.hdr_entries); |
134 | ||
c1af5a39 CMN |
135 | error = git_vector_init(&idx->pack->cache, idx->nr_objects, cache_cmp); |
136 | if (error < GIT_SUCCESS) | |
137 | goto cleanup; | |
138 | ||
139 | idx->pack->has_cache = 1; | |
b7c44096 | 140 | error = git_vector_init(&idx->objects, idx->nr_objects, objects_cmp); |
c1af5a39 | 141 | if (error < GIT_SUCCESS) |
b5b474dd | 142 | goto cleanup; |
b5b474dd | 143 | |
3412391d CMN |
144 | *out = idx; |
145 | ||
146 | return GIT_SUCCESS; | |
147 | ||
148 | cleanup: | |
b7c44096 | 149 | git_indexer_free(idx); |
3412391d CMN |
150 | |
151 | return error; | |
152 | } | |
153 | ||
b7c44096 | 154 | static void index_path(char *path, git_indexer *idx) |
f23c4a66 | 155 | { |
b7c44096 CMN |
156 | char *ptr; |
157 | const char prefix[] = "pack-", suffix[] = ".idx\0"; | |
158 | ||
159 | ptr = strrchr(path, '/') + 1; | |
160 | ||
932669b8 KS |
161 | memcpy(ptr, prefix, strlen(prefix)); |
162 | ptr += strlen(prefix); | |
b7c44096 CMN |
163 | git_oid_fmt(ptr, &idx->hash); |
164 | ptr += GIT_OID_HEXSZ; | |
932669b8 | 165 | memcpy(ptr, suffix, strlen(suffix)); |
b7c44096 CMN |
166 | } |
167 | ||
48b3ad4f | 168 | int git_indexer_write(git_indexer *idx) |
b7c44096 CMN |
169 | { |
170 | git_mwindow *w = NULL; | |
26e74c6a SS |
171 | int error; |
172 | size_t namelen; | |
f6867e63 | 173 | unsigned int i, long_offsets = 0, left; |
b7c44096 CMN |
174 | struct git_pack_idx_header hdr; |
175 | char filename[GIT_PATH_MAX]; | |
176 | struct entry *entry; | |
177 | void *packfile_hash; | |
178 | git_oid file_hash; | |
179 | SHA_CTX ctx; | |
180 | ||
181 | git_vector_sort(&idx->objects); | |
182 | ||
183 | namelen = strlen(idx->pack->pack_name); | |
184 | memcpy(filename, idx->pack->pack_name, namelen); | |
932669b8 | 185 | memcpy(filename + namelen - strlen("pack"), "idx", strlen("idx") + 1); |
b7c44096 CMN |
186 | |
187 | error = git_filebuf_open(&idx->file, filename, GIT_FILEBUF_HASH_CONTENTS); | |
188 | ||
189 | /* Write out the header */ | |
190 | hdr.idx_signature = htonl(PACK_IDX_SIGNATURE); | |
191 | hdr.idx_version = htonl(2); | |
192 | error = git_filebuf_write(&idx->file, &hdr, sizeof(hdr)); | |
193 | ||
194 | /* Write out the fanout table */ | |
195 | for (i = 0; i < 256; ++i) { | |
196 | uint32_t n = htonl(idx->fanout[i]); | |
197 | error = git_filebuf_write(&idx->file, &n, sizeof(n)); | |
198 | if (error < GIT_SUCCESS) | |
199 | goto cleanup; | |
200 | } | |
201 | ||
202 | /* Write out the object names (SHA-1 hashes) */ | |
203 | SHA1_Init(&ctx); | |
204 | git_vector_foreach(&idx->objects, i, entry) { | |
205 | error = git_filebuf_write(&idx->file, &entry->oid, sizeof(git_oid)); | |
206 | SHA1_Update(&ctx, &entry->oid, GIT_OID_RAWSZ); | |
207 | if (error < GIT_SUCCESS) | |
208 | goto cleanup; | |
209 | } | |
210 | SHA1_Final(idx->hash.id, &ctx); | |
211 | ||
212 | /* Write out the CRC32 values */ | |
213 | git_vector_foreach(&idx->objects, i, entry) { | |
214 | error = git_filebuf_write(&idx->file, &entry->crc, sizeof(uint32_t)); | |
215 | if (error < GIT_SUCCESS) | |
216 | goto cleanup; | |
217 | } | |
218 | ||
219 | /* Write out the offsets */ | |
220 | git_vector_foreach(&idx->objects, i, entry) { | |
221 | uint32_t n; | |
222 | ||
223 | if (entry->offset == UINT32_MAX) | |
224 | n = htonl(0x80000000 | long_offsets++); | |
225 | else | |
226 | n = htonl(entry->offset); | |
227 | ||
228 | error = git_filebuf_write(&idx->file, &n, sizeof(uint32_t)); | |
229 | if (error < GIT_SUCCESS) | |
230 | goto cleanup; | |
231 | } | |
232 | ||
233 | /* Write out the long offsets */ | |
234 | git_vector_foreach(&idx->objects, i, entry) { | |
235 | uint32_t split[2]; | |
236 | ||
237 | if (entry->offset != UINT32_MAX) | |
238 | continue; | |
239 | ||
240 | split[0] = htonl(entry->offset_long >> 32); | |
241 | split[1] = htonl(entry->offset_long & 0xffffffff); | |
242 | ||
243 | error = git_filebuf_write(&idx->file, &split, sizeof(uint32_t) * 2); | |
244 | if (error < GIT_SUCCESS) | |
245 | goto cleanup; | |
246 | } | |
247 | ||
248 | /* Write out the packfile trailer */ | |
249 | ||
250 | packfile_hash = git_mwindow_open(&idx->pack->mwf, &w, idx->st.st_size - GIT_OID_RAWSZ, GIT_OID_RAWSZ, &left); | |
c1af5a39 | 251 | git_mwindow_close(&w); |
b7c44096 CMN |
252 | if (packfile_hash == NULL) { |
253 | error = git__rethrow(GIT_ENOMEM, "Failed to open window to packfile hash"); | |
254 | goto cleanup; | |
255 | } | |
256 | ||
257 | memcpy(&file_hash, packfile_hash, GIT_OID_RAWSZ); | |
258 | ||
259 | git_mwindow_close(&w); | |
260 | ||
261 | error = git_filebuf_write(&idx->file, &file_hash, sizeof(git_oid)); | |
262 | ||
263 | /* Write out the index sha */ | |
264 | error = git_filebuf_hash(&file_hash, &idx->file); | |
265 | if (error < GIT_SUCCESS) | |
266 | goto cleanup; | |
267 | ||
268 | error = git_filebuf_write(&idx->file, &file_hash, sizeof(git_oid)); | |
269 | if (error < GIT_SUCCESS) | |
270 | goto cleanup; | |
271 | ||
272 | /* Figure out what the final name should be */ | |
273 | index_path(filename, idx); | |
274 | /* Commit file */ | |
275 | error = git_filebuf_commit_at(&idx->file, filename); | |
276 | ||
277 | cleanup: | |
c1af5a39 | 278 | git_mwindow_free_all(&idx->pack->mwf); |
b7c44096 CMN |
279 | if (error < GIT_SUCCESS) |
280 | git_filebuf_cleanup(&idx->file); | |
281 | ||
282 | return error; | |
283 | } | |
284 | ||
285 | int git_indexer_run(git_indexer *idx, git_indexer_stats *stats) | |
286 | { | |
287 | git_mwindow_file *mwf; | |
288 | off_t off = sizeof(struct git_pack_header); | |
f23c4a66 | 289 | int error; |
b7c44096 CMN |
290 | struct entry *entry; |
291 | unsigned int left, processed; | |
f23c4a66 | 292 | |
b7c44096 | 293 | assert(idx && stats); |
b5b474dd | 294 | |
b7c44096 | 295 | mwf = &idx->pack->mwf; |
f23c4a66 CMN |
296 | error = git_mwindow_file_register(mwf); |
297 | if (error < GIT_SUCCESS) | |
298 | return git__rethrow(error, "Failed to register mwindow file"); | |
299 | ||
b7c44096 CMN |
300 | stats->total = idx->nr_objects; |
301 | stats->processed = processed = 0; | |
f23c4a66 | 302 | |
b7c44096 | 303 | while (processed < idx->nr_objects) { |
b5b474dd CMN |
304 | git_rawobj obj; |
305 | git_oid oid; | |
c1af5a39 | 306 | struct git_pack_entry *pentry; |
b7c44096 | 307 | git_mwindow *w = NULL; |
c85e08b1 | 308 | int i; |
b7c44096 CMN |
309 | off_t entry_start = off; |
310 | void *packed; | |
311 | size_t entry_size; | |
b5b474dd | 312 | |
b7c44096 CMN |
313 | entry = git__malloc(sizeof(struct entry)); |
314 | memset(entry, 0x0, sizeof(struct entry)); | |
b5b474dd CMN |
315 | |
316 | if (off > UINT31_MAX) { | |
b7c44096 CMN |
317 | entry->offset = UINT32_MAX; |
318 | entry->offset_long = off; | |
b5b474dd | 319 | } else { |
b7c44096 | 320 | entry->offset = off; |
b5b474dd CMN |
321 | } |
322 | ||
323 | error = git_packfile_unpack(&obj, idx->pack, &off); | |
324 | if (error < GIT_SUCCESS) { | |
325 | error = git__rethrow(error, "Failed to unpack object"); | |
326 | goto cleanup; | |
327 | } | |
328 | ||
c1af5a39 | 329 | /* FIXME: Parse the object instead of hashing it */ |
c85e08b1 | 330 | error = git_odb__hash_obj(&oid, &obj); |
b5b474dd CMN |
331 | if (error < GIT_SUCCESS) { |
332 | error = git__rethrow(error, "Failed to hash object"); | |
ab525a74 CMN |
333 | goto cleanup; |
334 | } | |
335 | ||
c1af5a39 CMN |
336 | pentry = git__malloc(sizeof(struct git_pack_entry)); |
337 | if (pentry == NULL) { | |
338 | error = GIT_ENOMEM; | |
339 | goto cleanup; | |
340 | } | |
341 | git_oid_cpy(&pentry->sha1, &oid); | |
342 | pentry->offset = entry_start; | |
343 | error = git_vector_insert(&idx->pack->cache, pentry); | |
344 | if (error < GIT_SUCCESS) | |
345 | goto cleanup; | |
346 | ||
b7c44096 CMN |
347 | git_oid_cpy(&entry->oid, &oid); |
348 | entry->crc = crc32(0L, Z_NULL, 0); | |
349 | ||
350 | entry_size = off - entry_start; | |
351 | packed = git_mwindow_open(mwf, &w, entry_start, entry_size, &left); | |
352 | if (packed == NULL) { | |
353 | error = git__rethrow(error, "Failed to open window to read packed data"); | |
354 | goto cleanup; | |
355 | } | |
356 | entry->crc = htonl(crc32(entry->crc, packed, entry_size)); | |
357 | git_mwindow_close(&w); | |
ab525a74 | 358 | |
b5b474dd | 359 | /* Add the object to the list */ |
b7c44096 CMN |
360 | error = git_vector_insert(&idx->objects, entry); |
361 | if (error < GIT_SUCCESS) { | |
362 | error = git__rethrow(error, "Failed to add entry to list"); | |
363 | goto cleanup; | |
364 | } | |
b5b474dd CMN |
365 | |
366 | for (i = oid.id[0]; i < 256; ++i) { | |
b7c44096 | 367 | idx->fanout[i]++; |
b5b474dd CMN |
368 | } |
369 | ||
370 | free(obj.data); | |
ab525a74 | 371 | |
b7c44096 | 372 | stats->processed = ++processed; |
ab525a74 CMN |
373 | } |
374 | ||
375 | cleanup: | |
376 | git_mwindow_free_all(mwf); | |
377 | ||
f23c4a66 | 378 | return error; |
ab525a74 | 379 | |
f23c4a66 CMN |
380 | } |
381 | ||
ab525a74 | 382 | void git_indexer_free(git_indexer *idx) |
3412391d | 383 | { |
b7c44096 CMN |
384 | unsigned int i; |
385 | struct entry *e; | |
c1af5a39 | 386 | struct git_pack_entry *pe; |
b7c44096 | 387 | |
7d0cdf82 | 388 | p_close(idx->pack->mwf.fd); |
b7c44096 CMN |
389 | git_vector_foreach(&idx->objects, i, e) |
390 | free(e); | |
391 | git_vector_free(&idx->objects); | |
c1af5a39 CMN |
392 | git_vector_foreach(&idx->pack->cache, i, pe) |
393 | free(pe); | |
394 | git_vector_free(&idx->pack->cache); | |
3412391d CMN |
395 | free(idx->pack); |
396 | free(idx); | |
397 | } | |
ab525a74 | 398 |