]> git.proxmox.com Git - libgit2.git/blame - src/indexer.c
Cleanup legal data
[libgit2.git] / src / indexer.c
CommitLineData
3412391d 1/*
bb742ede 2 * Copyright (C) 2009-2011 the libgit2 contributors
3412391d 3 *
bb742ede
VM
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
3412391d
CMN
6 */
7
f23c4a66 8#include "git2/indexer.h"
7d0cdf82 9#include "git2/object.h"
ab525a74 10#include "git2/zlib.h"
b7c44096 11#include "git2/oid.h"
f23c4a66 12
3412391d
CMN
13#include "common.h"
14#include "pack.h"
f23c4a66 15#include "mwindow.h"
3412391d 16#include "posix.h"
b7c44096
CMN
17#include "pack.h"
18#include "filebuf.h"
19#include "sha1.h"
20
21#define UINT31_MAX (0x7FFFFFFF)
3412391d 22
b5b474dd 23struct entry {
b7c44096 24 git_oid oid;
b5b474dd
CMN
25 uint32_t crc;
26 uint32_t offset;
27 uint64_t offset_long;
28};
29
2d3e417e 30struct git_indexer {
a070f152 31 struct git_pack_file *pack;
f23c4a66 32 struct stat st;
b5b474dd 33 struct git_pack_header hdr;
b7c44096
CMN
34 size_t nr_objects;
35 git_vector objects;
36 git_filebuf file;
37 unsigned int fanout[256];
38 git_oid hash;
2d3e417e 39};
f23c4a66 40
b7c44096
CMN
41const git_oid *git_indexer_hash(git_indexer *idx)
42{
43 return &idx->hash;
44}
45
ab525a74 46static int parse_header(git_indexer *idx)
3412391d 47{
3412391d
CMN
48 int error;
49
50 /* Verify we recognize this pack file format. */
b5b474dd
CMN
51 if ((error = p_read(idx->pack->mwf.fd, &idx->hdr, sizeof(idx->hdr))) < GIT_SUCCESS)
52 return git__rethrow(error, "Failed to read in pack header");
3412391d 53
b7c44096 54 if (idx->hdr.hdr_signature != ntohl(PACK_SIGNATURE))
b5b474dd 55 return git__throw(GIT_EOBJCORRUPTED, "Wrong pack signature");
3412391d 56
b5b474dd
CMN
57 if (!pack_version_ok(idx->hdr.hdr_version))
58 return git__throw(GIT_EOBJCORRUPTED, "Wrong pack version");
3412391d 59
f23c4a66 60
3412391d 61 return GIT_SUCCESS;
3412391d
CMN
62}
63
c1af5a39 64static int objects_cmp(const void *a, const void *b)
b7c44096
CMN
65{
66 const struct entry *entrya = a;
67 const struct entry *entryb = b;
68
69 return git_oid_cmp(&entrya->oid, &entryb->oid);
70}
71
c1af5a39
CMN
72static int cache_cmp(const void *a, const void *b)
73{
74 const struct git_pack_entry *ea = a;
75 const struct git_pack_entry *eb = b;
76
77 return git_oid_cmp(&ea->sha1, &eb->sha1);
78}
79
80
ab525a74 81int git_indexer_new(git_indexer **out, const char *packname)
3412391d 82{
ab525a74 83 git_indexer *idx;
26e74c6a 84 size_t namelen;
3412391d
CMN
85 int ret, error;
86
ade3c9bb
CMN
87 assert(out && packname);
88
b7c44096
CMN
89 if (git_path_root(packname) < 0)
90 return git__throw(GIT_EINVALIDPATH, "Path is not absolute");
91
ab525a74 92 idx = git__malloc(sizeof(git_indexer));
3412391d
CMN
93 if (idx == NULL)
94 return GIT_ENOMEM;
95
96 memset(idx, 0x0, sizeof(*idx));
97
98 namelen = strlen(packname);
a070f152 99 idx->pack = git__malloc(sizeof(struct git_pack_file) + namelen + 1);
f6867e63
VM
100 if (idx->pack == NULL) {
101 error = GIT_ENOMEM;
3412391d 102 goto cleanup;
f6867e63 103 }
3412391d 104
a070f152 105 memset(idx->pack, 0x0, sizeof(struct git_pack_file));
b7c44096 106 memcpy(idx->pack->pack_name, packname, namelen + 1);
3412391d
CMN
107
108 ret = p_stat(packname, &idx->st);
109 if (ret < 0) {
110 if (errno == ENOENT)
111 error = git__throw(GIT_ENOTFOUND, "Failed to stat packfile. File not found");
112 else
113 error = git__throw(GIT_EOSERR, "Failed to stat packfile.");
114
115 goto cleanup;
116 }
117
118 ret = p_open(idx->pack->pack_name, O_RDONLY);
119 if (ret < 0) {
120 error = git__throw(GIT_EOSERR, "Failed to open packfile");
121 goto cleanup;
122 }
123
7d0cdf82 124 idx->pack->mwf.fd = ret;
f6867e63 125 idx->pack->mwf.size = (git_off_t)idx->st.st_size;
3412391d
CMN
126
127 error = parse_header(idx);
128 if (error < GIT_SUCCESS) {
129 error = git__rethrow(error, "Failed to parse packfile header");
130 goto cleanup;
131 }
132
b7c44096
CMN
133 idx->nr_objects = ntohl(idx->hdr.hdr_entries);
134
c1af5a39
CMN
135 error = git_vector_init(&idx->pack->cache, idx->nr_objects, cache_cmp);
136 if (error < GIT_SUCCESS)
137 goto cleanup;
138
139 idx->pack->has_cache = 1;
b7c44096 140 error = git_vector_init(&idx->objects, idx->nr_objects, objects_cmp);
c1af5a39 141 if (error < GIT_SUCCESS)
b5b474dd 142 goto cleanup;
b5b474dd 143
3412391d
CMN
144 *out = idx;
145
146 return GIT_SUCCESS;
147
148cleanup:
b7c44096 149 git_indexer_free(idx);
3412391d
CMN
150
151 return error;
152}
153
b7c44096 154static void index_path(char *path, git_indexer *idx)
f23c4a66 155{
b7c44096
CMN
156 char *ptr;
157 const char prefix[] = "pack-", suffix[] = ".idx\0";
158
159 ptr = strrchr(path, '/') + 1;
160
932669b8
KS
161 memcpy(ptr, prefix, strlen(prefix));
162 ptr += strlen(prefix);
b7c44096
CMN
163 git_oid_fmt(ptr, &idx->hash);
164 ptr += GIT_OID_HEXSZ;
932669b8 165 memcpy(ptr, suffix, strlen(suffix));
b7c44096
CMN
166}
167
48b3ad4f 168int git_indexer_write(git_indexer *idx)
b7c44096
CMN
169{
170 git_mwindow *w = NULL;
26e74c6a
SS
171 int error;
172 size_t namelen;
f6867e63 173 unsigned int i, long_offsets = 0, left;
b7c44096
CMN
174 struct git_pack_idx_header hdr;
175 char filename[GIT_PATH_MAX];
176 struct entry *entry;
177 void *packfile_hash;
178 git_oid file_hash;
179 SHA_CTX ctx;
180
181 git_vector_sort(&idx->objects);
182
183 namelen = strlen(idx->pack->pack_name);
184 memcpy(filename, idx->pack->pack_name, namelen);
932669b8 185 memcpy(filename + namelen - strlen("pack"), "idx", strlen("idx") + 1);
b7c44096
CMN
186
187 error = git_filebuf_open(&idx->file, filename, GIT_FILEBUF_HASH_CONTENTS);
188
189 /* Write out the header */
190 hdr.idx_signature = htonl(PACK_IDX_SIGNATURE);
191 hdr.idx_version = htonl(2);
192 error = git_filebuf_write(&idx->file, &hdr, sizeof(hdr));
193
194 /* Write out the fanout table */
195 for (i = 0; i < 256; ++i) {
196 uint32_t n = htonl(idx->fanout[i]);
197 error = git_filebuf_write(&idx->file, &n, sizeof(n));
198 if (error < GIT_SUCCESS)
199 goto cleanup;
200 }
201
202 /* Write out the object names (SHA-1 hashes) */
203 SHA1_Init(&ctx);
204 git_vector_foreach(&idx->objects, i, entry) {
205 error = git_filebuf_write(&idx->file, &entry->oid, sizeof(git_oid));
206 SHA1_Update(&ctx, &entry->oid, GIT_OID_RAWSZ);
207 if (error < GIT_SUCCESS)
208 goto cleanup;
209 }
210 SHA1_Final(idx->hash.id, &ctx);
211
212 /* Write out the CRC32 values */
213 git_vector_foreach(&idx->objects, i, entry) {
214 error = git_filebuf_write(&idx->file, &entry->crc, sizeof(uint32_t));
215 if (error < GIT_SUCCESS)
216 goto cleanup;
217 }
218
219 /* Write out the offsets */
220 git_vector_foreach(&idx->objects, i, entry) {
221 uint32_t n;
222
223 if (entry->offset == UINT32_MAX)
224 n = htonl(0x80000000 | long_offsets++);
225 else
226 n = htonl(entry->offset);
227
228 error = git_filebuf_write(&idx->file, &n, sizeof(uint32_t));
229 if (error < GIT_SUCCESS)
230 goto cleanup;
231 }
232
233 /* Write out the long offsets */
234 git_vector_foreach(&idx->objects, i, entry) {
235 uint32_t split[2];
236
237 if (entry->offset != UINT32_MAX)
238 continue;
239
240 split[0] = htonl(entry->offset_long >> 32);
241 split[1] = htonl(entry->offset_long & 0xffffffff);
242
243 error = git_filebuf_write(&idx->file, &split, sizeof(uint32_t) * 2);
244 if (error < GIT_SUCCESS)
245 goto cleanup;
246 }
247
248 /* Write out the packfile trailer */
249
250 packfile_hash = git_mwindow_open(&idx->pack->mwf, &w, idx->st.st_size - GIT_OID_RAWSZ, GIT_OID_RAWSZ, &left);
c1af5a39 251 git_mwindow_close(&w);
b7c44096
CMN
252 if (packfile_hash == NULL) {
253 error = git__rethrow(GIT_ENOMEM, "Failed to open window to packfile hash");
254 goto cleanup;
255 }
256
257 memcpy(&file_hash, packfile_hash, GIT_OID_RAWSZ);
258
259 git_mwindow_close(&w);
260
261 error = git_filebuf_write(&idx->file, &file_hash, sizeof(git_oid));
262
263 /* Write out the index sha */
264 error = git_filebuf_hash(&file_hash, &idx->file);
265 if (error < GIT_SUCCESS)
266 goto cleanup;
267
268 error = git_filebuf_write(&idx->file, &file_hash, sizeof(git_oid));
269 if (error < GIT_SUCCESS)
270 goto cleanup;
271
272 /* Figure out what the final name should be */
273 index_path(filename, idx);
274 /* Commit file */
275 error = git_filebuf_commit_at(&idx->file, filename);
276
277cleanup:
c1af5a39 278 git_mwindow_free_all(&idx->pack->mwf);
b7c44096
CMN
279 if (error < GIT_SUCCESS)
280 git_filebuf_cleanup(&idx->file);
281
282 return error;
283}
284
285int git_indexer_run(git_indexer *idx, git_indexer_stats *stats)
286{
287 git_mwindow_file *mwf;
288 off_t off = sizeof(struct git_pack_header);
f23c4a66 289 int error;
b7c44096
CMN
290 struct entry *entry;
291 unsigned int left, processed;
f23c4a66 292
b7c44096 293 assert(idx && stats);
b5b474dd 294
b7c44096 295 mwf = &idx->pack->mwf;
f23c4a66
CMN
296 error = git_mwindow_file_register(mwf);
297 if (error < GIT_SUCCESS)
298 return git__rethrow(error, "Failed to register mwindow file");
299
b7c44096
CMN
300 stats->total = idx->nr_objects;
301 stats->processed = processed = 0;
f23c4a66 302
b7c44096 303 while (processed < idx->nr_objects) {
b5b474dd
CMN
304 git_rawobj obj;
305 git_oid oid;
c1af5a39 306 struct git_pack_entry *pentry;
b7c44096 307 git_mwindow *w = NULL;
c85e08b1 308 int i;
b7c44096
CMN
309 off_t entry_start = off;
310 void *packed;
311 size_t entry_size;
b5b474dd 312
b7c44096
CMN
313 entry = git__malloc(sizeof(struct entry));
314 memset(entry, 0x0, sizeof(struct entry));
b5b474dd
CMN
315
316 if (off > UINT31_MAX) {
b7c44096
CMN
317 entry->offset = UINT32_MAX;
318 entry->offset_long = off;
b5b474dd 319 } else {
b7c44096 320 entry->offset = off;
b5b474dd
CMN
321 }
322
323 error = git_packfile_unpack(&obj, idx->pack, &off);
324 if (error < GIT_SUCCESS) {
325 error = git__rethrow(error, "Failed to unpack object");
326 goto cleanup;
327 }
328
c1af5a39 329 /* FIXME: Parse the object instead of hashing it */
c85e08b1 330 error = git_odb__hash_obj(&oid, &obj);
b5b474dd
CMN
331 if (error < GIT_SUCCESS) {
332 error = git__rethrow(error, "Failed to hash object");
ab525a74
CMN
333 goto cleanup;
334 }
335
c1af5a39
CMN
336 pentry = git__malloc(sizeof(struct git_pack_entry));
337 if (pentry == NULL) {
338 error = GIT_ENOMEM;
339 goto cleanup;
340 }
341 git_oid_cpy(&pentry->sha1, &oid);
342 pentry->offset = entry_start;
343 error = git_vector_insert(&idx->pack->cache, pentry);
344 if (error < GIT_SUCCESS)
345 goto cleanup;
346
b7c44096
CMN
347 git_oid_cpy(&entry->oid, &oid);
348 entry->crc = crc32(0L, Z_NULL, 0);
349
350 entry_size = off - entry_start;
351 packed = git_mwindow_open(mwf, &w, entry_start, entry_size, &left);
352 if (packed == NULL) {
353 error = git__rethrow(error, "Failed to open window to read packed data");
354 goto cleanup;
355 }
356 entry->crc = htonl(crc32(entry->crc, packed, entry_size));
357 git_mwindow_close(&w);
ab525a74 358
b5b474dd 359 /* Add the object to the list */
b7c44096
CMN
360 error = git_vector_insert(&idx->objects, entry);
361 if (error < GIT_SUCCESS) {
362 error = git__rethrow(error, "Failed to add entry to list");
363 goto cleanup;
364 }
b5b474dd
CMN
365
366 for (i = oid.id[0]; i < 256; ++i) {
b7c44096 367 idx->fanout[i]++;
b5b474dd
CMN
368 }
369
370 free(obj.data);
ab525a74 371
b7c44096 372 stats->processed = ++processed;
ab525a74
CMN
373 }
374
375cleanup:
376 git_mwindow_free_all(mwf);
377
f23c4a66 378 return error;
ab525a74 379
f23c4a66
CMN
380}
381
ab525a74 382void git_indexer_free(git_indexer *idx)
3412391d 383{
b7c44096
CMN
384 unsigned int i;
385 struct entry *e;
c1af5a39 386 struct git_pack_entry *pe;
b7c44096 387
7d0cdf82 388 p_close(idx->pack->mwf.fd);
b7c44096
CMN
389 git_vector_foreach(&idx->objects, i, e)
390 free(e);
391 git_vector_free(&idx->objects);
c1af5a39
CMN
392 git_vector_foreach(&idx->pack->cache, i, pe)
393 free(pe);
394 git_vector_free(&idx->pack->cache);
3412391d
CMN
395 free(idx->pack);
396 free(idx);
397}
ab525a74 398