]> git.proxmox.com Git - libgit2.git/blame - src/indexer.c
Update Copyright header
[libgit2.git] / src / indexer.c
CommitLineData
3412391d 1/*
5e0de328 2 * Copyright (C) 2009-2012 the libgit2 contributors
3412391d 3 *
bb742ede
VM
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
3412391d
CMN
6 */
7
f23c4a66 8#include "git2/indexer.h"
7d0cdf82 9#include "git2/object.h"
ab525a74 10#include "git2/zlib.h"
b7c44096 11#include "git2/oid.h"
f23c4a66 12
3412391d
CMN
13#include "common.h"
14#include "pack.h"
f23c4a66 15#include "mwindow.h"
3412391d 16#include "posix.h"
b7c44096
CMN
17#include "pack.h"
18#include "filebuf.h"
19#include "sha1.h"
20
21#define UINT31_MAX (0x7FFFFFFF)
3412391d 22
b5b474dd 23struct entry {
b7c44096 24 git_oid oid;
b5b474dd
CMN
25 uint32_t crc;
26 uint32_t offset;
27 uint64_t offset_long;
28};
29
2d3e417e 30struct git_indexer {
a070f152 31 struct git_pack_file *pack;
f23c4a66 32 struct stat st;
b5b474dd 33 struct git_pack_header hdr;
b7c44096
CMN
34 size_t nr_objects;
35 git_vector objects;
36 git_filebuf file;
37 unsigned int fanout[256];
38 git_oid hash;
2d3e417e 39};
f23c4a66 40
b7c44096
CMN
41const git_oid *git_indexer_hash(git_indexer *idx)
42{
43 return &idx->hash;
44}
45
ab525a74 46static int parse_header(git_indexer *idx)
3412391d 47{
3412391d
CMN
48 int error;
49
50 /* Verify we recognize this pack file format. */
b5b474dd
CMN
51 if ((error = p_read(idx->pack->mwf.fd, &idx->hdr, sizeof(idx->hdr))) < GIT_SUCCESS)
52 return git__rethrow(error, "Failed to read in pack header");
3412391d 53
b7c44096 54 if (idx->hdr.hdr_signature != ntohl(PACK_SIGNATURE))
b5b474dd 55 return git__throw(GIT_EOBJCORRUPTED, "Wrong pack signature");
3412391d 56
b5b474dd
CMN
57 if (!pack_version_ok(idx->hdr.hdr_version))
58 return git__throw(GIT_EOBJCORRUPTED, "Wrong pack version");
3412391d 59
f23c4a66 60
3412391d 61 return GIT_SUCCESS;
3412391d
CMN
62}
63
c1af5a39 64static int objects_cmp(const void *a, const void *b)
b7c44096
CMN
65{
66 const struct entry *entrya = a;
67 const struct entry *entryb = b;
68
69 return git_oid_cmp(&entrya->oid, &entryb->oid);
70}
71
c1af5a39
CMN
72static int cache_cmp(const void *a, const void *b)
73{
74 const struct git_pack_entry *ea = a;
75 const struct git_pack_entry *eb = b;
76
77 return git_oid_cmp(&ea->sha1, &eb->sha1);
78}
79
80
ab525a74 81int git_indexer_new(git_indexer **out, const char *packname)
3412391d 82{
ab525a74 83 git_indexer *idx;
26e74c6a 84 size_t namelen;
3412391d
CMN
85 int ret, error;
86
ade3c9bb
CMN
87 assert(out && packname);
88
b7c44096
CMN
89 if (git_path_root(packname) < 0)
90 return git__throw(GIT_EINVALIDPATH, "Path is not absolute");
91
ab525a74 92 idx = git__malloc(sizeof(git_indexer));
3412391d
CMN
93 if (idx == NULL)
94 return GIT_ENOMEM;
95
96 memset(idx, 0x0, sizeof(*idx));
97
98 namelen = strlen(packname);
a070f152 99 idx->pack = git__malloc(sizeof(struct git_pack_file) + namelen + 1);
f6867e63
VM
100 if (idx->pack == NULL) {
101 error = GIT_ENOMEM;
3412391d 102 goto cleanup;
f6867e63 103 }
3412391d 104
a070f152 105 memset(idx->pack, 0x0, sizeof(struct git_pack_file));
b7c44096 106 memcpy(idx->pack->pack_name, packname, namelen + 1);
3412391d
CMN
107
108 ret = p_stat(packname, &idx->st);
109 if (ret < 0) {
110 if (errno == ENOENT)
111 error = git__throw(GIT_ENOTFOUND, "Failed to stat packfile. File not found");
112 else
113 error = git__throw(GIT_EOSERR, "Failed to stat packfile.");
114
115 goto cleanup;
116 }
117
118 ret = p_open(idx->pack->pack_name, O_RDONLY);
119 if (ret < 0) {
120 error = git__throw(GIT_EOSERR, "Failed to open packfile");
121 goto cleanup;
122 }
123
7d0cdf82 124 idx->pack->mwf.fd = ret;
f6867e63 125 idx->pack->mwf.size = (git_off_t)idx->st.st_size;
3412391d
CMN
126
127 error = parse_header(idx);
128 if (error < GIT_SUCCESS) {
129 error = git__rethrow(error, "Failed to parse packfile header");
130 goto cleanup;
131 }
132
b7c44096
CMN
133 idx->nr_objects = ntohl(idx->hdr.hdr_entries);
134
c1af5a39
CMN
135 error = git_vector_init(&idx->pack->cache, idx->nr_objects, cache_cmp);
136 if (error < GIT_SUCCESS)
137 goto cleanup;
138
139 idx->pack->has_cache = 1;
b7c44096 140 error = git_vector_init(&idx->objects, idx->nr_objects, objects_cmp);
c1af5a39 141 if (error < GIT_SUCCESS)
b5b474dd 142 goto cleanup;
b5b474dd 143
3412391d
CMN
144 *out = idx;
145
146 return GIT_SUCCESS;
147
148cleanup:
b7c44096 149 git_indexer_free(idx);
3412391d
CMN
150
151 return error;
152}
153
97769280 154static int index_path(git_buf *path, git_indexer *idx)
f23c4a66 155{
72d6a20b 156 const char prefix[] = "pack-", suffix[] = ".idx";
97769280 157 size_t slash = (size_t)path->size;
b7c44096 158
97769280
RB
159 /* search backwards for '/' */
160 while (slash > 0 && path->ptr[slash - 1] != '/')
161 slash--;
b7c44096 162
97769280
RB
163 if (git_buf_grow(path, slash + 1 + strlen(prefix) +
164 GIT_OID_HEXSZ + strlen(suffix) + 1) < GIT_SUCCESS)
165 return GIT_ENOMEM;
166
d0ec3fb8 167 git_buf_truncate(path, slash);
97769280
RB
168 git_buf_puts(path, prefix);
169 git_oid_fmt(path->ptr + path->size, &idx->hash);
170 path->size += GIT_OID_HEXSZ;
171 git_buf_puts(path, suffix);
172
173 return git_buf_lasterror(path);
b7c44096
CMN
174}
175
48b3ad4f 176int git_indexer_write(git_indexer *idx)
b7c44096
CMN
177{
178 git_mwindow *w = NULL;
26e74c6a 179 int error;
f6867e63 180 unsigned int i, long_offsets = 0, left;
b7c44096 181 struct git_pack_idx_header hdr;
97769280 182 git_buf filename = GIT_BUF_INIT;
b7c44096
CMN
183 struct entry *entry;
184 void *packfile_hash;
185 git_oid file_hash;
186 SHA_CTX ctx;
187
188 git_vector_sort(&idx->objects);
189
97769280
RB
190 git_buf_sets(&filename, idx->pack->pack_name);
191 git_buf_truncate(&filename, filename.size - strlen("pack"));
192 git_buf_puts(&filename, "idx");
b7c44096 193
97769280
RB
194 if ((error = git_buf_lasterror(&filename)) < GIT_SUCCESS)
195 goto cleanup;
196
197 error = git_filebuf_open(&idx->file, filename.ptr, GIT_FILEBUF_HASH_CONTENTS);
198 if (error < GIT_SUCCESS)
199 goto cleanup;
b7c44096
CMN
200
201 /* Write out the header */
202 hdr.idx_signature = htonl(PACK_IDX_SIGNATURE);
203 hdr.idx_version = htonl(2);
204 error = git_filebuf_write(&idx->file, &hdr, sizeof(hdr));
97769280
RB
205 if (error < GIT_SUCCESS)
206 goto cleanup;
b7c44096
CMN
207
208 /* Write out the fanout table */
209 for (i = 0; i < 256; ++i) {
210 uint32_t n = htonl(idx->fanout[i]);
211 error = git_filebuf_write(&idx->file, &n, sizeof(n));
212 if (error < GIT_SUCCESS)
213 goto cleanup;
214 }
215
216 /* Write out the object names (SHA-1 hashes) */
217 SHA1_Init(&ctx);
218 git_vector_foreach(&idx->objects, i, entry) {
219 error = git_filebuf_write(&idx->file, &entry->oid, sizeof(git_oid));
220 SHA1_Update(&ctx, &entry->oid, GIT_OID_RAWSZ);
221 if (error < GIT_SUCCESS)
222 goto cleanup;
223 }
224 SHA1_Final(idx->hash.id, &ctx);
225
226 /* Write out the CRC32 values */
227 git_vector_foreach(&idx->objects, i, entry) {
228 error = git_filebuf_write(&idx->file, &entry->crc, sizeof(uint32_t));
229 if (error < GIT_SUCCESS)
230 goto cleanup;
231 }
232
233 /* Write out the offsets */
234 git_vector_foreach(&idx->objects, i, entry) {
235 uint32_t n;
236
237 if (entry->offset == UINT32_MAX)
238 n = htonl(0x80000000 | long_offsets++);
239 else
240 n = htonl(entry->offset);
241
242 error = git_filebuf_write(&idx->file, &n, sizeof(uint32_t));
243 if (error < GIT_SUCCESS)
244 goto cleanup;
245 }
246
247 /* Write out the long offsets */
248 git_vector_foreach(&idx->objects, i, entry) {
249 uint32_t split[2];
250
251 if (entry->offset != UINT32_MAX)
252 continue;
253
254 split[0] = htonl(entry->offset_long >> 32);
255 split[1] = htonl(entry->offset_long & 0xffffffff);
256
257 error = git_filebuf_write(&idx->file, &split, sizeof(uint32_t) * 2);
258 if (error < GIT_SUCCESS)
259 goto cleanup;
260 }
261
262 /* Write out the packfile trailer */
263
264 packfile_hash = git_mwindow_open(&idx->pack->mwf, &w, idx->st.st_size - GIT_OID_RAWSZ, GIT_OID_RAWSZ, &left);
c1af5a39 265 git_mwindow_close(&w);
b7c44096
CMN
266 if (packfile_hash == NULL) {
267 error = git__rethrow(GIT_ENOMEM, "Failed to open window to packfile hash");
268 goto cleanup;
269 }
270
271 memcpy(&file_hash, packfile_hash, GIT_OID_RAWSZ);
272
273 git_mwindow_close(&w);
274
275 error = git_filebuf_write(&idx->file, &file_hash, sizeof(git_oid));
276
277 /* Write out the index sha */
278 error = git_filebuf_hash(&file_hash, &idx->file);
279 if (error < GIT_SUCCESS)
280 goto cleanup;
281
282 error = git_filebuf_write(&idx->file, &file_hash, sizeof(git_oid));
283 if (error < GIT_SUCCESS)
284 goto cleanup;
285
286 /* Figure out what the final name should be */
97769280
RB
287 error = index_path(&filename, idx);
288 if (error < GIT_SUCCESS)
289 goto cleanup;
290
b7c44096 291 /* Commit file */
97769280 292 error = git_filebuf_commit_at(&idx->file, filename.ptr, GIT_PACK_FILE_MODE);
b7c44096
CMN
293
294cleanup:
c1af5a39 295 git_mwindow_free_all(&idx->pack->mwf);
b7c44096
CMN
296 if (error < GIT_SUCCESS)
297 git_filebuf_cleanup(&idx->file);
97769280 298 git_buf_free(&filename);
b7c44096
CMN
299
300 return error;
301}
302
303int git_indexer_run(git_indexer *idx, git_indexer_stats *stats)
304{
305 git_mwindow_file *mwf;
306 off_t off = sizeof(struct git_pack_header);
f23c4a66 307 int error;
b7c44096
CMN
308 struct entry *entry;
309 unsigned int left, processed;
f23c4a66 310
b7c44096 311 assert(idx && stats);
b5b474dd 312
b7c44096 313 mwf = &idx->pack->mwf;
f23c4a66
CMN
314 error = git_mwindow_file_register(mwf);
315 if (error < GIT_SUCCESS)
316 return git__rethrow(error, "Failed to register mwindow file");
317
b7c44096
CMN
318 stats->total = idx->nr_objects;
319 stats->processed = processed = 0;
f23c4a66 320
b7c44096 321 while (processed < idx->nr_objects) {
b5b474dd
CMN
322 git_rawobj obj;
323 git_oid oid;
c1af5a39 324 struct git_pack_entry *pentry;
b7c44096 325 git_mwindow *w = NULL;
c85e08b1 326 int i;
b7c44096
CMN
327 off_t entry_start = off;
328 void *packed;
329 size_t entry_size;
b5b474dd 330
b7c44096
CMN
331 entry = git__malloc(sizeof(struct entry));
332 memset(entry, 0x0, sizeof(struct entry));
b5b474dd
CMN
333
334 if (off > UINT31_MAX) {
b7c44096
CMN
335 entry->offset = UINT32_MAX;
336 entry->offset_long = off;
b5b474dd 337 } else {
b7c44096 338 entry->offset = off;
b5b474dd
CMN
339 }
340
341 error = git_packfile_unpack(&obj, idx->pack, &off);
342 if (error < GIT_SUCCESS) {
343 error = git__rethrow(error, "Failed to unpack object");
344 goto cleanup;
345 }
346
c1af5a39 347 /* FIXME: Parse the object instead of hashing it */
18e5b854 348 error = git_odb__hashobj(&oid, &obj);
b5b474dd
CMN
349 if (error < GIT_SUCCESS) {
350 error = git__rethrow(error, "Failed to hash object");
ab525a74
CMN
351 goto cleanup;
352 }
353
c1af5a39
CMN
354 pentry = git__malloc(sizeof(struct git_pack_entry));
355 if (pentry == NULL) {
356 error = GIT_ENOMEM;
357 goto cleanup;
358 }
359 git_oid_cpy(&pentry->sha1, &oid);
360 pentry->offset = entry_start;
361 error = git_vector_insert(&idx->pack->cache, pentry);
362 if (error < GIT_SUCCESS)
363 goto cleanup;
364
b7c44096
CMN
365 git_oid_cpy(&entry->oid, &oid);
366 entry->crc = crc32(0L, Z_NULL, 0);
367
368 entry_size = off - entry_start;
369 packed = git_mwindow_open(mwf, &w, entry_start, entry_size, &left);
370 if (packed == NULL) {
371 error = git__rethrow(error, "Failed to open window to read packed data");
372 goto cleanup;
373 }
374 entry->crc = htonl(crc32(entry->crc, packed, entry_size));
375 git_mwindow_close(&w);
ab525a74 376
b5b474dd 377 /* Add the object to the list */
b7c44096
CMN
378 error = git_vector_insert(&idx->objects, entry);
379 if (error < GIT_SUCCESS) {
380 error = git__rethrow(error, "Failed to add entry to list");
381 goto cleanup;
382 }
b5b474dd
CMN
383
384 for (i = oid.id[0]; i < 256; ++i) {
b7c44096 385 idx->fanout[i]++;
b5b474dd
CMN
386 }
387
3286c408 388 git__free(obj.data);
ab525a74 389
b7c44096 390 stats->processed = ++processed;
ab525a74
CMN
391 }
392
393cleanup:
394 git_mwindow_free_all(mwf);
395
f23c4a66 396 return error;
ab525a74 397
f23c4a66
CMN
398}
399
ab525a74 400void git_indexer_free(git_indexer *idx)
3412391d 401{
b7c44096
CMN
402 unsigned int i;
403 struct entry *e;
c1af5a39 404 struct git_pack_entry *pe;
b7c44096 405
92be7908
CMN
406 if (idx == NULL)
407 return;
408
7d0cdf82 409 p_close(idx->pack->mwf.fd);
b7c44096 410 git_vector_foreach(&idx->objects, i, e)
3286c408 411 git__free(e);
b7c44096 412 git_vector_free(&idx->objects);
c1af5a39 413 git_vector_foreach(&idx->pack->cache, i, pe)
3286c408 414 git__free(pe);
c1af5a39 415 git_vector_free(&idx->pack->cache);
3286c408
VM
416 git__free(idx->pack);
417 git__free(idx);
3412391d 418}
ab525a74 419