]> git.proxmox.com Git - libgit2.git/blame - src/oid.c
remote: create FETCH_HEAD with a refspecless remote
[libgit2.git] / src / oid.c
CommitLineData
c15648cb 1/*
359fc2d2 2 * Copyright (C) the libgit2 contributors. All rights reserved.
c15648cb 3 *
bb742ede
VM
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
c15648cb
SP
6 */
7
b3be0fc7 8#include "common.h"
44908fe7 9#include "git2/oid.h"
58519018 10#include "repository.h"
c15648cb 11#include <string.h>
26022f07 12#include <limits.h>
c15648cb 13
af795e49 14static char to_hex[] = "0123456789abcdef";
c15648cb 15
7c7ff7d1
RB
16static int oid_error_invalid(const char *msg)
17{
18 giterr_set(GITERR_INVALID, "Unable to parse OID - %s", msg);
19 return -1;
20}
21
d5afc039 22int git_oid_fromstrn(git_oid *out, const char *str, size_t length)
c15648cb 23{
0ef9d2aa 24 size_t p;
e724b058 25 int v;
d5afc039
VM
26
27 if (length > GIT_OID_HEXSZ)
13640d1b 28 return oid_error_invalid("too long");
d5afc039 29
e724b058 30 for (p = 0; p < length - 1; p += 2) {
eb8de747 31 v = (git__fromhex(str[p + 0]) << 4)
32 | git__fromhex(str[p + 1]);
d5afc039 33
c15648cb 34 if (v < 0)
7c7ff7d1 35 return oid_error_invalid("contains invalid characters");
d5afc039
VM
36
37 out->id[p / 2] = (unsigned char)v;
c15648cb 38 }
d5afc039 39
e724b058 40 if (length % 2) {
eb8de747 41 v = (git__fromhex(str[p + 0]) << 4);
0e058e78 42 if (v < 0)
7c7ff7d1 43 return oid_error_invalid("contains invalid characters");
0e058e78 44
e724b058
DI
45 out->id[p / 2] = (unsigned char)v;
46 p += 2;
47 }
48
6d8d3f19 49 memset(out->id + p / 2, 0, (GIT_OID_HEXSZ - p) / 2);
d5afc039 50
7c7ff7d1 51 return 0;
c15648cb 52}
af795e49 53
0c8efb38
X
54int git_oid_fromstrp(git_oid *out, const char *str)
55{
1e7b7523 56 return git_oid_fromstrn(out, str, strlen(str));
0c8efb38
X
57}
58
d5afc039
VM
59int git_oid_fromstr(git_oid *out, const char *str)
60{
61 return git_oid_fromstrn(out, str, GIT_OID_HEXSZ);
62}
63
213e720c 64GIT_INLINE(char) *fmt_one(char *str, unsigned int val)
af795e49
SP
65{
66 *str++ = to_hex[val >> 4];
67 *str++ = to_hex[val & 0xf];
68 return str;
69}
70
660d59ca 71void git_oid_nfmt(char *str, size_t n, const git_oid *oid)
af795e49 72{
660d59ca
RB
73 size_t i, max_i;
74
75 if (!oid) {
76 memset(str, 0, n);
77 return;
78 }
79 if (n > GIT_OID_HEXSZ) {
80 memset(&str[GIT_OID_HEXSZ], 0, n - GIT_OID_HEXSZ);
81 n = GIT_OID_HEXSZ;
82 }
83
84 max_i = n / 2;
af795e49 85
660d59ca 86 for (i = 0; i < max_i; i++)
af795e49 87 str = fmt_one(str, oid->id[i]);
660d59ca
RB
88
89 if (n & 1)
90 *str++ = to_hex[oid->id[i] >> 4];
91}
92
93void git_oid_fmt(char *str, const git_oid *oid)
94{
95 git_oid_nfmt(str, GIT_OID_HEXSZ, oid);
af795e49
SP
96}
97
98void git_oid_pathfmt(char *str, const git_oid *oid)
99{
0ef9d2aa 100 size_t i;
af795e49
SP
101
102 str = fmt_one(str, oid->id[0]);
103 *str++ = '/';
104 for (i = 1; i < sizeof(oid->id); i++)
105 str = fmt_one(str, oid->id[i]);
106}
107
108char *git_oid_allocfmt(const git_oid *oid)
109{
64a47c01 110 char *str = git__malloc(GIT_OID_HEXSZ + 1);
af795e49
SP
111 if (!str)
112 return NULL;
660d59ca 113 git_oid_nfmt(str, GIT_OID_HEXSZ + 1, oid);
af795e49
SP
114 return str;
115}
960ca1d7 116
5621d809 117char *git_oid_tostr(char *out, size_t n, const git_oid *oid)
960ca1d7 118{
8fe713cc 119 if (!out || n == 0)
960ca1d7
RJ
120 return "";
121
660d59ca
RB
122 if (n > GIT_OID_HEXSZ + 1)
123 n = GIT_OID_HEXSZ + 1;
960ca1d7 124
660d59ca
RB
125 git_oid_nfmt(out, n - 1, oid); /* allow room for terminating NUL */
126 out[n - 1] = '\0';
960ca1d7
RJ
127
128 return out;
129}
130
7c7ff7d1
RB
131int git_oid__parse(
132 git_oid *oid, const char **buffer_out,
133 const char *buffer_end, const char *header)
58519018
VM
134{
135 const size_t sha_len = GIT_OID_HEXSZ;
136 const size_t header_len = strlen(header);
137
720d5472 138 const char *buffer = *buffer_out;
58519018
VM
139
140 if (buffer + (header_len + sha_len + 1) > buffer_end)
73fe6a8e 141 return -1;
58519018
VM
142
143 if (memcmp(buffer, header, header_len) != 0)
73fe6a8e 144 return -1;
58519018
VM
145
146 if (buffer[header_len + sha_len] != '\n')
73fe6a8e 147 return -1;
58519018 148
7c7ff7d1
RB
149 if (git_oid_fromstr(oid, buffer + header_len) < 0)
150 return -1;
58519018
VM
151
152 *buffer_out = buffer + (header_len + sha_len + 1);
153
7c7ff7d1 154 return 0;
58519018
VM
155}
156
afeecf4f
VM
157void git_oid__writebuf(git_buf *buf, const char *header, const git_oid *oid)
158{
159 char hex_oid[GIT_OID_HEXSZ];
160
161 git_oid_fmt(hex_oid, oid);
162 git_buf_puts(buf, header);
163 git_buf_put(buf, hex_oid, GIT_OID_HEXSZ);
164 git_buf_putc(buf, '\n');
165}
166
fa48608e 167void git_oid_fromraw(git_oid *out, const unsigned char *raw)
d12299fe
VM
168{
169 memcpy(out->id, raw, sizeof(out->id));
170}
171
172void git_oid_cpy(git_oid *out, const git_oid *src)
173{
174 memcpy(out->id, src->id, sizeof(out->id));
175}
176
b7f167da 177int git_oid_cmp(const git_oid *a, const git_oid *b)
0c72248b 178{
b7f167da 179 return git_oid__cmp(a, b);
0c72248b
RB
180}
181
b8457baa 182int git_oid_ncmp(const git_oid *oid_a, const git_oid *oid_b, size_t len)
53c0bd81 183{
f1d01851
VM
184 const unsigned char *a = oid_a->id;
185 const unsigned char *b = oid_b->id;
186
8564a022
RB
187 if (len > GIT_OID_HEXSZ)
188 len = GIT_OID_HEXSZ;
189
190 while (len > 1) {
f1d01851
VM
191 if (*a != *b)
192 return 1;
193 a++;
194 b++;
195 len -= 2;
8564a022 196 };
f1d01851
VM
197
198 if (len)
199 if ((*a ^ *b) & 0xf0)
200 return 1;
201
202 return 0;
53c0bd81
MP
203}
204
aa8f0101 205int git_oid_strcmp(const git_oid *oid_a, const char *str)
34aff010 206{
aa8f0101
RB
207 const unsigned char *a = oid_a->id;
208 unsigned char strval;
209 int hexval;
210
211 for (a = oid_a->id; *str && (a - oid_a->id) < GIT_OID_RAWSZ; ++a) {
212 if ((hexval = git__fromhex(*str++)) < 0)
213 return -1;
66566516 214 strval = (unsigned char)(hexval << 4);
aa8f0101
RB
215 if (*str) {
216 if ((hexval = git__fromhex(*str++)) < 0)
217 return -1;
218 strval |= hexval;
219 }
220 if (*a != strval)
221 return (*a - strval);
222 }
34aff010 223
aa8f0101
RB
224 return 0;
225}
34aff010 226
aa8f0101
RB
227int git_oid_streq(const git_oid *oid_a, const char *str)
228{
229 return git_oid_strcmp(oid_a, str) == 0 ? 0 : -1;
34aff010 230}
231
74fa4bfa
RB
232int git_oid_iszero(const git_oid *oid_a)
233{
234 const unsigned char *a = oid_a->id;
235 unsigned int i;
236 for (i = 0; i < GIT_OID_RAWSZ; ++i, ++a)
237 if (*a != 0)
238 return 0;
239 return 1;
240}
241
26022f07
VM
242typedef short node_index;
243
244typedef union {
245 const char *tail;
246 node_index children[16];
247} trie_node;
248
249struct git_oid_shorten {
250 trie_node *nodes;
251 size_t node_count, size;
252 int min_length, full;
253};
254
255static int resize_trie(git_oid_shorten *self, size_t new_size)
256{
3286c408 257 self->nodes = git__realloc(self->nodes, new_size * sizeof(trie_node));
7c7ff7d1 258 GITERR_CHECK_ALLOC(self->nodes);
26022f07
VM
259
260 if (new_size > self->size) {
261 memset(&self->nodes[self->size], 0x0, (new_size - self->size) * sizeof(trie_node));
262 }
263
264 self->size = new_size;
7c7ff7d1 265 return 0;
26022f07
VM
266}
267
268static trie_node *push_leaf(git_oid_shorten *os, node_index idx, int push_at, const char *oid)
269{
270 trie_node *node, *leaf;
271 node_index idx_leaf;
272
273 if (os->node_count >= os->size) {
7c7ff7d1 274 if (resize_trie(os, os->size * 2) < 0)
26022f07
VM
275 return NULL;
276 }
277
278 idx_leaf = (node_index)os->node_count++;
279
52f537e9 280 if (os->node_count == SHRT_MAX) {
26022f07 281 os->full = 1;
52f537e9
AW
282 return NULL;
283 }
26022f07
VM
284
285 node = &os->nodes[idx];
286 node->children[push_at] = -idx_leaf;
287
288 leaf = &os->nodes[idx_leaf];
289 leaf->tail = oid;
290
291 return node;
292}
293
294git_oid_shorten *git_oid_shorten_new(size_t min_length)
295{
296 git_oid_shorten *os;
297
44ef8b1b
RB
298 assert((size_t)((int)min_length) == min_length);
299
7c7ff7d1 300 os = git__calloc(1, sizeof(git_oid_shorten));
26022f07
VM
301 if (os == NULL)
302 return NULL;
303
7c7ff7d1 304 if (resize_trie(os, 16) < 0) {
3286c408 305 git__free(os);
26022f07
VM
306 return NULL;
307 }
308
309 os->node_count = 1;
44ef8b1b 310 os->min_length = (int)min_length;
26022f07
VM
311
312 return os;
313}
314
315void git_oid_shorten_free(git_oid_shorten *os)
316{
3286c408
VM
317 git__free(os->nodes);
318 git__free(os);
26022f07
VM
319}
320
321
322/*
323 * What wizardry is this?
324 *
325 * This is just a memory-optimized trie: basically a very fancy
326 * 16-ary tree, which is used to store the prefixes of the OID
327 * strings.
328 *
329 * Read more: http://en.wikipedia.org/wiki/Trie
330 *
331 * Magic that happens in this method:
332 *
333 * - Each node in the trie is an union, so it can work both as
334 * a normal node, or as a leaf.
335 *
336 * - Each normal node points to 16 children (one for each possible
337 * character in the oid). This is *not* stored in an array of
932d1baf 338 * pointers, because in a 64-bit arch this would be sucking
826bc4a8 339 * 16*sizeof(void*) = 128 bytes of memory per node, which is
26022f07
VM
340 * insane. What we do is store Node Indexes, and use these indexes
341 * to look up each node in the om->index array. These indexes are
342 * signed shorts, so this limits the amount of unique OIDs that
343 * fit in the structure to about 20000 (assuming a more or less uniform
344 * distribution).
345 *
346 * - All the nodes in om->index array are stored contiguously in
347 * memory, and each of them is 32 bytes, so we fit 2x nodes per
348 * cache line. Convenient for speed.
349 *
350 * - To differentiate the leafs from the normal nodes, we store all
351 * the indexes towards a leaf as a negative index (indexes to normal
352 * nodes are positives). When we find that one of the children for
353 * a node has a negative value, that means it's going to be a leaf.
354 * This reduces the amount of indexes we have by two, but also reduces
355 * the size of each node by 1-4 bytes (the amount we would need to
356 * add a `is_leaf` field): this is good because it allows the nodes
357 * to fit cleanly in cache lines.
358 *
359 * - Once we reach an empty children, instead of continuing to insert
360 * new nodes for each remaining character of the OID, we store a pointer
361 * to the tail in the leaf; if the leaf is reached again, we turn it
362 * into a normal node and use the tail to create a new leaf.
363 *
364 * This is a pretty good balance between performance and memory usage.
365 */
366int git_oid_shorten_add(git_oid_shorten *os, const char *text_oid)
367{
44ef8b1b
RB
368 int i;
369 bool is_leaf;
26022f07
VM
370 node_index idx;
371
d45e9480
L
372 if (os->full) {
373 giterr_set(GITERR_INVALID, "Unable to shorten OID - OID set full");
7c7ff7d1 374 return -1;
d45e9480 375 }
26022f07 376
f0ab9fda
VM
377 if (text_oid == NULL)
378 return os->min_length;
379
26022f07 380 idx = 0;
44ef8b1b 381 is_leaf = false;
26022f07
VM
382
383 for (i = 0; i < GIT_OID_HEXSZ; ++i) {
eb8de747 384 int c = git__fromhex(text_oid[i]);
26022f07
VM
385 trie_node *node;
386
7c7ff7d1
RB
387 if (c == -1) {
388 giterr_set(GITERR_INVALID, "Unable to shorten OID - invalid hex value");
389 return -1;
390 }
26022f07
VM
391
392 node = &os->nodes[idx];
393
394 if (is_leaf) {
395 const char *tail;
396
397 tail = node->tail;
398 node->tail = NULL;
399
eb8de747 400 node = push_leaf(os, idx, git__fromhex(tail[0]), &tail[1]);
d45e9480
L
401 if (node == NULL) {
402 if (os->full)
403 giterr_set(GITERR_INVALID, "Unable to shorten OID - OID set full");
404 return -1;
405 }
26022f07
VM
406 }
407
408 if (node->children[c] == 0) {
d45e9480
L
409 if (push_leaf(os, idx, c, &text_oid[i + 1]) == NULL) {
410 if (os->full)
411 giterr_set(GITERR_INVALID, "Unable to shorten OID - OID set full");
7c7ff7d1 412 return -1;
d45e9480 413 }
26022f07
VM
414 break;
415 }
416
417 idx = node->children[c];
44ef8b1b 418 is_leaf = false;
26022f07
VM
419
420 if (idx < 0) {
421 node->children[c] = idx = -idx;
44ef8b1b 422 is_leaf = true;
26022f07
VM
423 }
424 }
425
426 if (++i > os->min_length)
427 os->min_length = i;
428
429 return os->min_length;
430}
431