]> git.proxmox.com Git - libgit2.git/blob - src/oid.c
Merge pull request #2654 from linquize/missing-obj
[libgit2.git] / src / oid.c
1 /*
2 * Copyright (C) the libgit2 contributors. All rights reserved.
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8 #include "common.h"
9 #include "git2/oid.h"
10 #include "repository.h"
11 #include "global.h"
12 #include <string.h>
13 #include <limits.h>
14
15 static char to_hex[] = "0123456789abcdef";
16
17 static int oid_error_invalid(const char *msg)
18 {
19 giterr_set(GITERR_INVALID, "Unable to parse OID - %s", msg);
20 return -1;
21 }
22
23 int git_oid_fromstrn(git_oid *out, const char *str, size_t length)
24 {
25 size_t p;
26 int v;
27
28 assert(out && str);
29
30 if (!length)
31 return oid_error_invalid("too short");
32
33 if (length > GIT_OID_HEXSZ)
34 return oid_error_invalid("too long");
35
36 memset(out->id, 0, GIT_OID_RAWSZ);
37
38 for (p = 0; p < length; p++) {
39 v = git__fromhex(str[p]);
40 if (v < 0)
41 return oid_error_invalid("contains invalid characters");
42
43 out->id[p / 2] |= (unsigned char)(v << (p % 2 ? 0 : 4));
44 }
45
46 return 0;
47 }
48
49 int git_oid_fromstrp(git_oid *out, const char *str)
50 {
51 return git_oid_fromstrn(out, str, strlen(str));
52 }
53
54 int git_oid_fromstr(git_oid *out, const char *str)
55 {
56 return git_oid_fromstrn(out, str, GIT_OID_HEXSZ);
57 }
58
59 GIT_INLINE(char) *fmt_one(char *str, unsigned int val)
60 {
61 *str++ = to_hex[val >> 4];
62 *str++ = to_hex[val & 0xf];
63 return str;
64 }
65
66 void git_oid_nfmt(char *str, size_t n, const git_oid *oid)
67 {
68 size_t i, max_i;
69
70 if (!oid) {
71 memset(str, 0, n);
72 return;
73 }
74 if (n > GIT_OID_HEXSZ) {
75 memset(&str[GIT_OID_HEXSZ], 0, n - GIT_OID_HEXSZ);
76 n = GIT_OID_HEXSZ;
77 }
78
79 max_i = n / 2;
80
81 for (i = 0; i < max_i; i++)
82 str = fmt_one(str, oid->id[i]);
83
84 if (n & 1)
85 *str++ = to_hex[oid->id[i] >> 4];
86 }
87
88 void git_oid_fmt(char *str, const git_oid *oid)
89 {
90 git_oid_nfmt(str, GIT_OID_HEXSZ, oid);
91 }
92
93 void git_oid_pathfmt(char *str, const git_oid *oid)
94 {
95 size_t i;
96
97 str = fmt_one(str, oid->id[0]);
98 *str++ = '/';
99 for (i = 1; i < sizeof(oid->id); i++)
100 str = fmt_one(str, oid->id[i]);
101 }
102
103 char *git_oid_tostr_s(const git_oid *oid)
104 {
105 char *str = GIT_GLOBAL->oid_fmt;
106 git_oid_nfmt(str, GIT_OID_HEXSZ + 1, oid);
107 return str;
108 }
109
110 char *git_oid_allocfmt(const git_oid *oid)
111 {
112 char *str = git__malloc(GIT_OID_HEXSZ + 1);
113 if (!str)
114 return NULL;
115 git_oid_nfmt(str, GIT_OID_HEXSZ + 1, oid);
116 return str;
117 }
118
119 char *git_oid_tostr(char *out, size_t n, const git_oid *oid)
120 {
121 if (!out || n == 0)
122 return "";
123
124 if (n > GIT_OID_HEXSZ + 1)
125 n = GIT_OID_HEXSZ + 1;
126
127 git_oid_nfmt(out, n - 1, oid); /* allow room for terminating NUL */
128 out[n - 1] = '\0';
129
130 return out;
131 }
132
133 int git_oid__parse(
134 git_oid *oid, const char **buffer_out,
135 const char *buffer_end, const char *header)
136 {
137 const size_t sha_len = GIT_OID_HEXSZ;
138 const size_t header_len = strlen(header);
139
140 const char *buffer = *buffer_out;
141
142 if (buffer + (header_len + sha_len + 1) > buffer_end)
143 return -1;
144
145 if (memcmp(buffer, header, header_len) != 0)
146 return -1;
147
148 if (buffer[header_len + sha_len] != '\n')
149 return -1;
150
151 if (git_oid_fromstr(oid, buffer + header_len) < 0)
152 return -1;
153
154 *buffer_out = buffer + (header_len + sha_len + 1);
155
156 return 0;
157 }
158
159 void git_oid__writebuf(git_buf *buf, const char *header, const git_oid *oid)
160 {
161 char hex_oid[GIT_OID_HEXSZ];
162
163 git_oid_fmt(hex_oid, oid);
164 git_buf_puts(buf, header);
165 git_buf_put(buf, hex_oid, GIT_OID_HEXSZ);
166 git_buf_putc(buf, '\n');
167 }
168
169 void git_oid_fromraw(git_oid *out, const unsigned char *raw)
170 {
171 memcpy(out->id, raw, sizeof(out->id));
172 }
173
174 void git_oid_cpy(git_oid *out, const git_oid *src)
175 {
176 memcpy(out->id, src->id, sizeof(out->id));
177 }
178
179 int git_oid_cmp(const git_oid *a, const git_oid *b)
180 {
181 return git_oid__cmp(a, b);
182 }
183
184 int git_oid_equal(const git_oid *a, const git_oid *b)
185 {
186 return (git_oid__cmp(a, b) == 0);
187 }
188
189 int git_oid_ncmp(const git_oid *oid_a, const git_oid *oid_b, size_t len)
190 {
191 const unsigned char *a = oid_a->id;
192 const unsigned char *b = oid_b->id;
193
194 if (len > GIT_OID_HEXSZ)
195 len = GIT_OID_HEXSZ;
196
197 while (len > 1) {
198 if (*a != *b)
199 return 1;
200 a++;
201 b++;
202 len -= 2;
203 };
204
205 if (len)
206 if ((*a ^ *b) & 0xf0)
207 return 1;
208
209 return 0;
210 }
211
212 int git_oid_strcmp(const git_oid *oid_a, const char *str)
213 {
214 const unsigned char *a;
215 unsigned char strval;
216 int hexval;
217
218 for (a = oid_a->id; *str && (a - oid_a->id) < GIT_OID_RAWSZ; ++a) {
219 if ((hexval = git__fromhex(*str++)) < 0)
220 return -1;
221 strval = (unsigned char)(hexval << 4);
222 if (*str) {
223 if ((hexval = git__fromhex(*str++)) < 0)
224 return -1;
225 strval |= hexval;
226 }
227 if (*a != strval)
228 return (*a - strval);
229 }
230
231 return 0;
232 }
233
234 int git_oid_streq(const git_oid *oid_a, const char *str)
235 {
236 return git_oid_strcmp(oid_a, str) == 0 ? 0 : -1;
237 }
238
239 int git_oid_iszero(const git_oid *oid_a)
240 {
241 const unsigned char *a = oid_a->id;
242 unsigned int i;
243 for (i = 0; i < GIT_OID_RAWSZ; ++i, ++a)
244 if (*a != 0)
245 return 0;
246 return 1;
247 }
248
249 typedef short node_index;
250
251 typedef union {
252 const char *tail;
253 node_index children[16];
254 } trie_node;
255
256 struct git_oid_shorten {
257 trie_node *nodes;
258 size_t node_count, size;
259 int min_length, full;
260 };
261
262 static int resize_trie(git_oid_shorten *self, size_t new_size)
263 {
264 self->nodes = git__realloc(self->nodes, new_size * sizeof(trie_node));
265 GITERR_CHECK_ALLOC(self->nodes);
266
267 if (new_size > self->size) {
268 memset(&self->nodes[self->size], 0x0, (new_size - self->size) * sizeof(trie_node));
269 }
270
271 self->size = new_size;
272 return 0;
273 }
274
275 static trie_node *push_leaf(git_oid_shorten *os, node_index idx, int push_at, const char *oid)
276 {
277 trie_node *node, *leaf;
278 node_index idx_leaf;
279
280 if (os->node_count >= os->size) {
281 if (resize_trie(os, os->size * 2) < 0)
282 return NULL;
283 }
284
285 idx_leaf = (node_index)os->node_count++;
286
287 if (os->node_count == SHRT_MAX) {
288 os->full = 1;
289 return NULL;
290 }
291
292 node = &os->nodes[idx];
293 node->children[push_at] = -idx_leaf;
294
295 leaf = &os->nodes[idx_leaf];
296 leaf->tail = oid;
297
298 return node;
299 }
300
301 git_oid_shorten *git_oid_shorten_new(size_t min_length)
302 {
303 git_oid_shorten *os;
304
305 assert((size_t)((int)min_length) == min_length);
306
307 os = git__calloc(1, sizeof(git_oid_shorten));
308 if (os == NULL)
309 return NULL;
310
311 if (resize_trie(os, 16) < 0) {
312 git__free(os);
313 return NULL;
314 }
315
316 os->node_count = 1;
317 os->min_length = (int)min_length;
318
319 return os;
320 }
321
322 void git_oid_shorten_free(git_oid_shorten *os)
323 {
324 if (os == NULL)
325 return;
326
327 git__free(os->nodes);
328 git__free(os);
329 }
330
331
332 /*
333 * What wizardry is this?
334 *
335 * This is just a memory-optimized trie: basically a very fancy
336 * 16-ary tree, which is used to store the prefixes of the OID
337 * strings.
338 *
339 * Read more: http://en.wikipedia.org/wiki/Trie
340 *
341 * Magic that happens in this method:
342 *
343 * - Each node in the trie is an union, so it can work both as
344 * a normal node, or as a leaf.
345 *
346 * - Each normal node points to 16 children (one for each possible
347 * character in the oid). This is *not* stored in an array of
348 * pointers, because in a 64-bit arch this would be sucking
349 * 16*sizeof(void*) = 128 bytes of memory per node, which is
350 * insane. What we do is store Node Indexes, and use these indexes
351 * to look up each node in the om->index array. These indexes are
352 * signed shorts, so this limits the amount of unique OIDs that
353 * fit in the structure to about 20000 (assuming a more or less uniform
354 * distribution).
355 *
356 * - All the nodes in om->index array are stored contiguously in
357 * memory, and each of them is 32 bytes, so we fit 2x nodes per
358 * cache line. Convenient for speed.
359 *
360 * - To differentiate the leafs from the normal nodes, we store all
361 * the indexes towards a leaf as a negative index (indexes to normal
362 * nodes are positives). When we find that one of the children for
363 * a node has a negative value, that means it's going to be a leaf.
364 * This reduces the amount of indexes we have by two, but also reduces
365 * the size of each node by 1-4 bytes (the amount we would need to
366 * add a `is_leaf` field): this is good because it allows the nodes
367 * to fit cleanly in cache lines.
368 *
369 * - Once we reach an empty children, instead of continuing to insert
370 * new nodes for each remaining character of the OID, we store a pointer
371 * to the tail in the leaf; if the leaf is reached again, we turn it
372 * into a normal node and use the tail to create a new leaf.
373 *
374 * This is a pretty good balance between performance and memory usage.
375 */
376 int git_oid_shorten_add(git_oid_shorten *os, const char *text_oid)
377 {
378 int i;
379 bool is_leaf;
380 node_index idx;
381
382 if (os->full) {
383 giterr_set(GITERR_INVALID, "Unable to shorten OID - OID set full");
384 return -1;
385 }
386
387 if (text_oid == NULL)
388 return os->min_length;
389
390 idx = 0;
391 is_leaf = false;
392
393 for (i = 0; i < GIT_OID_HEXSZ; ++i) {
394 int c = git__fromhex(text_oid[i]);
395 trie_node *node;
396
397 if (c == -1) {
398 giterr_set(GITERR_INVALID, "Unable to shorten OID - invalid hex value");
399 return -1;
400 }
401
402 node = &os->nodes[idx];
403
404 if (is_leaf) {
405 const char *tail;
406
407 tail = node->tail;
408 node->tail = NULL;
409
410 node = push_leaf(os, idx, git__fromhex(tail[0]), &tail[1]);
411 if (node == NULL) {
412 if (os->full)
413 giterr_set(GITERR_INVALID, "Unable to shorten OID - OID set full");
414 return -1;
415 }
416 }
417
418 if (node->children[c] == 0) {
419 if (push_leaf(os, idx, c, &text_oid[i + 1]) == NULL) {
420 if (os->full)
421 giterr_set(GITERR_INVALID, "Unable to shorten OID - OID set full");
422 return -1;
423 }
424 break;
425 }
426
427 idx = node->children[c];
428 is_leaf = false;
429
430 if (idx < 0) {
431 node->children[c] = idx = -idx;
432 is_leaf = true;
433 }
434 }
435
436 if (++i > os->min_length)
437 os->min_length = i;
438
439 return os->min_length;
440 }
441