]> git.proxmox.com Git - libgit2.git/blob - src/oid.c
Merge pull request #2028 from libgit2/options-names
[libgit2.git] / src / oid.c
1 /*
2 * Copyright (C) the libgit2 contributors. All rights reserved.
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8 #include "common.h"
9 #include "git2/oid.h"
10 #include "repository.h"
11 #include <string.h>
12 #include <limits.h>
13
14 static char to_hex[] = "0123456789abcdef";
15
16 static int oid_error_invalid(const char *msg)
17 {
18 giterr_set(GITERR_INVALID, "Unable to parse OID - %s", msg);
19 return -1;
20 }
21
22 int git_oid_fromstrn(git_oid *out, const char *str, size_t length)
23 {
24 size_t p;
25 int v;
26
27 assert(out && str);
28
29 if (!length)
30 return oid_error_invalid("too short");
31
32 if (length > GIT_OID_HEXSZ)
33 return oid_error_invalid("too long");
34
35 memset(out->id, 0, GIT_OID_RAWSZ);
36
37 for (p = 0; p < length; p++) {
38 v = git__fromhex(str[p]);
39 if (v < 0)
40 return oid_error_invalid("contains invalid characters");
41
42 out->id[p / 2] |= (unsigned char)(v << (p % 2 ? 0 : 4));
43 }
44
45 return 0;
46 }
47
48 int git_oid_fromstrp(git_oid *out, const char *str)
49 {
50 return git_oid_fromstrn(out, str, strlen(str));
51 }
52
53 int git_oid_fromstr(git_oid *out, const char *str)
54 {
55 return git_oid_fromstrn(out, str, GIT_OID_HEXSZ);
56 }
57
58 GIT_INLINE(char) *fmt_one(char *str, unsigned int val)
59 {
60 *str++ = to_hex[val >> 4];
61 *str++ = to_hex[val & 0xf];
62 return str;
63 }
64
65 void git_oid_nfmt(char *str, size_t n, const git_oid *oid)
66 {
67 size_t i, max_i;
68
69 if (!oid) {
70 memset(str, 0, n);
71 return;
72 }
73 if (n > GIT_OID_HEXSZ) {
74 memset(&str[GIT_OID_HEXSZ], 0, n - GIT_OID_HEXSZ);
75 n = GIT_OID_HEXSZ;
76 }
77
78 max_i = n / 2;
79
80 for (i = 0; i < max_i; i++)
81 str = fmt_one(str, oid->id[i]);
82
83 if (n & 1)
84 *str++ = to_hex[oid->id[i] >> 4];
85 }
86
87 void git_oid_fmt(char *str, const git_oid *oid)
88 {
89 git_oid_nfmt(str, GIT_OID_HEXSZ, oid);
90 }
91
92 void git_oid_pathfmt(char *str, const git_oid *oid)
93 {
94 size_t i;
95
96 str = fmt_one(str, oid->id[0]);
97 *str++ = '/';
98 for (i = 1; i < sizeof(oid->id); i++)
99 str = fmt_one(str, oid->id[i]);
100 }
101
102 char *git_oid_allocfmt(const git_oid *oid)
103 {
104 char *str = git__malloc(GIT_OID_HEXSZ + 1);
105 if (!str)
106 return NULL;
107 git_oid_nfmt(str, GIT_OID_HEXSZ + 1, oid);
108 return str;
109 }
110
111 char *git_oid_tostr(char *out, size_t n, const git_oid *oid)
112 {
113 if (!out || n == 0)
114 return "";
115
116 if (n > GIT_OID_HEXSZ + 1)
117 n = GIT_OID_HEXSZ + 1;
118
119 git_oid_nfmt(out, n - 1, oid); /* allow room for terminating NUL */
120 out[n - 1] = '\0';
121
122 return out;
123 }
124
125 int git_oid__parse(
126 git_oid *oid, const char **buffer_out,
127 const char *buffer_end, const char *header)
128 {
129 const size_t sha_len = GIT_OID_HEXSZ;
130 const size_t header_len = strlen(header);
131
132 const char *buffer = *buffer_out;
133
134 if (buffer + (header_len + sha_len + 1) > buffer_end)
135 return -1;
136
137 if (memcmp(buffer, header, header_len) != 0)
138 return -1;
139
140 if (buffer[header_len + sha_len] != '\n')
141 return -1;
142
143 if (git_oid_fromstr(oid, buffer + header_len) < 0)
144 return -1;
145
146 *buffer_out = buffer + (header_len + sha_len + 1);
147
148 return 0;
149 }
150
151 void git_oid__writebuf(git_buf *buf, const char *header, const git_oid *oid)
152 {
153 char hex_oid[GIT_OID_HEXSZ];
154
155 git_oid_fmt(hex_oid, oid);
156 git_buf_puts(buf, header);
157 git_buf_put(buf, hex_oid, GIT_OID_HEXSZ);
158 git_buf_putc(buf, '\n');
159 }
160
161 void git_oid_fromraw(git_oid *out, const unsigned char *raw)
162 {
163 memcpy(out->id, raw, sizeof(out->id));
164 }
165
166 void git_oid_cpy(git_oid *out, const git_oid *src)
167 {
168 memcpy(out->id, src->id, sizeof(out->id));
169 }
170
171 int git_oid_cmp(const git_oid *a, const git_oid *b)
172 {
173 return git_oid__cmp(a, b);
174 }
175
176 int git_oid_equal(const git_oid *a, const git_oid *b)
177 {
178 return (git_oid__cmp(a, b) == 0);
179 }
180
181 int git_oid_ncmp(const git_oid *oid_a, const git_oid *oid_b, size_t len)
182 {
183 const unsigned char *a = oid_a->id;
184 const unsigned char *b = oid_b->id;
185
186 if (len > GIT_OID_HEXSZ)
187 len = GIT_OID_HEXSZ;
188
189 while (len > 1) {
190 if (*a != *b)
191 return 1;
192 a++;
193 b++;
194 len -= 2;
195 };
196
197 if (len)
198 if ((*a ^ *b) & 0xf0)
199 return 1;
200
201 return 0;
202 }
203
204 int git_oid_strcmp(const git_oid *oid_a, const char *str)
205 {
206 const unsigned char *a = oid_a->id;
207 unsigned char strval;
208 int hexval;
209
210 for (a = oid_a->id; *str && (a - oid_a->id) < GIT_OID_RAWSZ; ++a) {
211 if ((hexval = git__fromhex(*str++)) < 0)
212 return -1;
213 strval = (unsigned char)(hexval << 4);
214 if (*str) {
215 if ((hexval = git__fromhex(*str++)) < 0)
216 return -1;
217 strval |= hexval;
218 }
219 if (*a != strval)
220 return (*a - strval);
221 }
222
223 return 0;
224 }
225
226 int git_oid_streq(const git_oid *oid_a, const char *str)
227 {
228 return git_oid_strcmp(oid_a, str) == 0 ? 0 : -1;
229 }
230
231 int git_oid_iszero(const git_oid *oid_a)
232 {
233 const unsigned char *a = oid_a->id;
234 unsigned int i;
235 for (i = 0; i < GIT_OID_RAWSZ; ++i, ++a)
236 if (*a != 0)
237 return 0;
238 return 1;
239 }
240
241 typedef short node_index;
242
243 typedef union {
244 const char *tail;
245 node_index children[16];
246 } trie_node;
247
248 struct git_oid_shorten {
249 trie_node *nodes;
250 size_t node_count, size;
251 int min_length, full;
252 };
253
254 static int resize_trie(git_oid_shorten *self, size_t new_size)
255 {
256 self->nodes = git__realloc(self->nodes, new_size * sizeof(trie_node));
257 GITERR_CHECK_ALLOC(self->nodes);
258
259 if (new_size > self->size) {
260 memset(&self->nodes[self->size], 0x0, (new_size - self->size) * sizeof(trie_node));
261 }
262
263 self->size = new_size;
264 return 0;
265 }
266
267 static trie_node *push_leaf(git_oid_shorten *os, node_index idx, int push_at, const char *oid)
268 {
269 trie_node *node, *leaf;
270 node_index idx_leaf;
271
272 if (os->node_count >= os->size) {
273 if (resize_trie(os, os->size * 2) < 0)
274 return NULL;
275 }
276
277 idx_leaf = (node_index)os->node_count++;
278
279 if (os->node_count == SHRT_MAX) {
280 os->full = 1;
281 return NULL;
282 }
283
284 node = &os->nodes[idx];
285 node->children[push_at] = -idx_leaf;
286
287 leaf = &os->nodes[idx_leaf];
288 leaf->tail = oid;
289
290 return node;
291 }
292
293 git_oid_shorten *git_oid_shorten_new(size_t min_length)
294 {
295 git_oid_shorten *os;
296
297 assert((size_t)((int)min_length) == min_length);
298
299 os = git__calloc(1, sizeof(git_oid_shorten));
300 if (os == NULL)
301 return NULL;
302
303 if (resize_trie(os, 16) < 0) {
304 git__free(os);
305 return NULL;
306 }
307
308 os->node_count = 1;
309 os->min_length = (int)min_length;
310
311 return os;
312 }
313
314 void git_oid_shorten_free(git_oid_shorten *os)
315 {
316 if (os == NULL)
317 return;
318
319 git__free(os->nodes);
320 git__free(os);
321 }
322
323
324 /*
325 * What wizardry is this?
326 *
327 * This is just a memory-optimized trie: basically a very fancy
328 * 16-ary tree, which is used to store the prefixes of the OID
329 * strings.
330 *
331 * Read more: http://en.wikipedia.org/wiki/Trie
332 *
333 * Magic that happens in this method:
334 *
335 * - Each node in the trie is an union, so it can work both as
336 * a normal node, or as a leaf.
337 *
338 * - Each normal node points to 16 children (one for each possible
339 * character in the oid). This is *not* stored in an array of
340 * pointers, because in a 64-bit arch this would be sucking
341 * 16*sizeof(void*) = 128 bytes of memory per node, which is
342 * insane. What we do is store Node Indexes, and use these indexes
343 * to look up each node in the om->index array. These indexes are
344 * signed shorts, so this limits the amount of unique OIDs that
345 * fit in the structure to about 20000 (assuming a more or less uniform
346 * distribution).
347 *
348 * - All the nodes in om->index array are stored contiguously in
349 * memory, and each of them is 32 bytes, so we fit 2x nodes per
350 * cache line. Convenient for speed.
351 *
352 * - To differentiate the leafs from the normal nodes, we store all
353 * the indexes towards a leaf as a negative index (indexes to normal
354 * nodes are positives). When we find that one of the children for
355 * a node has a negative value, that means it's going to be a leaf.
356 * This reduces the amount of indexes we have by two, but also reduces
357 * the size of each node by 1-4 bytes (the amount we would need to
358 * add a `is_leaf` field): this is good because it allows the nodes
359 * to fit cleanly in cache lines.
360 *
361 * - Once we reach an empty children, instead of continuing to insert
362 * new nodes for each remaining character of the OID, we store a pointer
363 * to the tail in the leaf; if the leaf is reached again, we turn it
364 * into a normal node and use the tail to create a new leaf.
365 *
366 * This is a pretty good balance between performance and memory usage.
367 */
368 int git_oid_shorten_add(git_oid_shorten *os, const char *text_oid)
369 {
370 int i;
371 bool is_leaf;
372 node_index idx;
373
374 if (os->full) {
375 giterr_set(GITERR_INVALID, "Unable to shorten OID - OID set full");
376 return -1;
377 }
378
379 if (text_oid == NULL)
380 return os->min_length;
381
382 idx = 0;
383 is_leaf = false;
384
385 for (i = 0; i < GIT_OID_HEXSZ; ++i) {
386 int c = git__fromhex(text_oid[i]);
387 trie_node *node;
388
389 if (c == -1) {
390 giterr_set(GITERR_INVALID, "Unable to shorten OID - invalid hex value");
391 return -1;
392 }
393
394 node = &os->nodes[idx];
395
396 if (is_leaf) {
397 const char *tail;
398
399 tail = node->tail;
400 node->tail = NULL;
401
402 node = push_leaf(os, idx, git__fromhex(tail[0]), &tail[1]);
403 if (node == NULL) {
404 if (os->full)
405 giterr_set(GITERR_INVALID, "Unable to shorten OID - OID set full");
406 return -1;
407 }
408 }
409
410 if (node->children[c] == 0) {
411 if (push_leaf(os, idx, c, &text_oid[i + 1]) == NULL) {
412 if (os->full)
413 giterr_set(GITERR_INVALID, "Unable to shorten OID - OID set full");
414 return -1;
415 }
416 break;
417 }
418
419 idx = node->children[c];
420 is_leaf = false;
421
422 if (idx < 0) {
423 node->children[c] = idx = -idx;
424 is_leaf = true;
425 }
426 }
427
428 if (++i > os->min_length)
429 os->min_length = i;
430
431 return os->min_length;
432 }
433