]> git.proxmox.com Git - libgit2.git/blob - src/oid.c
Merge pull request #1819 from linquize/git_oid_shorten_add
[libgit2.git] / src / oid.c
1 /*
2 * Copyright (C) the libgit2 contributors. All rights reserved.
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8 #include "common.h"
9 #include "git2/oid.h"
10 #include "repository.h"
11 #include <string.h>
12 #include <limits.h>
13
14 static char to_hex[] = "0123456789abcdef";
15
16 static int oid_error_invalid(const char *msg)
17 {
18 giterr_set(GITERR_INVALID, "Unable to parse OID - %s", msg);
19 return -1;
20 }
21
22 int git_oid_fromstrn(git_oid *out, const char *str, size_t length)
23 {
24 size_t p;
25 int v;
26
27 if (length > GIT_OID_HEXSZ)
28 return oid_error_invalid("too long");
29
30 for (p = 0; p < length - 1; p += 2) {
31 v = (git__fromhex(str[p + 0]) << 4)
32 | git__fromhex(str[p + 1]);
33
34 if (v < 0)
35 return oid_error_invalid("contains invalid characters");
36
37 out->id[p / 2] = (unsigned char)v;
38 }
39
40 if (length % 2) {
41 v = (git__fromhex(str[p + 0]) << 4);
42 if (v < 0)
43 return oid_error_invalid("contains invalid characters");
44
45 out->id[p / 2] = (unsigned char)v;
46 p += 2;
47 }
48
49 memset(out->id + p / 2, 0, (GIT_OID_HEXSZ - p) / 2);
50
51 return 0;
52 }
53
54 int git_oid_fromstrp(git_oid *out, const char *str)
55 {
56 return git_oid_fromstrn(out, str, strlen(str));
57 }
58
59 int git_oid_fromstr(git_oid *out, const char *str)
60 {
61 return git_oid_fromstrn(out, str, GIT_OID_HEXSZ);
62 }
63
64 GIT_INLINE(char) *fmt_one(char *str, unsigned int val)
65 {
66 *str++ = to_hex[val >> 4];
67 *str++ = to_hex[val & 0xf];
68 return str;
69 }
70
71 void git_oid_nfmt(char *str, size_t n, const git_oid *oid)
72 {
73 size_t i, max_i;
74
75 if (!oid) {
76 memset(str, 0, n);
77 return;
78 }
79 if (n > GIT_OID_HEXSZ) {
80 memset(&str[GIT_OID_HEXSZ], 0, n - GIT_OID_HEXSZ);
81 n = GIT_OID_HEXSZ;
82 }
83
84 max_i = n / 2;
85
86 for (i = 0; i < max_i; i++)
87 str = fmt_one(str, oid->id[i]);
88
89 if (n & 1)
90 *str++ = to_hex[oid->id[i] >> 4];
91 }
92
93 void git_oid_fmt(char *str, const git_oid *oid)
94 {
95 git_oid_nfmt(str, GIT_OID_HEXSZ, oid);
96 }
97
98 void git_oid_pathfmt(char *str, const git_oid *oid)
99 {
100 size_t i;
101
102 str = fmt_one(str, oid->id[0]);
103 *str++ = '/';
104 for (i = 1; i < sizeof(oid->id); i++)
105 str = fmt_one(str, oid->id[i]);
106 }
107
108 char *git_oid_allocfmt(const git_oid *oid)
109 {
110 char *str = git__malloc(GIT_OID_HEXSZ + 1);
111 if (!str)
112 return NULL;
113 git_oid_nfmt(str, GIT_OID_HEXSZ + 1, oid);
114 return str;
115 }
116
117 char *git_oid_tostr(char *out, size_t n, const git_oid *oid)
118 {
119 if (!out || n == 0)
120 return "";
121
122 if (n > GIT_OID_HEXSZ + 1)
123 n = GIT_OID_HEXSZ + 1;
124
125 git_oid_nfmt(out, n - 1, oid); /* allow room for terminating NUL */
126 out[n - 1] = '\0';
127
128 return out;
129 }
130
131 int git_oid__parse(
132 git_oid *oid, const char **buffer_out,
133 const char *buffer_end, const char *header)
134 {
135 const size_t sha_len = GIT_OID_HEXSZ;
136 const size_t header_len = strlen(header);
137
138 const char *buffer = *buffer_out;
139
140 if (buffer + (header_len + sha_len + 1) > buffer_end)
141 return -1;
142
143 if (memcmp(buffer, header, header_len) != 0)
144 return -1;
145
146 if (buffer[header_len + sha_len] != '\n')
147 return -1;
148
149 if (git_oid_fromstr(oid, buffer + header_len) < 0)
150 return -1;
151
152 *buffer_out = buffer + (header_len + sha_len + 1);
153
154 return 0;
155 }
156
157 void git_oid__writebuf(git_buf *buf, const char *header, const git_oid *oid)
158 {
159 char hex_oid[GIT_OID_HEXSZ];
160
161 git_oid_fmt(hex_oid, oid);
162 git_buf_puts(buf, header);
163 git_buf_put(buf, hex_oid, GIT_OID_HEXSZ);
164 git_buf_putc(buf, '\n');
165 }
166
167 void git_oid_fromraw(git_oid *out, const unsigned char *raw)
168 {
169 memcpy(out->id, raw, sizeof(out->id));
170 }
171
172 void git_oid_cpy(git_oid *out, const git_oid *src)
173 {
174 memcpy(out->id, src->id, sizeof(out->id));
175 }
176
177 int git_oid_cmp(const git_oid *a, const git_oid *b)
178 {
179 return git_oid__cmp(a, b);
180 }
181
182 int git_oid_ncmp(const git_oid *oid_a, const git_oid *oid_b, size_t len)
183 {
184 const unsigned char *a = oid_a->id;
185 const unsigned char *b = oid_b->id;
186
187 if (len > GIT_OID_HEXSZ)
188 len = GIT_OID_HEXSZ;
189
190 while (len > 1) {
191 if (*a != *b)
192 return 1;
193 a++;
194 b++;
195 len -= 2;
196 };
197
198 if (len)
199 if ((*a ^ *b) & 0xf0)
200 return 1;
201
202 return 0;
203 }
204
205 int git_oid_strcmp(const git_oid *oid_a, const char *str)
206 {
207 const unsigned char *a = oid_a->id;
208 unsigned char strval;
209 int hexval;
210
211 for (a = oid_a->id; *str && (a - oid_a->id) < GIT_OID_RAWSZ; ++a) {
212 if ((hexval = git__fromhex(*str++)) < 0)
213 return -1;
214 strval = hexval << 4;
215 if (*str) {
216 if ((hexval = git__fromhex(*str++)) < 0)
217 return -1;
218 strval |= hexval;
219 }
220 if (*a != strval)
221 return (*a - strval);
222 }
223
224 return 0;
225 }
226
227 int git_oid_streq(const git_oid *oid_a, const char *str)
228 {
229 return git_oid_strcmp(oid_a, str) == 0 ? 0 : -1;
230 }
231
232 int git_oid_iszero(const git_oid *oid_a)
233 {
234 const unsigned char *a = oid_a->id;
235 unsigned int i;
236 for (i = 0; i < GIT_OID_RAWSZ; ++i, ++a)
237 if (*a != 0)
238 return 0;
239 return 1;
240 }
241
242 typedef short node_index;
243
244 typedef union {
245 const char *tail;
246 node_index children[16];
247 } trie_node;
248
249 struct git_oid_shorten {
250 trie_node *nodes;
251 size_t node_count, size;
252 int min_length, full;
253 };
254
255 static int resize_trie(git_oid_shorten *self, size_t new_size)
256 {
257 self->nodes = git__realloc(self->nodes, new_size * sizeof(trie_node));
258 GITERR_CHECK_ALLOC(self->nodes);
259
260 if (new_size > self->size) {
261 memset(&self->nodes[self->size], 0x0, (new_size - self->size) * sizeof(trie_node));
262 }
263
264 self->size = new_size;
265 return 0;
266 }
267
268 static trie_node *push_leaf(git_oid_shorten *os, node_index idx, int push_at, const char *oid)
269 {
270 trie_node *node, *leaf;
271 node_index idx_leaf;
272
273 if (os->node_count >= os->size) {
274 if (resize_trie(os, os->size * 2) < 0)
275 return NULL;
276 }
277
278 idx_leaf = (node_index)os->node_count++;
279
280 if (os->node_count == SHRT_MAX) {
281 os->full = 1;
282 return NULL;
283 }
284
285 node = &os->nodes[idx];
286 node->children[push_at] = -idx_leaf;
287
288 leaf = &os->nodes[idx_leaf];
289 leaf->tail = oid;
290
291 return node;
292 }
293
294 git_oid_shorten *git_oid_shorten_new(size_t min_length)
295 {
296 git_oid_shorten *os;
297
298 assert((size_t)((int)min_length) == min_length);
299
300 os = git__calloc(1, sizeof(git_oid_shorten));
301 if (os == NULL)
302 return NULL;
303
304 if (resize_trie(os, 16) < 0) {
305 git__free(os);
306 return NULL;
307 }
308
309 os->node_count = 1;
310 os->min_length = (int)min_length;
311
312 return os;
313 }
314
315 void git_oid_shorten_free(git_oid_shorten *os)
316 {
317 git__free(os->nodes);
318 git__free(os);
319 }
320
321
322 /*
323 * What wizardry is this?
324 *
325 * This is just a memory-optimized trie: basically a very fancy
326 * 16-ary tree, which is used to store the prefixes of the OID
327 * strings.
328 *
329 * Read more: http://en.wikipedia.org/wiki/Trie
330 *
331 * Magic that happens in this method:
332 *
333 * - Each node in the trie is an union, so it can work both as
334 * a normal node, or as a leaf.
335 *
336 * - Each normal node points to 16 children (one for each possible
337 * character in the oid). This is *not* stored in an array of
338 * pointers, because in a 64-bit arch this would be sucking
339 * 16*sizeof(void*) = 128 bytes of memory per node, which is
340 * insane. What we do is store Node Indexes, and use these indexes
341 * to look up each node in the om->index array. These indexes are
342 * signed shorts, so this limits the amount of unique OIDs that
343 * fit in the structure to about 20000 (assuming a more or less uniform
344 * distribution).
345 *
346 * - All the nodes in om->index array are stored contiguously in
347 * memory, and each of them is 32 bytes, so we fit 2x nodes per
348 * cache line. Convenient for speed.
349 *
350 * - To differentiate the leafs from the normal nodes, we store all
351 * the indexes towards a leaf as a negative index (indexes to normal
352 * nodes are positives). When we find that one of the children for
353 * a node has a negative value, that means it's going to be a leaf.
354 * This reduces the amount of indexes we have by two, but also reduces
355 * the size of each node by 1-4 bytes (the amount we would need to
356 * add a `is_leaf` field): this is good because it allows the nodes
357 * to fit cleanly in cache lines.
358 *
359 * - Once we reach an empty children, instead of continuing to insert
360 * new nodes for each remaining character of the OID, we store a pointer
361 * to the tail in the leaf; if the leaf is reached again, we turn it
362 * into a normal node and use the tail to create a new leaf.
363 *
364 * This is a pretty good balance between performance and memory usage.
365 */
366 int git_oid_shorten_add(git_oid_shorten *os, const char *text_oid)
367 {
368 int i;
369 bool is_leaf;
370 node_index idx;
371
372 if (os->full) {
373 giterr_set(GITERR_INVALID, "Unable to shorten OID - OID set full");
374 return -1;
375 }
376
377 if (text_oid == NULL)
378 return os->min_length;
379
380 idx = 0;
381 is_leaf = false;
382
383 for (i = 0; i < GIT_OID_HEXSZ; ++i) {
384 int c = git__fromhex(text_oid[i]);
385 trie_node *node;
386
387 if (c == -1) {
388 giterr_set(GITERR_INVALID, "Unable to shorten OID - invalid hex value");
389 return -1;
390 }
391
392 node = &os->nodes[idx];
393
394 if (is_leaf) {
395 const char *tail;
396
397 tail = node->tail;
398 node->tail = NULL;
399
400 node = push_leaf(os, idx, git__fromhex(tail[0]), &tail[1]);
401 if (node == NULL) {
402 if (os->full)
403 giterr_set(GITERR_INVALID, "Unable to shorten OID - OID set full");
404 return -1;
405 }
406 }
407
408 if (node->children[c] == 0) {
409 if (push_leaf(os, idx, c, &text_oid[i + 1]) == NULL) {
410 if (os->full)
411 giterr_set(GITERR_INVALID, "Unable to shorten OID - OID set full");
412 return -1;
413 }
414 break;
415 }
416
417 idx = node->children[c];
418 is_leaf = false;
419
420 if (idx < 0) {
421 node->children[c] = idx = -idx;
422 is_leaf = true;
423 }
424 }
425
426 if (++i > os->min_length)
427 os->min_length = i;
428
429 return os->min_length;
430 }
431