]> git.proxmox.com Git - libgit2.git/blame_incremental - src/oid.c
Fix the build on Emscripten
[libgit2.git] / src / oid.c
... / ...
CommitLineData
1/*
2 * Copyright (C) 2009-2011 the libgit2 contributors
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8#include "common.h"
9#include "git2/oid.h"
10#include "repository.h"
11#include <string.h>
12#include <limits.h>
13
14static char to_hex[] = "0123456789abcdef";
15
16int git_oid_fromstrn(git_oid *out, const char *str, size_t length)
17{
18 size_t p;
19 int v;
20
21 if (length < 4)
22 return git__throw(GIT_ENOTOID, "Failed to generate sha1. Given string is too short");
23
24 if (length > GIT_OID_HEXSZ)
25 length = GIT_OID_HEXSZ;
26
27 for (p = 0; p < length - 1; p += 2) {
28 v = (git__fromhex(str[p + 0]) << 4)
29 | git__fromhex(str[p + 1]);
30
31 if (v < 0)
32 return git__throw(GIT_ENOTOID, "Failed to generate sha1. Given string is not a valid sha1 hash");
33
34 out->id[p / 2] = (unsigned char)v;
35 }
36
37 if (length % 2) {
38 v = (git__fromhex(str[p + 0]) << 4);
39 if (v < 0)
40 return git__throw(GIT_ENOTOID, "Failed to generate sha1. Given string is not a valid sha1 hash");
41
42 out->id[p / 2] = (unsigned char)v;
43 p += 2;
44 }
45
46 memset(out->id + p / 2, 0, (GIT_OID_HEXSZ - p) / 2);
47
48 return GIT_SUCCESS;
49}
50
51int git_oid_fromstr(git_oid *out, const char *str)
52{
53 return git_oid_fromstrn(out, str, GIT_OID_HEXSZ);
54}
55
56GIT_INLINE(char) *fmt_one(char *str, unsigned int val)
57{
58 *str++ = to_hex[val >> 4];
59 *str++ = to_hex[val & 0xf];
60 return str;
61}
62
63void git_oid_fmt(char *str, const git_oid *oid)
64{
65 size_t i;
66
67 for (i = 0; i < sizeof(oid->id); i++)
68 str = fmt_one(str, oid->id[i]);
69}
70
71void git_oid_pathfmt(char *str, const git_oid *oid)
72{
73 size_t i;
74
75 str = fmt_one(str, oid->id[0]);
76 *str++ = '/';
77 for (i = 1; i < sizeof(oid->id); i++)
78 str = fmt_one(str, oid->id[i]);
79}
80
81char *git_oid_allocfmt(const git_oid *oid)
82{
83 char *str = git__malloc(GIT_OID_HEXSZ + 1);
84 if (!str)
85 return NULL;
86 git_oid_fmt(str, oid);
87 str[GIT_OID_HEXSZ] = '\0';
88 return str;
89}
90
91char *git_oid_to_string(char *out, size_t n, const git_oid *oid)
92{
93 char str[GIT_OID_HEXSZ];
94
95 if (!out || n == 0 || !oid)
96 return "";
97
98 n--; /* allow room for terminating NUL */
99
100 if (n > 0) {
101 git_oid_fmt(str, oid);
102 if (n > GIT_OID_HEXSZ)
103 n = GIT_OID_HEXSZ;
104 memcpy(out, str, n);
105 }
106
107 out[n] = '\0';
108
109 return out;
110}
111
112int git_oid__parse(git_oid *oid, const char **buffer_out,
113 const char *buffer_end, const char *header)
114{
115 const size_t sha_len = GIT_OID_HEXSZ;
116 const size_t header_len = strlen(header);
117
118 const char *buffer = *buffer_out;
119
120 if (buffer + (header_len + sha_len + 1) > buffer_end)
121 return git__throw(GIT_EOBJCORRUPTED, "Failed to parse OID. Buffer too small");
122
123 if (memcmp(buffer, header, header_len) != 0)
124 return git__throw(GIT_EOBJCORRUPTED, "Failed to parse OID. Buffer and header do not match");
125
126 if (buffer[header_len + sha_len] != '\n')
127 return git__throw(GIT_EOBJCORRUPTED, "Failed to parse OID. Buffer not terminated correctly");
128
129 if (git_oid_fromstr(oid, buffer + header_len) < GIT_SUCCESS)
130 return git__throw(GIT_EOBJCORRUPTED, "Failed to parse OID. Failed to generate sha1");
131
132 *buffer_out = buffer + (header_len + sha_len + 1);
133
134 return GIT_SUCCESS;
135}
136
137void git_oid__writebuf(git_buf *buf, const char *header, const git_oid *oid)
138{
139 char hex_oid[GIT_OID_HEXSZ];
140
141 git_oid_fmt(hex_oid, oid);
142 git_buf_puts(buf, header);
143 git_buf_put(buf, hex_oid, GIT_OID_HEXSZ);
144 git_buf_putc(buf, '\n');
145}
146
147void git_oid_fromraw(git_oid *out, const unsigned char *raw)
148{
149 memcpy(out->id, raw, sizeof(out->id));
150}
151
152void git_oid_cpy(git_oid *out, const git_oid *src)
153{
154 memcpy(out->id, src->id, sizeof(out->id));
155}
156
157int git_oid_cmp(const git_oid *a, const git_oid *b)
158{
159 return memcmp(a->id, b->id, sizeof(a->id));
160}
161
162int git_oid_ncmp(const git_oid *oid_a, const git_oid *oid_b, unsigned int len)
163{
164 const unsigned char *a = oid_a->id;
165 const unsigned char *b = oid_b->id;
166
167 do {
168 if (*a != *b)
169 return 1;
170 a++;
171 b++;
172 len -= 2;
173 } while (len > 1);
174
175 if (len)
176 if ((*a ^ *b) & 0xf0)
177 return 1;
178
179 return 0;
180}
181
182int git_oid_streq(const git_oid *a, const char *str)
183{
184 git_oid id;
185 int error;
186
187 if ((error = git_oid_fromstr(&id, str)) < GIT_SUCCESS)
188 return git__rethrow(error, "Failed to convert '%s' to oid.", str);
189
190 return git_oid_cmp(a, &id) == 0 ? GIT_SUCCESS : GIT_ERROR;
191}
192
193typedef short node_index;
194
195typedef union {
196 const char *tail;
197 node_index children[16];
198} trie_node;
199
200struct git_oid_shorten {
201 trie_node *nodes;
202 size_t node_count, size;
203 int min_length, full;
204};
205
206static int resize_trie(git_oid_shorten *self, size_t new_size)
207{
208 self->nodes = git__realloc(self->nodes, new_size * sizeof(trie_node));
209 if (self->nodes == NULL)
210 return GIT_ENOMEM;
211
212 if (new_size > self->size) {
213 memset(&self->nodes[self->size], 0x0, (new_size - self->size) * sizeof(trie_node));
214 }
215
216 self->size = new_size;
217 return GIT_SUCCESS;
218}
219
220static trie_node *push_leaf(git_oid_shorten *os, node_index idx, int push_at, const char *oid)
221{
222 trie_node *node, *leaf;
223 node_index idx_leaf;
224
225 if (os->node_count >= os->size) {
226 if (resize_trie(os, os->size * 2) < GIT_SUCCESS)
227 return NULL;
228 }
229
230 idx_leaf = (node_index)os->node_count++;
231
232 if (os->node_count == SHRT_MAX)
233 os->full = 1;
234
235 node = &os->nodes[idx];
236 node->children[push_at] = -idx_leaf;
237
238 leaf = &os->nodes[idx_leaf];
239 leaf->tail = oid;
240
241 return node;
242}
243
244git_oid_shorten *git_oid_shorten_new(size_t min_length)
245{
246 git_oid_shorten *os;
247
248 os = git__malloc(sizeof(git_oid_shorten));
249 if (os == NULL)
250 return NULL;
251
252 memset(os, 0x0, sizeof(git_oid_shorten));
253
254 if (resize_trie(os, 16) < GIT_SUCCESS) {
255 git__free(os);
256 return NULL;
257 }
258
259 os->node_count = 1;
260 os->min_length = min_length;
261
262 return os;
263}
264
265void git_oid_shorten_free(git_oid_shorten *os)
266{
267 git__free(os->nodes);
268 git__free(os);
269}
270
271
272/*
273 * What wizardry is this?
274 *
275 * This is just a memory-optimized trie: basically a very fancy
276 * 16-ary tree, which is used to store the prefixes of the OID
277 * strings.
278 *
279 * Read more: http://en.wikipedia.org/wiki/Trie
280 *
281 * Magic that happens in this method:
282 *
283 * - Each node in the trie is an union, so it can work both as
284 * a normal node, or as a leaf.
285 *
286 * - Each normal node points to 16 children (one for each possible
287 * character in the oid). This is *not* stored in an array of
288 * pointers, because in a 64-bit arch this would be sucking
289 * 16*sizeof(void*) = 128 bytes of memory per node, which is fucking
290 * insane. What we do is store Node Indexes, and use these indexes
291 * to look up each node in the om->index array. These indexes are
292 * signed shorts, so this limits the amount of unique OIDs that
293 * fit in the structure to about 20000 (assuming a more or less uniform
294 * distribution).
295 *
296 * - All the nodes in om->index array are stored contiguously in
297 * memory, and each of them is 32 bytes, so we fit 2x nodes per
298 * cache line. Convenient for speed.
299 *
300 * - To differentiate the leafs from the normal nodes, we store all
301 * the indexes towards a leaf as a negative index (indexes to normal
302 * nodes are positives). When we find that one of the children for
303 * a node has a negative value, that means it's going to be a leaf.
304 * This reduces the amount of indexes we have by two, but also reduces
305 * the size of each node by 1-4 bytes (the amount we would need to
306 * add a `is_leaf` field): this is good because it allows the nodes
307 * to fit cleanly in cache lines.
308 *
309 * - Once we reach an empty children, instead of continuing to insert
310 * new nodes for each remaining character of the OID, we store a pointer
311 * to the tail in the leaf; if the leaf is reached again, we turn it
312 * into a normal node and use the tail to create a new leaf.
313 *
314 * This is a pretty good balance between performance and memory usage.
315 */
316int git_oid_shorten_add(git_oid_shorten *os, const char *text_oid)
317{
318 int i, is_leaf;
319 node_index idx;
320
321 if (os->full)
322 return GIT_ENOMEM;
323
324 if (text_oid == NULL)
325 return os->min_length;
326
327 idx = 0;
328 is_leaf = 0;
329
330 for (i = 0; i < GIT_OID_HEXSZ; ++i) {
331 int c = git__fromhex(text_oid[i]);
332 trie_node *node;
333
334 if (c == -1)
335 return git__throw(GIT_ENOTOID, "Failed to shorten OID. Invalid hex value");
336
337 node = &os->nodes[idx];
338
339 if (is_leaf) {
340 const char *tail;
341
342 tail = node->tail;
343 node->tail = NULL;
344
345 node = push_leaf(os, idx, git__fromhex(tail[0]), &tail[1]);
346 if (node == NULL)
347 return GIT_ENOMEM;
348 }
349
350 if (node->children[c] == 0) {
351 if (push_leaf(os, idx, c, &text_oid[i + 1]) == NULL)
352 return GIT_ENOMEM;
353 break;
354 }
355
356 idx = node->children[c];
357 is_leaf = 0;
358
359 if (idx < 0) {
360 node->children[c] = idx = -idx;
361 is_leaf = 1;
362 }
363 }
364
365 if (++i > os->min_length)
366 os->min_length = i;
367
368 return os->min_length;
369}
370