]> git.proxmox.com Git - libgit2.git/blob - src/oid.c
Merge pull request #279 from carlosmn/detached-orphan
[libgit2.git] / src / oid.c
1 /*
2 * This file is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2,
4 * as published by the Free Software Foundation.
5 *
6 * In addition to the permissions in the GNU General Public License,
7 * the authors give you unlimited permission to link the compiled
8 * version of this file into combinations with other programs,
9 * and to distribute those combinations without any restriction
10 * coming from the use of this file. (The General Public License
11 * restrictions do apply in other respects; for example, they cover
12 * modification of the file, and distribution when not linked into
13 * a combined executable.)
14 *
15 * This file is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; see the file COPYING. If not, write to
22 * the Free Software Foundation, 51 Franklin Street, Fifth Floor,
23 * Boston, MA 02110-1301, USA.
24 */
25
26 #include "common.h"
27 #include "git2/oid.h"
28 #include "repository.h"
29 #include <string.h>
30 #include <limits.h>
31
32 static signed char from_hex[] = {
33 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 00 */
34 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 10 */
35 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 20 */
36 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, /* 30 */
37 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 40 */
38 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 50 */
39 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 60 */
40 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 70 */
41 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 80 */
42 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 90 */
43 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* a0 */
44 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* b0 */
45 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* c0 */
46 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* d0 */
47 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* e0 */
48 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* f0 */
49 };
50 static char to_hex[] = "0123456789abcdef";
51
52 int git_oid_fromstrn(git_oid *out, const char *str, size_t length)
53 {
54 size_t p;
55
56 if (length > GIT_OID_HEXSZ)
57 length = GIT_OID_HEXSZ;
58
59 if (length % 2)
60 length--;
61
62 for (p = 0; p < length; p += 2) {
63 int v = (from_hex[(unsigned char)str[p + 0]] << 4)
64 | from_hex[(unsigned char)str[p + 1]];
65
66 if (v < 0)
67 return git__throw(GIT_ENOTOID, "Failed to generate sha1. Given string is not a valid sha1 hash");
68
69 out->id[p / 2] = (unsigned char)v;
70 }
71
72 for (; p < GIT_OID_HEXSZ; p += 2)
73 out->id[p / 2] = 0x0;
74
75 return GIT_SUCCESS;
76 }
77
78 int git_oid_fromstr(git_oid *out, const char *str)
79 {
80 return git_oid_fromstrn(out, str, GIT_OID_HEXSZ);
81 }
82
83 GIT_INLINE(char) *fmt_one(char *str, unsigned int val)
84 {
85 *str++ = to_hex[val >> 4];
86 *str++ = to_hex[val & 0xf];
87 return str;
88 }
89
90 void git_oid_fmt(char *str, const git_oid *oid)
91 {
92 size_t i;
93
94 for (i = 0; i < sizeof(oid->id); i++)
95 str = fmt_one(str, oid->id[i]);
96 }
97
98 void git_oid_pathfmt(char *str, const git_oid *oid)
99 {
100 size_t i;
101
102 str = fmt_one(str, oid->id[0]);
103 *str++ = '/';
104 for (i = 1; i < sizeof(oid->id); i++)
105 str = fmt_one(str, oid->id[i]);
106 }
107
108 char *git_oid_allocfmt(const git_oid *oid)
109 {
110 char *str = git__malloc(GIT_OID_HEXSZ + 1);
111 if (!str)
112 return NULL;
113 git_oid_fmt(str, oid);
114 str[GIT_OID_HEXSZ] = '\0';
115 return str;
116 }
117
118 char *git_oid_to_string(char *out, size_t n, const git_oid *oid)
119 {
120 char str[GIT_OID_HEXSZ];
121
122 if (!out || n == 0 || !oid)
123 return "";
124
125 n--; /* allow room for terminating NUL */
126
127 if (n > 0) {
128 git_oid_fmt(str, oid);
129 if (n > GIT_OID_HEXSZ)
130 n = GIT_OID_HEXSZ;
131 memcpy(out, str, n);
132 }
133
134 out[n] = '\0';
135
136 return out;
137 }
138
139 int git__parse_oid(git_oid *oid, const char **buffer_out,
140 const char *buffer_end, const char *header)
141 {
142 const size_t sha_len = GIT_OID_HEXSZ;
143 const size_t header_len = strlen(header);
144
145 const char *buffer = *buffer_out;
146
147 if (buffer + (header_len + sha_len + 1) > buffer_end)
148 return git__throw(GIT_EOBJCORRUPTED, "Failed to parse OID. Buffer too small");
149
150 if (memcmp(buffer, header, header_len) != 0)
151 return git__throw(GIT_EOBJCORRUPTED, "Failed to parse OID. Buffer and header do not match");
152
153 if (buffer[header_len + sha_len] != '\n')
154 return git__throw(GIT_EOBJCORRUPTED, "Failed to parse OID. Buffer not terminated correctly");
155
156 if (git_oid_fromstr(oid, buffer + header_len) < GIT_SUCCESS)
157 return git__throw(GIT_EOBJCORRUPTED, "Failed to parse OID. Failed to generate sha1");
158
159 *buffer_out = buffer + (header_len + sha_len + 1);
160
161 return GIT_SUCCESS;
162 }
163
164 int git__write_oid(git_odb_stream *stream, const char *header, const git_oid *oid)
165 {
166 char hex_oid[42];
167
168 git_oid_fmt(hex_oid + 1, oid);
169
170 hex_oid[0] = ' ';
171 hex_oid[41] = '\n';
172
173 stream->write(stream, header, strlen(header));
174 stream->write(stream, hex_oid, 42);
175 return GIT_SUCCESS;
176 }
177
178 void git_oid_fromraw(git_oid *out, const unsigned char *raw)
179 {
180 memcpy(out->id, raw, sizeof(out->id));
181 }
182
183 void git_oid_cpy(git_oid *out, const git_oid *src)
184 {
185 memcpy(out->id, src->id, sizeof(out->id));
186 }
187
188 int git_oid_cmp(const git_oid *a, const git_oid *b)
189 {
190 return memcmp(a->id, b->id, sizeof(a->id));
191 }
192
193 int git_oid_ncmp(const git_oid *oid_a, const git_oid *oid_b, unsigned int len)
194 {
195 const unsigned char *a = oid_a->id;
196 const unsigned char *b = oid_b->id;
197
198 do {
199 if (*a != *b)
200 return 1;
201 a++;
202 b++;
203 len -= 2;
204 } while (len > 1);
205
206 if (len)
207 if ((*a ^ *b) & 0xf0)
208 return 1;
209
210 return 0;
211 }
212
213 typedef short node_index;
214
215 typedef union {
216 const char *tail;
217 node_index children[16];
218 } trie_node;
219
220 struct git_oid_shorten {
221 trie_node *nodes;
222 size_t node_count, size;
223 int min_length, full;
224 };
225
226 static int resize_trie(git_oid_shorten *self, size_t new_size)
227 {
228 self->nodes = realloc(self->nodes, new_size * sizeof(trie_node));
229 if (self->nodes == NULL)
230 return GIT_ENOMEM;
231
232 if (new_size > self->size) {
233 memset(&self->nodes[self->size], 0x0, (new_size - self->size) * sizeof(trie_node));
234 }
235
236 self->size = new_size;
237 return GIT_SUCCESS;
238 }
239
240 static trie_node *push_leaf(git_oid_shorten *os, node_index idx, int push_at, const char *oid)
241 {
242 trie_node *node, *leaf;
243 node_index idx_leaf;
244
245 if (os->node_count >= os->size) {
246 if (resize_trie(os, os->size * 2) < GIT_SUCCESS)
247 return NULL;
248 }
249
250 idx_leaf = (node_index)os->node_count++;
251
252 if (os->node_count == SHRT_MAX)
253 os->full = 1;
254
255 node = &os->nodes[idx];
256 node->children[push_at] = -idx_leaf;
257
258 leaf = &os->nodes[idx_leaf];
259 leaf->tail = oid;
260
261 return node;
262 }
263
264 git_oid_shorten *git_oid_shorten_new(size_t min_length)
265 {
266 git_oid_shorten *os;
267
268 os = git__malloc(sizeof(git_oid_shorten));
269 if (os == NULL)
270 return NULL;
271
272 memset(os, 0x0, sizeof(git_oid_shorten));
273
274 if (resize_trie(os, 16) < GIT_SUCCESS) {
275 free(os);
276 return NULL;
277 }
278
279 os->node_count = 1;
280 os->min_length = min_length;
281
282 return os;
283 }
284
285 void git_oid_shorten_free(git_oid_shorten *os)
286 {
287 free(os->nodes);
288 free(os);
289 }
290
291
292 /*
293 * What wizardry is this?
294 *
295 * This is just a memory-optimized trie: basically a very fancy
296 * 16-ary tree, which is used to store the prefixes of the OID
297 * strings.
298 *
299 * Read more: http://en.wikipedia.org/wiki/Trie
300 *
301 * Magic that happens in this method:
302 *
303 * - Each node in the trie is an union, so it can work both as
304 * a normal node, or as a leaf.
305 *
306 * - Each normal node points to 16 children (one for each possible
307 * character in the oid). This is *not* stored in an array of
308 * pointers, because in a 64-bit arch this would be sucking
309 * 16*sizeof(void*) = 128 bytes of memory per node, which is fucking
310 * insane. What we do is store Node Indexes, and use these indexes
311 * to look up each node in the om->index array. These indexes are
312 * signed shorts, so this limits the amount of unique OIDs that
313 * fit in the structure to about 20000 (assuming a more or less uniform
314 * distribution).
315 *
316 * - All the nodes in om->index array are stored contiguously in
317 * memory, and each of them is 32 bytes, so we fit 2x nodes per
318 * cache line. Convenient for speed.
319 *
320 * - To differentiate the leafs from the normal nodes, we store all
321 * the indexes towards a leaf as a negative index (indexes to normal
322 * nodes are positives). When we find that one of the children for
323 * a node has a negative value, that means it's going to be a leaf.
324 * This reduces the amount of indexes we have by two, but also reduces
325 * the size of each node by 1-4 bytes (the amount we would need to
326 * add a `is_leaf` field): this is good because it allows the nodes
327 * to fit cleanly in cache lines.
328 *
329 * - Once we reach an empty children, instead of continuing to insert
330 * new nodes for each remaining character of the OID, we store a pointer
331 * to the tail in the leaf; if the leaf is reached again, we turn it
332 * into a normal node and use the tail to create a new leaf.
333 *
334 * This is a pretty good balance between performance and memory usage.
335 */
336 int git_oid_shorten_add(git_oid_shorten *os, const char *text_oid)
337 {
338 int i, is_leaf;
339 node_index idx;
340
341 if (os->full)
342 return GIT_ENOMEM;
343
344 idx = 0;
345 is_leaf = 0;
346
347 for (i = 0; i < GIT_OID_HEXSZ; ++i) {
348 int c = from_hex[(int)text_oid[i]];
349 trie_node *node;
350
351 if (c == -1)
352 return git__throw(GIT_ENOTOID, "Failed to shorten OID. Invalid hex value");
353
354 node = &os->nodes[idx];
355
356 if (is_leaf) {
357 const char *tail;
358
359 tail = node->tail;
360 node->tail = NULL;
361
362 node = push_leaf(os, idx, from_hex[(int)tail[0]], &tail[1]);
363 if (node == NULL)
364 return GIT_ENOMEM;
365 }
366
367 if (node->children[c] == 0) {
368 if (push_leaf(os, idx, c, &text_oid[i + 1]) == NULL)
369 return GIT_ENOMEM;
370 break;
371 }
372
373 idx = node->children[c];
374 is_leaf = 0;
375
376 if (idx < 0) {
377 node->children[c] = idx = -idx;
378 is_leaf = 1;
379 }
380 }
381
382 if (++i > os->min_length)
383 os->min_length = i;
384
385 return os->min_length;
386 }
387