]>
git.proxmox.com Git - libgit2.git/blob - src/oid.c
2 * This file is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2,
4 * as published by the Free Software Foundation.
6 * In addition to the permissions in the GNU General Public License,
7 * the authors give you unlimited permission to link the compiled
8 * version of this file into combinations with other programs,
9 * and to distribute those combinations without any restriction
10 * coming from the use of this file. (The General Public License
11 * restrictions do apply in other respects; for example, they cover
12 * modification of the file, and distribution when not linked into
13 * a combined executable.)
15 * This file is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; see the file COPYING. If not, write to
22 * the Free Software Foundation, 51 Franklin Street, Fifth Floor,
23 * Boston, MA 02110-1301, USA.
28 #include "repository.h"
32 static signed char from_hex
[] = {
33 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 00 */
34 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 10 */
35 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 20 */
36 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, /* 30 */
37 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 40 */
38 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 50 */
39 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 60 */
40 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 70 */
41 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 80 */
42 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 90 */
43 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* a0 */
44 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* b0 */
45 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* c0 */
46 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* d0 */
47 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* e0 */
48 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* f0 */
50 static char to_hex
[] = "0123456789abcdef";
52 int git_oid_fromstrn(git_oid
*out
, const char *str
, size_t length
)
56 if (length
> GIT_OID_HEXSZ
)
57 length
= GIT_OID_HEXSZ
;
62 for (p
= 0; p
< length
; p
+= 2) {
63 int v
= (from_hex
[(unsigned char)str
[p
+ 0]] << 4)
64 | from_hex
[(unsigned char)str
[p
+ 1]];
67 return git__throw(GIT_ENOTOID
, "Failed to generate sha1. Given string is not a valid sha1 hash");
69 out
->id
[p
/ 2] = (unsigned char)v
;
72 for (; p
< GIT_OID_HEXSZ
; p
+= 2)
78 int git_oid_fromstr(git_oid
*out
, const char *str
)
80 return git_oid_fromstrn(out
, str
, GIT_OID_HEXSZ
);
83 GIT_INLINE(char) *fmt_one(char *str
, unsigned int val
)
85 *str
++ = to_hex
[val
>> 4];
86 *str
++ = to_hex
[val
& 0xf];
90 void git_oid_fmt(char *str
, const git_oid
*oid
)
94 for (i
= 0; i
< sizeof(oid
->id
); i
++)
95 str
= fmt_one(str
, oid
->id
[i
]);
98 void git_oid_pathfmt(char *str
, const git_oid
*oid
)
102 str
= fmt_one(str
, oid
->id
[0]);
104 for (i
= 1; i
< sizeof(oid
->id
); i
++)
105 str
= fmt_one(str
, oid
->id
[i
]);
108 char *git_oid_allocfmt(const git_oid
*oid
)
110 char *str
= git__malloc(GIT_OID_HEXSZ
+ 1);
113 git_oid_fmt(str
, oid
);
114 str
[GIT_OID_HEXSZ
] = '\0';
118 char *git_oid_to_string(char *out
, size_t n
, const git_oid
*oid
)
120 char str
[GIT_OID_HEXSZ
];
122 if (!out
|| n
== 0 || !oid
)
125 n
--; /* allow room for terminating NUL */
128 git_oid_fmt(str
, oid
);
129 if (n
> GIT_OID_HEXSZ
)
139 int git__parse_oid(git_oid
*oid
, const char **buffer_out
,
140 const char *buffer_end
, const char *header
)
142 const size_t sha_len
= GIT_OID_HEXSZ
;
143 const size_t header_len
= strlen(header
);
145 const char *buffer
= *buffer_out
;
147 if (buffer
+ (header_len
+ sha_len
+ 1) > buffer_end
)
148 return git__throw(GIT_EOBJCORRUPTED
, "Failed to parse OID. Buffer too small");
150 if (memcmp(buffer
, header
, header_len
) != 0)
151 return git__throw(GIT_EOBJCORRUPTED
, "Failed to parse OID. Buffer and header do not match");
153 if (buffer
[header_len
+ sha_len
] != '\n')
154 return git__throw(GIT_EOBJCORRUPTED
, "Failed to parse OID. Buffer not terminated correctly");
156 if (git_oid_fromstr(oid
, buffer
+ header_len
) < GIT_SUCCESS
)
157 return git__throw(GIT_EOBJCORRUPTED
, "Failed to parse OID. Failed to generate sha1");
159 *buffer_out
= buffer
+ (header_len
+ sha_len
+ 1);
164 int git__write_oid(git_odb_stream
*stream
, const char *header
, const git_oid
*oid
)
168 git_oid_fmt(hex_oid
+ 1, oid
);
173 stream
->write(stream
, header
, strlen(header
));
174 stream
->write(stream
, hex_oid
, 42);
178 void git_oid_fromraw(git_oid
*out
, const unsigned char *raw
)
180 memcpy(out
->id
, raw
, sizeof(out
->id
));
183 void git_oid_cpy(git_oid
*out
, const git_oid
*src
)
185 memcpy(out
->id
, src
->id
, sizeof(out
->id
));
188 int git_oid_cmp(const git_oid
*a
, const git_oid
*b
)
190 return memcmp(a
->id
, b
->id
, sizeof(a
->id
));
193 int git_oid_ncmp(const git_oid
*oid_a
, const git_oid
*oid_b
, unsigned int len
)
195 const unsigned char *a
= oid_a
->id
;
196 const unsigned char *b
= oid_b
->id
;
207 if ((*a
^ *b
) & 0xf0)
213 typedef short node_index
;
217 node_index children
[16];
220 struct git_oid_shorten
{
222 size_t node_count
, size
;
223 int min_length
, full
;
226 static int resize_trie(git_oid_shorten
*self
, size_t new_size
)
228 self
->nodes
= realloc(self
->nodes
, new_size
* sizeof(trie_node
));
229 if (self
->nodes
== NULL
)
232 if (new_size
> self
->size
) {
233 memset(&self
->nodes
[self
->size
], 0x0, (new_size
- self
->size
) * sizeof(trie_node
));
236 self
->size
= new_size
;
240 static trie_node
*push_leaf(git_oid_shorten
*os
, node_index idx
, int push_at
, const char *oid
)
242 trie_node
*node
, *leaf
;
245 if (os
->node_count
>= os
->size
) {
246 if (resize_trie(os
, os
->size
* 2) < GIT_SUCCESS
)
250 idx_leaf
= (node_index
)os
->node_count
++;
252 if (os
->node_count
== SHRT_MAX
)
255 node
= &os
->nodes
[idx
];
256 node
->children
[push_at
] = -idx_leaf
;
258 leaf
= &os
->nodes
[idx_leaf
];
264 git_oid_shorten
*git_oid_shorten_new(size_t min_length
)
268 os
= git__malloc(sizeof(git_oid_shorten
));
272 memset(os
, 0x0, sizeof(git_oid_shorten
));
274 if (resize_trie(os
, 16) < GIT_SUCCESS
) {
280 os
->min_length
= min_length
;
285 void git_oid_shorten_free(git_oid_shorten
*os
)
293 * What wizardry is this?
295 * This is just a memory-optimized trie: basically a very fancy
296 * 16-ary tree, which is used to store the prefixes of the OID
299 * Read more: http://en.wikipedia.org/wiki/Trie
301 * Magic that happens in this method:
303 * - Each node in the trie is an union, so it can work both as
304 * a normal node, or as a leaf.
306 * - Each normal node points to 16 children (one for each possible
307 * character in the oid). This is *not* stored in an array of
308 * pointers, because in a 64-bit arch this would be sucking
309 * 16*sizeof(void*) = 128 bytes of memory per node, which is fucking
310 * insane. What we do is store Node Indexes, and use these indexes
311 * to look up each node in the om->index array. These indexes are
312 * signed shorts, so this limits the amount of unique OIDs that
313 * fit in the structure to about 20000 (assuming a more or less uniform
316 * - All the nodes in om->index array are stored contiguously in
317 * memory, and each of them is 32 bytes, so we fit 2x nodes per
318 * cache line. Convenient for speed.
320 * - To differentiate the leafs from the normal nodes, we store all
321 * the indexes towards a leaf as a negative index (indexes to normal
322 * nodes are positives). When we find that one of the children for
323 * a node has a negative value, that means it's going to be a leaf.
324 * This reduces the amount of indexes we have by two, but also reduces
325 * the size of each node by 1-4 bytes (the amount we would need to
326 * add a `is_leaf` field): this is good because it allows the nodes
327 * to fit cleanly in cache lines.
329 * - Once we reach an empty children, instead of continuing to insert
330 * new nodes for each remaining character of the OID, we store a pointer
331 * to the tail in the leaf; if the leaf is reached again, we turn it
332 * into a normal node and use the tail to create a new leaf.
334 * This is a pretty good balance between performance and memory usage.
336 int git_oid_shorten_add(git_oid_shorten
*os
, const char *text_oid
)
347 for (i
= 0; i
< GIT_OID_HEXSZ
; ++i
) {
348 int c
= from_hex
[(int)text_oid
[i
]];
352 return git__throw(GIT_ENOTOID
, "Failed to shorten OID. Invalid hex value");
354 node
= &os
->nodes
[idx
];
362 node
= push_leaf(os
, idx
, from_hex
[(int)tail
[0]], &tail
[1]);
367 if (node
->children
[c
] == 0) {
368 if (push_leaf(os
, idx
, c
, &text_oid
[i
+ 1]) == NULL
)
373 idx
= node
->children
[c
];
377 node
->children
[c
] = idx
= -idx
;
382 if (++i
> os
->min_length
)
385 return os
->min_length
;