]> git.proxmox.com Git - libgit2.git/blob - src/delta.c
remote: create FETCH_HEAD with a refspecless remote
[libgit2.git] / src / delta.c
1 /*
2 * Copyright (C) the libgit2 contributors. All rights reserved.
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8 #include "delta.h"
9
10 /* maximum hash entry list for the same hash bucket */
11 #define HASH_LIMIT 64
12
13 #define RABIN_SHIFT 23
14 #define RABIN_WINDOW 16
15
16 static const unsigned int T[256] = {
17 0x00000000, 0xab59b4d1, 0x56b369a2, 0xfdeadd73, 0x063f6795, 0xad66d344,
18 0x508c0e37, 0xfbd5bae6, 0x0c7ecf2a, 0xa7277bfb, 0x5acda688, 0xf1941259,
19 0x0a41a8bf, 0xa1181c6e, 0x5cf2c11d, 0xf7ab75cc, 0x18fd9e54, 0xb3a42a85,
20 0x4e4ef7f6, 0xe5174327, 0x1ec2f9c1, 0xb59b4d10, 0x48719063, 0xe32824b2,
21 0x1483517e, 0xbfdae5af, 0x423038dc, 0xe9698c0d, 0x12bc36eb, 0xb9e5823a,
22 0x440f5f49, 0xef56eb98, 0x31fb3ca8, 0x9aa28879, 0x6748550a, 0xcc11e1db,
23 0x37c45b3d, 0x9c9defec, 0x6177329f, 0xca2e864e, 0x3d85f382, 0x96dc4753,
24 0x6b369a20, 0xc06f2ef1, 0x3bba9417, 0x90e320c6, 0x6d09fdb5, 0xc6504964,
25 0x2906a2fc, 0x825f162d, 0x7fb5cb5e, 0xd4ec7f8f, 0x2f39c569, 0x846071b8,
26 0x798aaccb, 0xd2d3181a, 0x25786dd6, 0x8e21d907, 0x73cb0474, 0xd892b0a5,
27 0x23470a43, 0x881ebe92, 0x75f463e1, 0xdeadd730, 0x63f67950, 0xc8afcd81,
28 0x354510f2, 0x9e1ca423, 0x65c91ec5, 0xce90aa14, 0x337a7767, 0x9823c3b6,
29 0x6f88b67a, 0xc4d102ab, 0x393bdfd8, 0x92626b09, 0x69b7d1ef, 0xc2ee653e,
30 0x3f04b84d, 0x945d0c9c, 0x7b0be704, 0xd05253d5, 0x2db88ea6, 0x86e13a77,
31 0x7d348091, 0xd66d3440, 0x2b87e933, 0x80de5de2, 0x7775282e, 0xdc2c9cff,
32 0x21c6418c, 0x8a9ff55d, 0x714a4fbb, 0xda13fb6a, 0x27f92619, 0x8ca092c8,
33 0x520d45f8, 0xf954f129, 0x04be2c5a, 0xafe7988b, 0x5432226d, 0xff6b96bc,
34 0x02814bcf, 0xa9d8ff1e, 0x5e738ad2, 0xf52a3e03, 0x08c0e370, 0xa39957a1,
35 0x584ced47, 0xf3155996, 0x0eff84e5, 0xa5a63034, 0x4af0dbac, 0xe1a96f7d,
36 0x1c43b20e, 0xb71a06df, 0x4ccfbc39, 0xe79608e8, 0x1a7cd59b, 0xb125614a,
37 0x468e1486, 0xedd7a057, 0x103d7d24, 0xbb64c9f5, 0x40b17313, 0xebe8c7c2,
38 0x16021ab1, 0xbd5bae60, 0x6cb54671, 0xc7ecf2a0, 0x3a062fd3, 0x915f9b02,
39 0x6a8a21e4, 0xc1d39535, 0x3c394846, 0x9760fc97, 0x60cb895b, 0xcb923d8a,
40 0x3678e0f9, 0x9d215428, 0x66f4eece, 0xcdad5a1f, 0x3047876c, 0x9b1e33bd,
41 0x7448d825, 0xdf116cf4, 0x22fbb187, 0x89a20556, 0x7277bfb0, 0xd92e0b61,
42 0x24c4d612, 0x8f9d62c3, 0x7836170f, 0xd36fa3de, 0x2e857ead, 0x85dcca7c,
43 0x7e09709a, 0xd550c44b, 0x28ba1938, 0x83e3ade9, 0x5d4e7ad9, 0xf617ce08,
44 0x0bfd137b, 0xa0a4a7aa, 0x5b711d4c, 0xf028a99d, 0x0dc274ee, 0xa69bc03f,
45 0x5130b5f3, 0xfa690122, 0x0783dc51, 0xacda6880, 0x570fd266, 0xfc5666b7,
46 0x01bcbbc4, 0xaae50f15, 0x45b3e48d, 0xeeea505c, 0x13008d2f, 0xb85939fe,
47 0x438c8318, 0xe8d537c9, 0x153feaba, 0xbe665e6b, 0x49cd2ba7, 0xe2949f76,
48 0x1f7e4205, 0xb427f6d4, 0x4ff24c32, 0xe4abf8e3, 0x19412590, 0xb2189141,
49 0x0f433f21, 0xa41a8bf0, 0x59f05683, 0xf2a9e252, 0x097c58b4, 0xa225ec65,
50 0x5fcf3116, 0xf49685c7, 0x033df00b, 0xa86444da, 0x558e99a9, 0xfed72d78,
51 0x0502979e, 0xae5b234f, 0x53b1fe3c, 0xf8e84aed, 0x17bea175, 0xbce715a4,
52 0x410dc8d7, 0xea547c06, 0x1181c6e0, 0xbad87231, 0x4732af42, 0xec6b1b93,
53 0x1bc06e5f, 0xb099da8e, 0x4d7307fd, 0xe62ab32c, 0x1dff09ca, 0xb6a6bd1b,
54 0x4b4c6068, 0xe015d4b9, 0x3eb80389, 0x95e1b758, 0x680b6a2b, 0xc352defa,
55 0x3887641c, 0x93ded0cd, 0x6e340dbe, 0xc56db96f, 0x32c6cca3, 0x999f7872,
56 0x6475a501, 0xcf2c11d0, 0x34f9ab36, 0x9fa01fe7, 0x624ac294, 0xc9137645,
57 0x26459ddd, 0x8d1c290c, 0x70f6f47f, 0xdbaf40ae, 0x207afa48, 0x8b234e99,
58 0x76c993ea, 0xdd90273b, 0x2a3b52f7, 0x8162e626, 0x7c883b55, 0xd7d18f84,
59 0x2c043562, 0x875d81b3, 0x7ab75cc0, 0xd1eee811
60 };
61
62 static const unsigned int U[256] = {
63 0x00000000, 0x7eb5200d, 0x5633f4cb, 0x2886d4c6, 0x073e5d47, 0x798b7d4a,
64 0x510da98c, 0x2fb88981, 0x0e7cba8e, 0x70c99a83, 0x584f4e45, 0x26fa6e48,
65 0x0942e7c9, 0x77f7c7c4, 0x5f711302, 0x21c4330f, 0x1cf9751c, 0x624c5511,
66 0x4aca81d7, 0x347fa1da, 0x1bc7285b, 0x65720856, 0x4df4dc90, 0x3341fc9d,
67 0x1285cf92, 0x6c30ef9f, 0x44b63b59, 0x3a031b54, 0x15bb92d5, 0x6b0eb2d8,
68 0x4388661e, 0x3d3d4613, 0x39f2ea38, 0x4747ca35, 0x6fc11ef3, 0x11743efe,
69 0x3eccb77f, 0x40799772, 0x68ff43b4, 0x164a63b9, 0x378e50b6, 0x493b70bb,
70 0x61bda47d, 0x1f088470, 0x30b00df1, 0x4e052dfc, 0x6683f93a, 0x1836d937,
71 0x250b9f24, 0x5bbebf29, 0x73386bef, 0x0d8d4be2, 0x2235c263, 0x5c80e26e,
72 0x740636a8, 0x0ab316a5, 0x2b7725aa, 0x55c205a7, 0x7d44d161, 0x03f1f16c,
73 0x2c4978ed, 0x52fc58e0, 0x7a7a8c26, 0x04cfac2b, 0x73e5d470, 0x0d50f47d,
74 0x25d620bb, 0x5b6300b6, 0x74db8937, 0x0a6ea93a, 0x22e87dfc, 0x5c5d5df1,
75 0x7d996efe, 0x032c4ef3, 0x2baa9a35, 0x551fba38, 0x7aa733b9, 0x041213b4,
76 0x2c94c772, 0x5221e77f, 0x6f1ca16c, 0x11a98161, 0x392f55a7, 0x479a75aa,
77 0x6822fc2b, 0x1697dc26, 0x3e1108e0, 0x40a428ed, 0x61601be2, 0x1fd53bef,
78 0x3753ef29, 0x49e6cf24, 0x665e46a5, 0x18eb66a8, 0x306db26e, 0x4ed89263,
79 0x4a173e48, 0x34a21e45, 0x1c24ca83, 0x6291ea8e, 0x4d29630f, 0x339c4302,
80 0x1b1a97c4, 0x65afb7c9, 0x446b84c6, 0x3adea4cb, 0x1258700d, 0x6ced5000,
81 0x4355d981, 0x3de0f98c, 0x15662d4a, 0x6bd30d47, 0x56ee4b54, 0x285b6b59,
82 0x00ddbf9f, 0x7e689f92, 0x51d01613, 0x2f65361e, 0x07e3e2d8, 0x7956c2d5,
83 0x5892f1da, 0x2627d1d7, 0x0ea10511, 0x7014251c, 0x5facac9d, 0x21198c90,
84 0x099f5856, 0x772a785b, 0x4c921c31, 0x32273c3c, 0x1aa1e8fa, 0x6414c8f7,
85 0x4bac4176, 0x3519617b, 0x1d9fb5bd, 0x632a95b0, 0x42eea6bf, 0x3c5b86b2,
86 0x14dd5274, 0x6a687279, 0x45d0fbf8, 0x3b65dbf5, 0x13e30f33, 0x6d562f3e,
87 0x506b692d, 0x2ede4920, 0x06589de6, 0x78edbdeb, 0x5755346a, 0x29e01467,
88 0x0166c0a1, 0x7fd3e0ac, 0x5e17d3a3, 0x20a2f3ae, 0x08242768, 0x76910765,
89 0x59298ee4, 0x279caee9, 0x0f1a7a2f, 0x71af5a22, 0x7560f609, 0x0bd5d604,
90 0x235302c2, 0x5de622cf, 0x725eab4e, 0x0ceb8b43, 0x246d5f85, 0x5ad87f88,
91 0x7b1c4c87, 0x05a96c8a, 0x2d2fb84c, 0x539a9841, 0x7c2211c0, 0x029731cd,
92 0x2a11e50b, 0x54a4c506, 0x69998315, 0x172ca318, 0x3faa77de, 0x411f57d3,
93 0x6ea7de52, 0x1012fe5f, 0x38942a99, 0x46210a94, 0x67e5399b, 0x19501996,
94 0x31d6cd50, 0x4f63ed5d, 0x60db64dc, 0x1e6e44d1, 0x36e89017, 0x485db01a,
95 0x3f77c841, 0x41c2e84c, 0x69443c8a, 0x17f11c87, 0x38499506, 0x46fcb50b,
96 0x6e7a61cd, 0x10cf41c0, 0x310b72cf, 0x4fbe52c2, 0x67388604, 0x198da609,
97 0x36352f88, 0x48800f85, 0x6006db43, 0x1eb3fb4e, 0x238ebd5d, 0x5d3b9d50,
98 0x75bd4996, 0x0b08699b, 0x24b0e01a, 0x5a05c017, 0x728314d1, 0x0c3634dc,
99 0x2df207d3, 0x534727de, 0x7bc1f318, 0x0574d315, 0x2acc5a94, 0x54797a99,
100 0x7cffae5f, 0x024a8e52, 0x06852279, 0x78300274, 0x50b6d6b2, 0x2e03f6bf,
101 0x01bb7f3e, 0x7f0e5f33, 0x57888bf5, 0x293dabf8, 0x08f998f7, 0x764cb8fa,
102 0x5eca6c3c, 0x207f4c31, 0x0fc7c5b0, 0x7172e5bd, 0x59f4317b, 0x27411176,
103 0x1a7c5765, 0x64c97768, 0x4c4fa3ae, 0x32fa83a3, 0x1d420a22, 0x63f72a2f,
104 0x4b71fee9, 0x35c4dee4, 0x1400edeb, 0x6ab5cde6, 0x42331920, 0x3c86392d,
105 0x133eb0ac, 0x6d8b90a1, 0x450d4467, 0x3bb8646a
106 };
107
108 struct index_entry {
109 const unsigned char *ptr;
110 unsigned int val;
111 struct index_entry *next;
112 };
113
114 struct git_delta_index {
115 unsigned long memsize;
116 const void *src_buf;
117 unsigned long src_size;
118 unsigned int hash_mask;
119 struct index_entry *hash[GIT_FLEX_ARRAY];
120 };
121
122 struct git_delta_index *
123 git_delta_create_index(const void *buf, unsigned long bufsize)
124 {
125 unsigned int i, hsize, hmask, entries, prev_val, *hash_count;
126 const unsigned char *data, *buffer = buf;
127 struct git_delta_index *index;
128 struct index_entry *entry, **hash;
129 void *mem;
130 unsigned long memsize;
131
132 if (!buf || !bufsize)
133 return NULL;
134
135 /* Determine index hash size. Note that indexing skips the
136 first byte to allow for optimizing the rabin polynomial
137 initialization in create_delta(). */
138 entries = (unsigned int)(bufsize - 1) / RABIN_WINDOW;
139 if (bufsize >= 0xffffffffUL) {
140 /*
141 * Current delta format can't encode offsets into
142 * reference buffer with more than 32 bits.
143 */
144 entries = 0xfffffffeU / RABIN_WINDOW;
145 }
146 hsize = entries / 4;
147 for (i = 4; (1u << i) < hsize && i < 31; i++);
148 hsize = 1 << i;
149 hmask = hsize - 1;
150
151 /* allocate lookup index */
152 memsize = sizeof(*index) +
153 sizeof(*hash) * hsize +
154 sizeof(*entry) * entries;
155 mem = git__malloc(memsize);
156 if (!mem)
157 return NULL;
158 index = mem;
159 mem = index->hash;
160 hash = mem;
161 mem = hash + hsize;
162 entry = mem;
163
164 index->memsize = memsize;
165 index->src_buf = buf;
166 index->src_size = bufsize;
167 index->hash_mask = hmask;
168 memset(hash, 0, hsize * sizeof(*hash));
169
170 /* allocate an array to count hash entries */
171 hash_count = git__calloc(hsize, sizeof(*hash_count));
172 if (!hash_count) {
173 git__free(index);
174 return NULL;
175 }
176
177 /* then populate the index */
178 prev_val = ~0;
179 for (data = buffer + entries * RABIN_WINDOW - RABIN_WINDOW;
180 data >= buffer;
181 data -= RABIN_WINDOW) {
182 unsigned int val = 0;
183 for (i = 1; i <= RABIN_WINDOW; i++)
184 val = ((val << 8) | data[i]) ^ T[val >> RABIN_SHIFT];
185 if (val == prev_val) {
186 /* keep the lowest of consecutive identical blocks */
187 entry[-1].ptr = data + RABIN_WINDOW;
188 } else {
189 prev_val = val;
190 i = val & hmask;
191 entry->ptr = data + RABIN_WINDOW;
192 entry->val = val;
193 entry->next = hash[i];
194 hash[i] = entry++;
195 hash_count[i]++;
196 }
197 }
198
199 /*
200 * Determine a limit on the number of entries in the same hash
201 * bucket. This guard us against patological data sets causing
202 * really bad hash distribution with most entries in the same hash
203 * bucket that would bring us to O(m*n) computing costs (m and n
204 * corresponding to reference and target buffer sizes).
205 *
206 * Make sure none of the hash buckets has more entries than
207 * we're willing to test. Otherwise we cull the entry list
208 * uniformly to still preserve a good repartition across
209 * the reference buffer.
210 */
211 for (i = 0; i < hsize; i++) {
212 if (hash_count[i] < HASH_LIMIT)
213 continue;
214
215 entry = hash[i];
216 do {
217 struct index_entry *keep = entry;
218 int skip = hash_count[i] / HASH_LIMIT / 2;
219 do {
220 entry = entry->next;
221 } while(--skip && entry);
222 keep->next = entry;
223 } while (entry);
224 }
225 git__free(hash_count);
226
227 return index;
228 }
229
230 void git_delta_free_index(struct git_delta_index *index)
231 {
232 git__free(index);
233 }
234
235 unsigned long git_delta_sizeof_index(struct git_delta_index *index)
236 {
237 if (index)
238 return index->memsize;
239 else
240 return 0;
241 }
242
243 /*
244 * The maximum size for any opcode sequence, including the initial header
245 * plus rabin window plus biggest copy.
246 */
247 #define MAX_OP_SIZE (5 + 5 + 1 + RABIN_WINDOW + 7)
248
249 void *
250 git_delta_create(
251 const struct git_delta_index *index,
252 const void *trg_buf,
253 unsigned long trg_size,
254 unsigned long *delta_size,
255 unsigned long max_size)
256 {
257 unsigned int i, outpos, outsize, moff, msize, val;
258 int inscnt;
259 const unsigned char *ref_data, *ref_top, *data, *top;
260 unsigned char *out;
261
262 if (!trg_buf || !trg_size)
263 return NULL;
264
265 outpos = 0;
266 outsize = 8192;
267 if (max_size && outsize >= max_size)
268 outsize = (unsigned int)(max_size + MAX_OP_SIZE + 1);
269 out = git__malloc(outsize);
270 if (!out)
271 return NULL;
272
273 /* store reference buffer size */
274 i = index->src_size;
275 while (i >= 0x80) {
276 out[outpos++] = i | 0x80;
277 i >>= 7;
278 }
279 out[outpos++] = i;
280
281 /* store target buffer size */
282 i = trg_size;
283 while (i >= 0x80) {
284 out[outpos++] = i | 0x80;
285 i >>= 7;
286 }
287 out[outpos++] = i;
288
289 ref_data = index->src_buf;
290 ref_top = ref_data + index->src_size;
291 data = trg_buf;
292 top = (const unsigned char *) trg_buf + trg_size;
293
294 outpos++;
295 val = 0;
296 for (i = 0; i < RABIN_WINDOW && data < top; i++, data++) {
297 out[outpos++] = *data;
298 val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
299 }
300 inscnt = i;
301
302 moff = 0;
303 msize = 0;
304 while (data < top) {
305 if (msize < 4096) {
306 struct index_entry *entry;
307 val ^= U[data[-RABIN_WINDOW]];
308 val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
309 i = val & index->hash_mask;
310 for (entry = index->hash[i]; entry; entry = entry->next) {
311 const unsigned char *ref = entry->ptr;
312 const unsigned char *src = data;
313 unsigned int ref_size = (unsigned int)(ref_top - ref);
314 if (entry->val != val)
315 continue;
316 if (ref_size > (unsigned int)(top - src))
317 ref_size = (unsigned int)(top - src);
318 if (ref_size <= msize)
319 break;
320 while (ref_size-- && *src++ == *ref)
321 ref++;
322 if (msize < (unsigned int)(ref - entry->ptr)) {
323 /* this is our best match so far */
324 msize = (unsigned int)(ref - entry->ptr);
325 moff = (unsigned int)(entry->ptr - ref_data);
326 if (msize >= 4096) /* good enough */
327 break;
328 }
329 }
330 }
331
332 if (msize < 4) {
333 if (!inscnt)
334 outpos++;
335 out[outpos++] = *data++;
336 inscnt++;
337 if (inscnt == 0x7f) {
338 out[outpos - inscnt - 1] = inscnt;
339 inscnt = 0;
340 }
341 msize = 0;
342 } else {
343 unsigned int left;
344 unsigned char *op;
345
346 if (inscnt) {
347 while (moff && ref_data[moff-1] == data[-1]) {
348 /* we can match one byte back */
349 msize++;
350 moff--;
351 data--;
352 outpos--;
353 if (--inscnt)
354 continue;
355 outpos--; /* remove count slot */
356 inscnt--; /* make it -1 */
357 break;
358 }
359 out[outpos - inscnt - 1] = inscnt;
360 inscnt = 0;
361 }
362
363 /* A copy op is currently limited to 64KB (pack v2) */
364 left = (msize < 0x10000) ? 0 : (msize - 0x10000);
365 msize -= left;
366
367 op = out + outpos++;
368 i = 0x80;
369
370 if (moff & 0x000000ff)
371 out[outpos++] = moff >> 0, i |= 0x01;
372 if (moff & 0x0000ff00)
373 out[outpos++] = moff >> 8, i |= 0x02;
374 if (moff & 0x00ff0000)
375 out[outpos++] = moff >> 16, i |= 0x04;
376 if (moff & 0xff000000)
377 out[outpos++] = moff >> 24, i |= 0x08;
378
379 if (msize & 0x00ff)
380 out[outpos++] = msize >> 0, i |= 0x10;
381 if (msize & 0xff00)
382 out[outpos++] = msize >> 8, i |= 0x20;
383
384 *op = i;
385
386 data += msize;
387 moff += msize;
388 msize = left;
389
390 if (msize < 4096) {
391 int j;
392 val = 0;
393 for (j = -RABIN_WINDOW; j < 0; j++)
394 val = ((val << 8) | data[j])
395 ^ T[val >> RABIN_SHIFT];
396 }
397 }
398
399 if (outpos >= outsize - MAX_OP_SIZE) {
400 void *tmp = out;
401 outsize = outsize * 3 / 2;
402 if (max_size && outsize >= max_size)
403 outsize = max_size + MAX_OP_SIZE + 1;
404 if (max_size && outpos > max_size)
405 break;
406 out = git__realloc(out, outsize);
407 if (!out) {
408 git__free(tmp);
409 return NULL;
410 }
411 }
412 }
413
414 if (inscnt)
415 out[outpos - inscnt - 1] = inscnt;
416
417 if (max_size && outpos > max_size) {
418 git__free(out);
419 return NULL;
420 }
421
422 *delta_size = outpos;
423 return out;
424 }