]> git.proxmox.com Git - libgit2.git/blame - src/delta.c
New upstream version 1.3.0+dfsg.1
[libgit2.git] / src / delta.c
CommitLineData
ec1d42b7 1/*
359fc2d2 2 * Copyright (C) the libgit2 contributors. All rights reserved.
ec1d42b7 3 *
f6ed5e2d
PK
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
ec1d42b7
MS
6 */
7
8#include "delta.h"
9
10/* maximum hash entry list for the same hash bucket */
11#define HASH_LIMIT 64
12
13#define RABIN_SHIFT 23
14#define RABIN_WINDOW 16
15
16static const unsigned int T[256] = {
17 0x00000000, 0xab59b4d1, 0x56b369a2, 0xfdeadd73, 0x063f6795, 0xad66d344,
18 0x508c0e37, 0xfbd5bae6, 0x0c7ecf2a, 0xa7277bfb, 0x5acda688, 0xf1941259,
19 0x0a41a8bf, 0xa1181c6e, 0x5cf2c11d, 0xf7ab75cc, 0x18fd9e54, 0xb3a42a85,
20 0x4e4ef7f6, 0xe5174327, 0x1ec2f9c1, 0xb59b4d10, 0x48719063, 0xe32824b2,
21 0x1483517e, 0xbfdae5af, 0x423038dc, 0xe9698c0d, 0x12bc36eb, 0xb9e5823a,
22 0x440f5f49, 0xef56eb98, 0x31fb3ca8, 0x9aa28879, 0x6748550a, 0xcc11e1db,
23 0x37c45b3d, 0x9c9defec, 0x6177329f, 0xca2e864e, 0x3d85f382, 0x96dc4753,
24 0x6b369a20, 0xc06f2ef1, 0x3bba9417, 0x90e320c6, 0x6d09fdb5, 0xc6504964,
25 0x2906a2fc, 0x825f162d, 0x7fb5cb5e, 0xd4ec7f8f, 0x2f39c569, 0x846071b8,
26 0x798aaccb, 0xd2d3181a, 0x25786dd6, 0x8e21d907, 0x73cb0474, 0xd892b0a5,
27 0x23470a43, 0x881ebe92, 0x75f463e1, 0xdeadd730, 0x63f67950, 0xc8afcd81,
28 0x354510f2, 0x9e1ca423, 0x65c91ec5, 0xce90aa14, 0x337a7767, 0x9823c3b6,
29 0x6f88b67a, 0xc4d102ab, 0x393bdfd8, 0x92626b09, 0x69b7d1ef, 0xc2ee653e,
30 0x3f04b84d, 0x945d0c9c, 0x7b0be704, 0xd05253d5, 0x2db88ea6, 0x86e13a77,
31 0x7d348091, 0xd66d3440, 0x2b87e933, 0x80de5de2, 0x7775282e, 0xdc2c9cff,
32 0x21c6418c, 0x8a9ff55d, 0x714a4fbb, 0xda13fb6a, 0x27f92619, 0x8ca092c8,
33 0x520d45f8, 0xf954f129, 0x04be2c5a, 0xafe7988b, 0x5432226d, 0xff6b96bc,
34 0x02814bcf, 0xa9d8ff1e, 0x5e738ad2, 0xf52a3e03, 0x08c0e370, 0xa39957a1,
35 0x584ced47, 0xf3155996, 0x0eff84e5, 0xa5a63034, 0x4af0dbac, 0xe1a96f7d,
36 0x1c43b20e, 0xb71a06df, 0x4ccfbc39, 0xe79608e8, 0x1a7cd59b, 0xb125614a,
37 0x468e1486, 0xedd7a057, 0x103d7d24, 0xbb64c9f5, 0x40b17313, 0xebe8c7c2,
38 0x16021ab1, 0xbd5bae60, 0x6cb54671, 0xc7ecf2a0, 0x3a062fd3, 0x915f9b02,
39 0x6a8a21e4, 0xc1d39535, 0x3c394846, 0x9760fc97, 0x60cb895b, 0xcb923d8a,
40 0x3678e0f9, 0x9d215428, 0x66f4eece, 0xcdad5a1f, 0x3047876c, 0x9b1e33bd,
41 0x7448d825, 0xdf116cf4, 0x22fbb187, 0x89a20556, 0x7277bfb0, 0xd92e0b61,
42 0x24c4d612, 0x8f9d62c3, 0x7836170f, 0xd36fa3de, 0x2e857ead, 0x85dcca7c,
43 0x7e09709a, 0xd550c44b, 0x28ba1938, 0x83e3ade9, 0x5d4e7ad9, 0xf617ce08,
44 0x0bfd137b, 0xa0a4a7aa, 0x5b711d4c, 0xf028a99d, 0x0dc274ee, 0xa69bc03f,
45 0x5130b5f3, 0xfa690122, 0x0783dc51, 0xacda6880, 0x570fd266, 0xfc5666b7,
46 0x01bcbbc4, 0xaae50f15, 0x45b3e48d, 0xeeea505c, 0x13008d2f, 0xb85939fe,
47 0x438c8318, 0xe8d537c9, 0x153feaba, 0xbe665e6b, 0x49cd2ba7, 0xe2949f76,
48 0x1f7e4205, 0xb427f6d4, 0x4ff24c32, 0xe4abf8e3, 0x19412590, 0xb2189141,
49 0x0f433f21, 0xa41a8bf0, 0x59f05683, 0xf2a9e252, 0x097c58b4, 0xa225ec65,
50 0x5fcf3116, 0xf49685c7, 0x033df00b, 0xa86444da, 0x558e99a9, 0xfed72d78,
51 0x0502979e, 0xae5b234f, 0x53b1fe3c, 0xf8e84aed, 0x17bea175, 0xbce715a4,
52 0x410dc8d7, 0xea547c06, 0x1181c6e0, 0xbad87231, 0x4732af42, 0xec6b1b93,
53 0x1bc06e5f, 0xb099da8e, 0x4d7307fd, 0xe62ab32c, 0x1dff09ca, 0xb6a6bd1b,
54 0x4b4c6068, 0xe015d4b9, 0x3eb80389, 0x95e1b758, 0x680b6a2b, 0xc352defa,
55 0x3887641c, 0x93ded0cd, 0x6e340dbe, 0xc56db96f, 0x32c6cca3, 0x999f7872,
56 0x6475a501, 0xcf2c11d0, 0x34f9ab36, 0x9fa01fe7, 0x624ac294, 0xc9137645,
57 0x26459ddd, 0x8d1c290c, 0x70f6f47f, 0xdbaf40ae, 0x207afa48, 0x8b234e99,
58 0x76c993ea, 0xdd90273b, 0x2a3b52f7, 0x8162e626, 0x7c883b55, 0xd7d18f84,
59 0x2c043562, 0x875d81b3, 0x7ab75cc0, 0xd1eee811
60};
61
62static const unsigned int U[256] = {
63 0x00000000, 0x7eb5200d, 0x5633f4cb, 0x2886d4c6, 0x073e5d47, 0x798b7d4a,
64 0x510da98c, 0x2fb88981, 0x0e7cba8e, 0x70c99a83, 0x584f4e45, 0x26fa6e48,
65 0x0942e7c9, 0x77f7c7c4, 0x5f711302, 0x21c4330f, 0x1cf9751c, 0x624c5511,
66 0x4aca81d7, 0x347fa1da, 0x1bc7285b, 0x65720856, 0x4df4dc90, 0x3341fc9d,
67 0x1285cf92, 0x6c30ef9f, 0x44b63b59, 0x3a031b54, 0x15bb92d5, 0x6b0eb2d8,
68 0x4388661e, 0x3d3d4613, 0x39f2ea38, 0x4747ca35, 0x6fc11ef3, 0x11743efe,
69 0x3eccb77f, 0x40799772, 0x68ff43b4, 0x164a63b9, 0x378e50b6, 0x493b70bb,
70 0x61bda47d, 0x1f088470, 0x30b00df1, 0x4e052dfc, 0x6683f93a, 0x1836d937,
71 0x250b9f24, 0x5bbebf29, 0x73386bef, 0x0d8d4be2, 0x2235c263, 0x5c80e26e,
72 0x740636a8, 0x0ab316a5, 0x2b7725aa, 0x55c205a7, 0x7d44d161, 0x03f1f16c,
73 0x2c4978ed, 0x52fc58e0, 0x7a7a8c26, 0x04cfac2b, 0x73e5d470, 0x0d50f47d,
74 0x25d620bb, 0x5b6300b6, 0x74db8937, 0x0a6ea93a, 0x22e87dfc, 0x5c5d5df1,
75 0x7d996efe, 0x032c4ef3, 0x2baa9a35, 0x551fba38, 0x7aa733b9, 0x041213b4,
76 0x2c94c772, 0x5221e77f, 0x6f1ca16c, 0x11a98161, 0x392f55a7, 0x479a75aa,
77 0x6822fc2b, 0x1697dc26, 0x3e1108e0, 0x40a428ed, 0x61601be2, 0x1fd53bef,
78 0x3753ef29, 0x49e6cf24, 0x665e46a5, 0x18eb66a8, 0x306db26e, 0x4ed89263,
79 0x4a173e48, 0x34a21e45, 0x1c24ca83, 0x6291ea8e, 0x4d29630f, 0x339c4302,
80 0x1b1a97c4, 0x65afb7c9, 0x446b84c6, 0x3adea4cb, 0x1258700d, 0x6ced5000,
81 0x4355d981, 0x3de0f98c, 0x15662d4a, 0x6bd30d47, 0x56ee4b54, 0x285b6b59,
82 0x00ddbf9f, 0x7e689f92, 0x51d01613, 0x2f65361e, 0x07e3e2d8, 0x7956c2d5,
83 0x5892f1da, 0x2627d1d7, 0x0ea10511, 0x7014251c, 0x5facac9d, 0x21198c90,
84 0x099f5856, 0x772a785b, 0x4c921c31, 0x32273c3c, 0x1aa1e8fa, 0x6414c8f7,
85 0x4bac4176, 0x3519617b, 0x1d9fb5bd, 0x632a95b0, 0x42eea6bf, 0x3c5b86b2,
86 0x14dd5274, 0x6a687279, 0x45d0fbf8, 0x3b65dbf5, 0x13e30f33, 0x6d562f3e,
87 0x506b692d, 0x2ede4920, 0x06589de6, 0x78edbdeb, 0x5755346a, 0x29e01467,
88 0x0166c0a1, 0x7fd3e0ac, 0x5e17d3a3, 0x20a2f3ae, 0x08242768, 0x76910765,
89 0x59298ee4, 0x279caee9, 0x0f1a7a2f, 0x71af5a22, 0x7560f609, 0x0bd5d604,
90 0x235302c2, 0x5de622cf, 0x725eab4e, 0x0ceb8b43, 0x246d5f85, 0x5ad87f88,
91 0x7b1c4c87, 0x05a96c8a, 0x2d2fb84c, 0x539a9841, 0x7c2211c0, 0x029731cd,
92 0x2a11e50b, 0x54a4c506, 0x69998315, 0x172ca318, 0x3faa77de, 0x411f57d3,
93 0x6ea7de52, 0x1012fe5f, 0x38942a99, 0x46210a94, 0x67e5399b, 0x19501996,
94 0x31d6cd50, 0x4f63ed5d, 0x60db64dc, 0x1e6e44d1, 0x36e89017, 0x485db01a,
95 0x3f77c841, 0x41c2e84c, 0x69443c8a, 0x17f11c87, 0x38499506, 0x46fcb50b,
96 0x6e7a61cd, 0x10cf41c0, 0x310b72cf, 0x4fbe52c2, 0x67388604, 0x198da609,
97 0x36352f88, 0x48800f85, 0x6006db43, 0x1eb3fb4e, 0x238ebd5d, 0x5d3b9d50,
98 0x75bd4996, 0x0b08699b, 0x24b0e01a, 0x5a05c017, 0x728314d1, 0x0c3634dc,
99 0x2df207d3, 0x534727de, 0x7bc1f318, 0x0574d315, 0x2acc5a94, 0x54797a99,
100 0x7cffae5f, 0x024a8e52, 0x06852279, 0x78300274, 0x50b6d6b2, 0x2e03f6bf,
101 0x01bb7f3e, 0x7f0e5f33, 0x57888bf5, 0x293dabf8, 0x08f998f7, 0x764cb8fa,
102 0x5eca6c3c, 0x207f4c31, 0x0fc7c5b0, 0x7172e5bd, 0x59f4317b, 0x27411176,
103 0x1a7c5765, 0x64c97768, 0x4c4fa3ae, 0x32fa83a3, 0x1d420a22, 0x63f72a2f,
104 0x4b71fee9, 0x35c4dee4, 0x1400edeb, 0x6ab5cde6, 0x42331920, 0x3c86392d,
105 0x133eb0ac, 0x6d8b90a1, 0x450d4467, 0x3bb8646a
106};
107
108struct index_entry {
109 const unsigned char *ptr;
110 unsigned int val;
f6ed5e2d 111 struct index_entry *next;
ec1d42b7
MS
112};
113
114struct git_delta_index {
115 unsigned long memsize;
116 const void *src_buf;
1cd65991 117 size_t src_size;
ec1d42b7
MS
118 unsigned int hash_mask;
119 struct index_entry *hash[GIT_FLEX_ARRAY];
120};
121
392702ee
ET
122static int lookup_index_alloc(
123 void **out, unsigned long *out_len, size_t entries, size_t hash_count)
124{
f1453c59 125 size_t entries_len, hash_len, index_len;
392702ee 126
ac3d33df
JK
127 GIT_ERROR_CHECK_ALLOC_MULTIPLY(&entries_len, entries, sizeof(struct index_entry));
128 GIT_ERROR_CHECK_ALLOC_MULTIPLY(&hash_len, hash_count, sizeof(struct index_entry *));
392702ee 129
ac3d33df
JK
130 GIT_ERROR_CHECK_ALLOC_ADD(&index_len, sizeof(struct git_delta_index), entries_len);
131 GIT_ERROR_CHECK_ALLOC_ADD(&index_len, index_len, hash_len);
392702ee
ET
132
133 if (!git__is_ulong(index_len)) {
ac3d33df 134 git_error_set(GIT_ERROR_NOMEMORY, "overly large delta");
392702ee
ET
135 return -1;
136 }
137
138 *out = git__malloc(index_len);
ac3d33df 139 GIT_ERROR_CHECK_ALLOC(*out);
392702ee 140
ac3d33df 141 *out_len = (unsigned long)index_len;
392702ee
ET
142 return 0;
143}
144
1cd65991
ET
145int git_delta_index_init(
146 git_delta_index **out, const void *buf, size_t bufsize)
ec1d42b7
MS
147{
148 unsigned int i, hsize, hmask, entries, prev_val, *hash_count;
149 const unsigned char *data, *buffer = buf;
150 struct git_delta_index *index;
f6ed5e2d 151 struct index_entry *entry, **hash;
ec1d42b7
MS
152 void *mem;
153 unsigned long memsize;
154
1cd65991
ET
155 *out = NULL;
156
ec1d42b7 157 if (!buf || !bufsize)
1cd65991 158 return 0;
ec1d42b7
MS
159
160 /* Determine index hash size. Note that indexing skips the
f6ed5e2d 161 first byte to allow for optimizing the rabin polynomial
ec1d42b7 162 initialization in create_delta(). */
a8122b5d 163 entries = (unsigned int)(bufsize - 1) / RABIN_WINDOW;
ec1d42b7
MS
164 if (bufsize >= 0xffffffffUL) {
165 /*
166 * Current delta format can't encode offsets into
167 * reference buffer with more than 32 bits.
168 */
169 entries = 0xfffffffeU / RABIN_WINDOW;
170 }
171 hsize = entries / 4;
307a3d67 172 for (i = 4; i < 31 && (1u << i) < hsize; i++);
ec1d42b7
MS
173 hsize = 1 << i;
174 hmask = hsize - 1;
175
392702ee 176 if (lookup_index_alloc(&mem, &memsize, entries, hsize) < 0)
1cd65991 177 return -1;
392702ee 178
f6ed5e2d
PK
179 index = mem;
180 mem = index->hash;
ec1d42b7
MS
181 hash = mem;
182 mem = hash + hsize;
183 entry = mem;
184
f6ed5e2d
PK
185 index->memsize = memsize;
186 index->src_buf = buf;
187 index->src_size = bufsize;
188 index->hash_mask = hmask;
ec1d42b7
MS
189 memset(hash, 0, hsize * sizeof(*hash));
190
191 /* allocate an array to count hash entries */
9007c53f 192 hash_count = git__calloc(hsize, sizeof(*hash_count));
ec1d42b7 193 if (!hash_count) {
f6ed5e2d 194 git__free(index);
1cd65991 195 return -1;
ec1d42b7
MS
196 }
197
198 /* then populate the index */
199 prev_val = ~0;
200 for (data = buffer + entries * RABIN_WINDOW - RABIN_WINDOW;
201 data >= buffer;
202 data -= RABIN_WINDOW) {
203 unsigned int val = 0;
204 for (i = 1; i <= RABIN_WINDOW; i++)
205 val = ((val << 8) | data[i]) ^ T[val >> RABIN_SHIFT];
206 if (val == prev_val) {
207 /* keep the lowest of consecutive identical blocks */
f6ed5e2d 208 entry[-1].ptr = data + RABIN_WINDOW;
ec1d42b7
MS
209 } else {
210 prev_val = val;
211 i = val & hmask;
f6ed5e2d
PK
212 entry->ptr = data + RABIN_WINDOW;
213 entry->val = val;
ec1d42b7
MS
214 entry->next = hash[i];
215 hash[i] = entry++;
216 hash_count[i]++;
217 }
218 }
219
220 /*
221 * Determine a limit on the number of entries in the same hash
f6ed5e2d 222 * bucket. This guard us against patological data sets causing
ec1d42b7
MS
223 * really bad hash distribution with most entries in the same hash
224 * bucket that would bring us to O(m*n) computing costs (m and n
225 * corresponding to reference and target buffer sizes).
226 *
227 * Make sure none of the hash buckets has more entries than
228 * we're willing to test. Otherwise we cull the entry list
229 * uniformly to still preserve a good repartition across
230 * the reference buffer.
231 */
232 for (i = 0; i < hsize; i++) {
f6ed5e2d 233 if (hash_count[i] < HASH_LIMIT)
ec1d42b7
MS
234 continue;
235
ec1d42b7 236 entry = hash[i];
ec1d42b7 237 do {
f6ed5e2d
PK
238 struct index_entry *keep = entry;
239 int skip = hash_count[i] / HASH_LIMIT / 2;
240 do {
241 entry = entry->next;
242 } while(--skip && entry);
243 keep->next = entry;
ec1d42b7
MS
244 } while (entry);
245 }
246 git__free(hash_count);
247
1cd65991
ET
248 *out = index;
249 return 0;
ec1d42b7
MS
250}
251
1cd65991 252void git_delta_index_free(git_delta_index *index)
ec1d42b7
MS
253{
254 git__free(index);
255}
256
1cd65991 257size_t git_delta_index_size(git_delta_index *index)
ec1d42b7 258{
c25aa7cd 259 GIT_ASSERT_ARG(index);
1cd65991
ET
260
261 return index->memsize;
ec1d42b7
MS
262}
263
264/*
265 * The maximum size for any opcode sequence, including the initial header
f6ed5e2d 266 * plus rabin window plus biggest copy.
ec1d42b7
MS
267 */
268#define MAX_OP_SIZE (5 + 5 + 1 + RABIN_WINDOW + 7)
269
1cd65991
ET
270int git_delta_create_from_index(
271 void **out,
272 size_t *out_len,
a8122b5d
RB
273 const struct git_delta_index *index,
274 const void *trg_buf,
1cd65991
ET
275 size_t trg_size,
276 size_t max_size)
ec1d42b7 277{
1cd65991 278 unsigned int i, bufpos, bufsize, moff, msize, val;
ec1d42b7
MS
279 int inscnt;
280 const unsigned char *ref_data, *ref_top, *data, *top;
1cd65991
ET
281 unsigned char *buf;
282
283 *out = NULL;
284 *out_len = 0;
ec1d42b7
MS
285
286 if (!trg_buf || !trg_size)
1cd65991 287 return 0;
ec1d42b7 288
ac3d33df
JK
289 if (index->src_size > UINT_MAX ||
290 trg_size > UINT_MAX ||
291 max_size > (UINT_MAX - MAX_OP_SIZE - 1)) {
292 git_error_set(GIT_ERROR_INVALID, "buffer sizes too large for delta processing");
293 return -1;
294 }
295
1cd65991
ET
296 bufpos = 0;
297 bufsize = 8192;
298 if (max_size && bufsize >= max_size)
299 bufsize = (unsigned int)(max_size + MAX_OP_SIZE + 1);
300 buf = git__malloc(bufsize);
ac3d33df 301 GIT_ERROR_CHECK_ALLOC(buf);
ec1d42b7
MS
302
303 /* store reference buffer size */
ac3d33df 304 i = (unsigned int)index->src_size;
ec1d42b7 305 while (i >= 0x80) {
1cd65991 306 buf[bufpos++] = i | 0x80;
ec1d42b7
MS
307 i >>= 7;
308 }
1cd65991 309 buf[bufpos++] = i;
ec1d42b7
MS
310
311 /* store target buffer size */
ac3d33df 312 i = (unsigned int)trg_size;
ec1d42b7 313 while (i >= 0x80) {
1cd65991 314 buf[bufpos++] = i | 0x80;
ec1d42b7
MS
315 i >>= 7;
316 }
1cd65991 317 buf[bufpos++] = i;
ec1d42b7
MS
318
319 ref_data = index->src_buf;
320 ref_top = ref_data + index->src_size;
321 data = trg_buf;
322 top = (const unsigned char *) trg_buf + trg_size;
323
1cd65991 324 bufpos++;
ec1d42b7
MS
325 val = 0;
326 for (i = 0; i < RABIN_WINDOW && data < top; i++, data++) {
1cd65991 327 buf[bufpos++] = *data;
ec1d42b7
MS
328 val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
329 }
330 inscnt = i;
331
332 moff = 0;
333 msize = 0;
334 while (data < top) {
335 if (msize < 4096) {
336 struct index_entry *entry;
337 val ^= U[data[-RABIN_WINDOW]];
338 val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
339 i = val & index->hash_mask;
f6ed5e2d 340 for (entry = index->hash[i]; entry; entry = entry->next) {
ec1d42b7
MS
341 const unsigned char *ref = entry->ptr;
342 const unsigned char *src = data;
a8122b5d 343 unsigned int ref_size = (unsigned int)(ref_top - ref);
ec1d42b7
MS
344 if (entry->val != val)
345 continue;
346 if (ref_size > (unsigned int)(top - src))
a8122b5d 347 ref_size = (unsigned int)(top - src);
ec1d42b7
MS
348 if (ref_size <= msize)
349 break;
350 while (ref_size-- && *src++ == *ref)
351 ref++;
352 if (msize < (unsigned int)(ref - entry->ptr)) {
353 /* this is our best match so far */
a8122b5d
RB
354 msize = (unsigned int)(ref - entry->ptr);
355 moff = (unsigned int)(entry->ptr - ref_data);
ec1d42b7
MS
356 if (msize >= 4096) /* good enough */
357 break;
358 }
359 }
360 }
361
362 if (msize < 4) {
363 if (!inscnt)
1cd65991
ET
364 bufpos++;
365 buf[bufpos++] = *data++;
ec1d42b7
MS
366 inscnt++;
367 if (inscnt == 0x7f) {
1cd65991 368 buf[bufpos - inscnt - 1] = inscnt;
ec1d42b7
MS
369 inscnt = 0;
370 }
371 msize = 0;
372 } else {
373 unsigned int left;
374 unsigned char *op;
375
376 if (inscnt) {
377 while (moff && ref_data[moff-1] == data[-1]) {
378 /* we can match one byte back */
379 msize++;
380 moff--;
381 data--;
1cd65991 382 bufpos--;
ec1d42b7
MS
383 if (--inscnt)
384 continue;
1cd65991 385 bufpos--; /* remove count slot */
ec1d42b7
MS
386 inscnt--; /* make it -1 */
387 break;
388 }
1cd65991 389 buf[bufpos - inscnt - 1] = inscnt;
ec1d42b7
MS
390 inscnt = 0;
391 }
392
393 /* A copy op is currently limited to 64KB (pack v2) */
394 left = (msize < 0x10000) ? 0 : (msize - 0x10000);
395 msize -= left;
396
1cd65991 397 op = buf + bufpos++;
ec1d42b7
MS
398 i = 0x80;
399
400 if (moff & 0x000000ff)
1cd65991 401 buf[bufpos++] = moff >> 0, i |= 0x01;
ec1d42b7 402 if (moff & 0x0000ff00)
1cd65991 403 buf[bufpos++] = moff >> 8, i |= 0x02;
ec1d42b7 404 if (moff & 0x00ff0000)
1cd65991 405 buf[bufpos++] = moff >> 16, i |= 0x04;
ec1d42b7 406 if (moff & 0xff000000)
1cd65991 407 buf[bufpos++] = moff >> 24, i |= 0x08;
ec1d42b7
MS
408
409 if (msize & 0x00ff)
1cd65991 410 buf[bufpos++] = msize >> 0, i |= 0x10;
ec1d42b7 411 if (msize & 0xff00)
1cd65991 412 buf[bufpos++] = msize >> 8, i |= 0x20;
ec1d42b7
MS
413
414 *op = i;
415
416 data += msize;
417 moff += msize;
418 msize = left;
419
420 if (msize < 4096) {
421 int j;
422 val = 0;
423 for (j = -RABIN_WINDOW; j < 0; j++)
424 val = ((val << 8) | data[j])
425 ^ T[val >> RABIN_SHIFT];
426 }
427 }
428
1cd65991
ET
429 if (bufpos >= bufsize - MAX_OP_SIZE) {
430 void *tmp = buf;
431 bufsize = bufsize * 3 / 2;
432 if (max_size && bufsize >= max_size)
ac3d33df 433 bufsize = (unsigned int)(max_size + MAX_OP_SIZE + 1);
1cd65991 434 if (max_size && bufpos > max_size)
ec1d42b7 435 break;
1cd65991
ET
436 buf = git__realloc(buf, bufsize);
437 if (!buf) {
ec1d42b7 438 git__free(tmp);
1cd65991 439 return -1;
ec1d42b7
MS
440 }
441 }
442 }
443
444 if (inscnt)
1cd65991 445 buf[bufpos - inscnt - 1] = inscnt;
ec1d42b7 446
1cd65991 447 if (max_size && bufpos > max_size) {
ac3d33df 448 git_error_set(GIT_ERROR_NOMEMORY, "delta would be larger than maximum size");
1cd65991
ET
449 git__free(buf);
450 return GIT_EBUFS;
ec1d42b7
MS
451 }
452
1cd65991
ET
453 *out_len = bufpos;
454 *out = buf;
455 return 0;
ec1d42b7 456}
6a2d2f8a
ET
457
458/*
459* Delta application was heavily cribbed from BinaryDelta.java in JGit, which
460* itself was heavily cribbed from <code>patch-delta.c</code> in the
461* GIT project. The original delta patching code was written by
462* Nicolas Pitre <nico@cam.org>.
463*/
464
465static int hdr_sz(
466 size_t *size,
467 const unsigned char **delta,
468 const unsigned char *end)
469{
470 const unsigned char *d = *delta;
471 size_t r = 0;
472 unsigned int c, shift = 0;
473
474 do {
1cd65991 475 if (d == end) {
ac3d33df 476 git_error_set(GIT_ERROR_INVALID, "truncated delta");
6a2d2f8a 477 return -1;
1cd65991
ET
478 }
479
6a2d2f8a
ET
480 c = *d++;
481 r |= (c & 0x7f) << shift;
482 shift += 7;
483 } while (c & 0x80);
484 *delta = d;
485 *size = r;
486 return 0;
487}
488
489int git_delta_read_header(
490 size_t *base_out,
491 size_t *result_out,
492 const unsigned char *delta,
493 size_t delta_len)
494{
495 const unsigned char *delta_end = delta + delta_len;
496 if ((hdr_sz(base_out, &delta, delta_end) < 0) ||
497 (hdr_sz(result_out, &delta, delta_end) < 0))
498 return -1;
499 return 0;
500}
501
502#define DELTA_HEADER_BUFFER_LEN 16
503int git_delta_read_header_fromstream(
504 size_t *base_sz, size_t *res_sz, git_packfile_stream *stream)
505{
506 static const size_t buffer_len = DELTA_HEADER_BUFFER_LEN;
507 unsigned char buffer[DELTA_HEADER_BUFFER_LEN];
508 const unsigned char *delta, *delta_end;
509 size_t len;
510 ssize_t read;
511
512 len = read = 0;
513 while (len < buffer_len) {
514 read = git_packfile_stream_read(stream, &buffer[len], buffer_len - len);
515
516 if (read == 0)
517 break;
518
519 if (read == GIT_EBUFS)
520 continue;
521
522 len += read;
523 }
524
525 delta = buffer;
526 delta_end = delta + len;
527 if ((hdr_sz(base_sz, &delta, delta_end) < 0) ||
528 (hdr_sz(res_sz, &delta, delta_end) < 0))
529 return -1;
530
531 return 0;
532}
533
534int git_delta_apply(
535 void **out,
536 size_t *out_len,
537 const unsigned char *base,
538 size_t base_len,
539 const unsigned char *delta,
540 size_t delta_len)
541{
542 const unsigned char *delta_end = delta + delta_len;
543 size_t base_sz, res_sz, alloc_sz;
544 unsigned char *res_dp;
545
546 *out = NULL;
547 *out_len = 0;
548
4b3ec53c
XL
549 /*
550 * Check that the base size matches the data we were given;
551 * if not we would underflow while accessing data from the
552 * base object, resulting in data corruption or segfault.
553 */
6a2d2f8a 554 if ((hdr_sz(&base_sz, &delta, delta_end) < 0) || (base_sz != base_len)) {
ac3d33df 555 git_error_set(GIT_ERROR_INVALID, "failed to apply delta: base size does not match given data");
6a2d2f8a
ET
556 return -1;
557 }
558
559 if (hdr_sz(&res_sz, &delta, delta_end) < 0) {
ac3d33df 560 git_error_set(GIT_ERROR_INVALID, "failed to apply delta: base size does not match given data");
6a2d2f8a
ET
561 return -1;
562 }
563
ac3d33df 564 GIT_ERROR_CHECK_ALLOC_ADD(&alloc_sz, res_sz, 1);
6a2d2f8a 565 res_dp = git__malloc(alloc_sz);
ac3d33df 566 GIT_ERROR_CHECK_ALLOC(res_dp);
6a2d2f8a
ET
567
568 res_dp[res_sz] = '\0';
569 *out = res_dp;
570 *out_len = res_sz;
571
572 while (delta < delta_end) {
573 unsigned char cmd = *delta++;
574 if (cmd & 0x80) {
4b3ec53c
XL
575 /* cmd is a copy instruction; copy from the base. */
576 size_t off = 0, len = 0, end;
577
578#define ADD_DELTA(o, shift) { if (delta < delta_end) (o) |= ((unsigned) *delta++ << shift); else goto fail; }
579 if (cmd & 0x01) ADD_DELTA(off, 0UL);
580 if (cmd & 0x02) ADD_DELTA(off, 8UL);
581 if (cmd & 0x04) ADD_DELTA(off, 16UL);
582 if (cmd & 0x08) ADD_DELTA(off, 24UL);
583
584 if (cmd & 0x10) ADD_DELTA(len, 0UL);
585 if (cmd & 0x20) ADD_DELTA(len, 8UL);
586 if (cmd & 0x40) ADD_DELTA(len, 16UL);
587 if (!len) len = 0x10000;
588#undef ADD_DELTA
589
590 if (GIT_ADD_SIZET_OVERFLOW(&end, off, len) ||
591 base_len < end || res_sz < len)
6a2d2f8a 592 goto fail;
4b3ec53c 593
6a2d2f8a
ET
594 memcpy(res_dp, base + off, len);
595 res_dp += len;
596 res_sz -= len;
597
4b3ec53c
XL
598 } else if (cmd) {
599 /*
600 * cmd is a literal insert instruction; copy from
601 * the delta stream itself.
602 */
6a2d2f8a
ET
603 if (delta_end - delta < cmd || res_sz < cmd)
604 goto fail;
605 memcpy(res_dp, delta, cmd);
606 delta += cmd;
607 res_dp += cmd;
608 res_sz -= cmd;
609
4b3ec53c
XL
610 } else {
611 /* cmd == 0 is reserved for future encodings. */
6a2d2f8a
ET
612 goto fail;
613 }
614 }
615
616 if (delta != delta_end || res_sz)
617 goto fail;
618 return 0;
619
620fail:
621 git__free(*out);
622
623 *out = NULL;
624 *out_len = 0;
625
ac3d33df 626 git_error_set(GIT_ERROR_INVALID, "failed to apply delta");
6a2d2f8a
ET
627 return -1;
628}