]>
git.proxmox.com Git - libgit2.git/blob - src/delta.c
2 * diff-delta.c: generate a delta between two buffers
4 * This code was greatly inspired by parts of LibXDiff from Davide Libenzi
5 * http://www.xmailserver.org/xdiff-lib.html
7 * Rewritten for GIT by Nicolas Pitre <nico@fluxnic.net>, (C) 2005-2007
9 * Modified for libgit2 by Michael Schubert <schu@schu.io>, (C) 2012
11 * This code is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License version 2 as
13 * published by the Free Software Foundation.
18 /* maximum hash entry list for the same hash bucket */
21 #define RABIN_SHIFT 23
22 #define RABIN_WINDOW 16
24 static const unsigned int T
[256] = {
25 0x00000000, 0xab59b4d1, 0x56b369a2, 0xfdeadd73, 0x063f6795, 0xad66d344,
26 0x508c0e37, 0xfbd5bae6, 0x0c7ecf2a, 0xa7277bfb, 0x5acda688, 0xf1941259,
27 0x0a41a8bf, 0xa1181c6e, 0x5cf2c11d, 0xf7ab75cc, 0x18fd9e54, 0xb3a42a85,
28 0x4e4ef7f6, 0xe5174327, 0x1ec2f9c1, 0xb59b4d10, 0x48719063, 0xe32824b2,
29 0x1483517e, 0xbfdae5af, 0x423038dc, 0xe9698c0d, 0x12bc36eb, 0xb9e5823a,
30 0x440f5f49, 0xef56eb98, 0x31fb3ca8, 0x9aa28879, 0x6748550a, 0xcc11e1db,
31 0x37c45b3d, 0x9c9defec, 0x6177329f, 0xca2e864e, 0x3d85f382, 0x96dc4753,
32 0x6b369a20, 0xc06f2ef1, 0x3bba9417, 0x90e320c6, 0x6d09fdb5, 0xc6504964,
33 0x2906a2fc, 0x825f162d, 0x7fb5cb5e, 0xd4ec7f8f, 0x2f39c569, 0x846071b8,
34 0x798aaccb, 0xd2d3181a, 0x25786dd6, 0x8e21d907, 0x73cb0474, 0xd892b0a5,
35 0x23470a43, 0x881ebe92, 0x75f463e1, 0xdeadd730, 0x63f67950, 0xc8afcd81,
36 0x354510f2, 0x9e1ca423, 0x65c91ec5, 0xce90aa14, 0x337a7767, 0x9823c3b6,
37 0x6f88b67a, 0xc4d102ab, 0x393bdfd8, 0x92626b09, 0x69b7d1ef, 0xc2ee653e,
38 0x3f04b84d, 0x945d0c9c, 0x7b0be704, 0xd05253d5, 0x2db88ea6, 0x86e13a77,
39 0x7d348091, 0xd66d3440, 0x2b87e933, 0x80de5de2, 0x7775282e, 0xdc2c9cff,
40 0x21c6418c, 0x8a9ff55d, 0x714a4fbb, 0xda13fb6a, 0x27f92619, 0x8ca092c8,
41 0x520d45f8, 0xf954f129, 0x04be2c5a, 0xafe7988b, 0x5432226d, 0xff6b96bc,
42 0x02814bcf, 0xa9d8ff1e, 0x5e738ad2, 0xf52a3e03, 0x08c0e370, 0xa39957a1,
43 0x584ced47, 0xf3155996, 0x0eff84e5, 0xa5a63034, 0x4af0dbac, 0xe1a96f7d,
44 0x1c43b20e, 0xb71a06df, 0x4ccfbc39, 0xe79608e8, 0x1a7cd59b, 0xb125614a,
45 0x468e1486, 0xedd7a057, 0x103d7d24, 0xbb64c9f5, 0x40b17313, 0xebe8c7c2,
46 0x16021ab1, 0xbd5bae60, 0x6cb54671, 0xc7ecf2a0, 0x3a062fd3, 0x915f9b02,
47 0x6a8a21e4, 0xc1d39535, 0x3c394846, 0x9760fc97, 0x60cb895b, 0xcb923d8a,
48 0x3678e0f9, 0x9d215428, 0x66f4eece, 0xcdad5a1f, 0x3047876c, 0x9b1e33bd,
49 0x7448d825, 0xdf116cf4, 0x22fbb187, 0x89a20556, 0x7277bfb0, 0xd92e0b61,
50 0x24c4d612, 0x8f9d62c3, 0x7836170f, 0xd36fa3de, 0x2e857ead, 0x85dcca7c,
51 0x7e09709a, 0xd550c44b, 0x28ba1938, 0x83e3ade9, 0x5d4e7ad9, 0xf617ce08,
52 0x0bfd137b, 0xa0a4a7aa, 0x5b711d4c, 0xf028a99d, 0x0dc274ee, 0xa69bc03f,
53 0x5130b5f3, 0xfa690122, 0x0783dc51, 0xacda6880, 0x570fd266, 0xfc5666b7,
54 0x01bcbbc4, 0xaae50f15, 0x45b3e48d, 0xeeea505c, 0x13008d2f, 0xb85939fe,
55 0x438c8318, 0xe8d537c9, 0x153feaba, 0xbe665e6b, 0x49cd2ba7, 0xe2949f76,
56 0x1f7e4205, 0xb427f6d4, 0x4ff24c32, 0xe4abf8e3, 0x19412590, 0xb2189141,
57 0x0f433f21, 0xa41a8bf0, 0x59f05683, 0xf2a9e252, 0x097c58b4, 0xa225ec65,
58 0x5fcf3116, 0xf49685c7, 0x033df00b, 0xa86444da, 0x558e99a9, 0xfed72d78,
59 0x0502979e, 0xae5b234f, 0x53b1fe3c, 0xf8e84aed, 0x17bea175, 0xbce715a4,
60 0x410dc8d7, 0xea547c06, 0x1181c6e0, 0xbad87231, 0x4732af42, 0xec6b1b93,
61 0x1bc06e5f, 0xb099da8e, 0x4d7307fd, 0xe62ab32c, 0x1dff09ca, 0xb6a6bd1b,
62 0x4b4c6068, 0xe015d4b9, 0x3eb80389, 0x95e1b758, 0x680b6a2b, 0xc352defa,
63 0x3887641c, 0x93ded0cd, 0x6e340dbe, 0xc56db96f, 0x32c6cca3, 0x999f7872,
64 0x6475a501, 0xcf2c11d0, 0x34f9ab36, 0x9fa01fe7, 0x624ac294, 0xc9137645,
65 0x26459ddd, 0x8d1c290c, 0x70f6f47f, 0xdbaf40ae, 0x207afa48, 0x8b234e99,
66 0x76c993ea, 0xdd90273b, 0x2a3b52f7, 0x8162e626, 0x7c883b55, 0xd7d18f84,
67 0x2c043562, 0x875d81b3, 0x7ab75cc0, 0xd1eee811
70 static const unsigned int U
[256] = {
71 0x00000000, 0x7eb5200d, 0x5633f4cb, 0x2886d4c6, 0x073e5d47, 0x798b7d4a,
72 0x510da98c, 0x2fb88981, 0x0e7cba8e, 0x70c99a83, 0x584f4e45, 0x26fa6e48,
73 0x0942e7c9, 0x77f7c7c4, 0x5f711302, 0x21c4330f, 0x1cf9751c, 0x624c5511,
74 0x4aca81d7, 0x347fa1da, 0x1bc7285b, 0x65720856, 0x4df4dc90, 0x3341fc9d,
75 0x1285cf92, 0x6c30ef9f, 0x44b63b59, 0x3a031b54, 0x15bb92d5, 0x6b0eb2d8,
76 0x4388661e, 0x3d3d4613, 0x39f2ea38, 0x4747ca35, 0x6fc11ef3, 0x11743efe,
77 0x3eccb77f, 0x40799772, 0x68ff43b4, 0x164a63b9, 0x378e50b6, 0x493b70bb,
78 0x61bda47d, 0x1f088470, 0x30b00df1, 0x4e052dfc, 0x6683f93a, 0x1836d937,
79 0x250b9f24, 0x5bbebf29, 0x73386bef, 0x0d8d4be2, 0x2235c263, 0x5c80e26e,
80 0x740636a8, 0x0ab316a5, 0x2b7725aa, 0x55c205a7, 0x7d44d161, 0x03f1f16c,
81 0x2c4978ed, 0x52fc58e0, 0x7a7a8c26, 0x04cfac2b, 0x73e5d470, 0x0d50f47d,
82 0x25d620bb, 0x5b6300b6, 0x74db8937, 0x0a6ea93a, 0x22e87dfc, 0x5c5d5df1,
83 0x7d996efe, 0x032c4ef3, 0x2baa9a35, 0x551fba38, 0x7aa733b9, 0x041213b4,
84 0x2c94c772, 0x5221e77f, 0x6f1ca16c, 0x11a98161, 0x392f55a7, 0x479a75aa,
85 0x6822fc2b, 0x1697dc26, 0x3e1108e0, 0x40a428ed, 0x61601be2, 0x1fd53bef,
86 0x3753ef29, 0x49e6cf24, 0x665e46a5, 0x18eb66a8, 0x306db26e, 0x4ed89263,
87 0x4a173e48, 0x34a21e45, 0x1c24ca83, 0x6291ea8e, 0x4d29630f, 0x339c4302,
88 0x1b1a97c4, 0x65afb7c9, 0x446b84c6, 0x3adea4cb, 0x1258700d, 0x6ced5000,
89 0x4355d981, 0x3de0f98c, 0x15662d4a, 0x6bd30d47, 0x56ee4b54, 0x285b6b59,
90 0x00ddbf9f, 0x7e689f92, 0x51d01613, 0x2f65361e, 0x07e3e2d8, 0x7956c2d5,
91 0x5892f1da, 0x2627d1d7, 0x0ea10511, 0x7014251c, 0x5facac9d, 0x21198c90,
92 0x099f5856, 0x772a785b, 0x4c921c31, 0x32273c3c, 0x1aa1e8fa, 0x6414c8f7,
93 0x4bac4176, 0x3519617b, 0x1d9fb5bd, 0x632a95b0, 0x42eea6bf, 0x3c5b86b2,
94 0x14dd5274, 0x6a687279, 0x45d0fbf8, 0x3b65dbf5, 0x13e30f33, 0x6d562f3e,
95 0x506b692d, 0x2ede4920, 0x06589de6, 0x78edbdeb, 0x5755346a, 0x29e01467,
96 0x0166c0a1, 0x7fd3e0ac, 0x5e17d3a3, 0x20a2f3ae, 0x08242768, 0x76910765,
97 0x59298ee4, 0x279caee9, 0x0f1a7a2f, 0x71af5a22, 0x7560f609, 0x0bd5d604,
98 0x235302c2, 0x5de622cf, 0x725eab4e, 0x0ceb8b43, 0x246d5f85, 0x5ad87f88,
99 0x7b1c4c87, 0x05a96c8a, 0x2d2fb84c, 0x539a9841, 0x7c2211c0, 0x029731cd,
100 0x2a11e50b, 0x54a4c506, 0x69998315, 0x172ca318, 0x3faa77de, 0x411f57d3,
101 0x6ea7de52, 0x1012fe5f, 0x38942a99, 0x46210a94, 0x67e5399b, 0x19501996,
102 0x31d6cd50, 0x4f63ed5d, 0x60db64dc, 0x1e6e44d1, 0x36e89017, 0x485db01a,
103 0x3f77c841, 0x41c2e84c, 0x69443c8a, 0x17f11c87, 0x38499506, 0x46fcb50b,
104 0x6e7a61cd, 0x10cf41c0, 0x310b72cf, 0x4fbe52c2, 0x67388604, 0x198da609,
105 0x36352f88, 0x48800f85, 0x6006db43, 0x1eb3fb4e, 0x238ebd5d, 0x5d3b9d50,
106 0x75bd4996, 0x0b08699b, 0x24b0e01a, 0x5a05c017, 0x728314d1, 0x0c3634dc,
107 0x2df207d3, 0x534727de, 0x7bc1f318, 0x0574d315, 0x2acc5a94, 0x54797a99,
108 0x7cffae5f, 0x024a8e52, 0x06852279, 0x78300274, 0x50b6d6b2, 0x2e03f6bf,
109 0x01bb7f3e, 0x7f0e5f33, 0x57888bf5, 0x293dabf8, 0x08f998f7, 0x764cb8fa,
110 0x5eca6c3c, 0x207f4c31, 0x0fc7c5b0, 0x7172e5bd, 0x59f4317b, 0x27411176,
111 0x1a7c5765, 0x64c97768, 0x4c4fa3ae, 0x32fa83a3, 0x1d420a22, 0x63f72a2f,
112 0x4b71fee9, 0x35c4dee4, 0x1400edeb, 0x6ab5cde6, 0x42331920, 0x3c86392d,
113 0x133eb0ac, 0x6d8b90a1, 0x450d4467, 0x3bb8646a
117 const unsigned char *ptr
;
121 struct unpacked_index_entry
{
122 struct index_entry entry
;
123 struct unpacked_index_entry
*next
;
126 struct git_delta_index
{
127 unsigned long memsize
;
129 unsigned long src_size
;
130 unsigned int hash_mask
;
131 struct index_entry
*hash
[GIT_FLEX_ARRAY
];
134 struct git_delta_index
*
135 git_delta_create_index(const void *buf
, unsigned long bufsize
)
137 unsigned int i
, hsize
, hmask
, entries
, prev_val
, *hash_count
;
138 const unsigned char *data
, *buffer
= buf
;
139 struct git_delta_index
*index
;
140 struct unpacked_index_entry
*entry
, **hash
;
141 struct index_entry
*packed_entry
, **packed_hash
;
143 unsigned long memsize
;
145 if (!buf
|| !bufsize
)
148 /* Determine index hash size. Note that indexing skips the
149 first byte to allow for optimizing the Rabin's polynomial
150 initialization in create_delta(). */
151 entries
= (unsigned int)(bufsize
- 1) / RABIN_WINDOW
;
152 if (bufsize
>= 0xffffffffUL
) {
154 * Current delta format can't encode offsets into
155 * reference buffer with more than 32 bits.
157 entries
= 0xfffffffeU
/ RABIN_WINDOW
;
160 for (i
= 4; (1u << i
) < hsize
&& i
< 31; i
++);
164 /* allocate lookup index */
165 memsize
= sizeof(*hash
) * hsize
+
166 sizeof(*entry
) * entries
;
167 mem
= git__malloc(memsize
);
174 memset(hash
, 0, hsize
* sizeof(*hash
));
176 /* allocate an array to count hash entries */
177 hash_count
= calloc(hsize
, sizeof(*hash_count
));
183 /* then populate the index */
185 for (data
= buffer
+ entries
* RABIN_WINDOW
- RABIN_WINDOW
;
187 data
-= RABIN_WINDOW
) {
188 unsigned int val
= 0;
189 for (i
= 1; i
<= RABIN_WINDOW
; i
++)
190 val
= ((val
<< 8) | data
[i
]) ^ T
[val
>> RABIN_SHIFT
];
191 if (val
== prev_val
) {
192 /* keep the lowest of consecutive identical blocks */
193 entry
[-1].entry
.ptr
= data
+ RABIN_WINDOW
;
198 entry
->entry
.ptr
= data
+ RABIN_WINDOW
;
199 entry
->entry
.val
= val
;
200 entry
->next
= hash
[i
];
207 * Determine a limit on the number of entries in the same hash
208 * bucket. This guards us against pathological data sets causing
209 * really bad hash distribution with most entries in the same hash
210 * bucket that would bring us to O(m*n) computing costs (m and n
211 * corresponding to reference and target buffer sizes).
213 * Make sure none of the hash buckets has more entries than
214 * we're willing to test. Otherwise we cull the entry list
215 * uniformly to still preserve a good repartition across
216 * the reference buffer.
218 for (i
= 0; i
< hsize
; i
++) {
221 if (hash_count
[i
] <= HASH_LIMIT
)
224 /* We leave exactly HASH_LIMIT entries in the bucket */
225 entries
-= hash_count
[i
] - HASH_LIMIT
;
231 * Assume that this loop is gone through exactly
232 * HASH_LIMIT times and is entered and left with
233 * acc==0. So the first statement in the loop
234 * contributes (hash_count[i]-HASH_LIMIT)*HASH_LIMIT
235 * to the accumulator, and the inner loop consequently
236 * is run (hash_count[i]-HASH_LIMIT) times, removing
237 * one element from the list each time. Since acc
238 * balances out to 0 at the final run, the inner loop
239 * body can't be left with entry==NULL. So we indeed
240 * encounter entry==NULL in the outer loop only.
243 acc
+= hash_count
[i
] - HASH_LIMIT
;
245 struct unpacked_index_entry
*keep
= entry
;
250 keep
->next
= entry
->next
;
255 git__free(hash_count
);
258 * Now create the packed index in array form
259 * rather than linked lists.
261 memsize
= sizeof(*index
)
262 + sizeof(*packed_hash
) * (hsize
+1)
263 + sizeof(*packed_entry
) * entries
;
264 mem
= git__malloc(memsize
);
271 index
->memsize
= memsize
;
272 index
->src_buf
= buf
;
273 index
->src_size
= bufsize
;
274 index
->hash_mask
= hmask
;
278 mem
= packed_hash
+ (hsize
+1);
281 for (i
= 0; i
< hsize
; i
++) {
283 * Coalesce all entries belonging to one linked list
284 * into consecutive array entries.
286 packed_hash
[i
] = packed_entry
;
287 for (entry
= hash
[i
]; entry
; entry
= entry
->next
)
288 *packed_entry
++ = entry
->entry
;
291 /* Sentinel value to indicate the length of the last hash bucket */
292 packed_hash
[hsize
] = packed_entry
;
294 assert(packed_entry
- (struct index_entry
*)mem
== entries
);
300 void git_delta_free_index(struct git_delta_index
*index
)
305 unsigned long git_delta_sizeof_index(struct git_delta_index
*index
)
308 return index
->memsize
;
314 * The maximum size for any opcode sequence, including the initial header
315 * plus Rabin window plus biggest copy.
317 #define MAX_OP_SIZE (5 + 5 + 1 + RABIN_WINDOW + 7)
321 const struct git_delta_index
*index
,
323 unsigned long trg_size
,
324 unsigned long *delta_size
,
325 unsigned long max_size
)
327 unsigned int i
, outpos
, outsize
, moff
, msize
, val
;
329 const unsigned char *ref_data
, *ref_top
, *data
, *top
;
332 if (!trg_buf
|| !trg_size
)
337 if (max_size
&& outsize
>= max_size
)
338 outsize
= (unsigned int)(max_size
+ MAX_OP_SIZE
+ 1);
339 out
= git__malloc(outsize
);
343 /* store reference buffer size */
346 out
[outpos
++] = i
| 0x80;
351 /* store target buffer size */
354 out
[outpos
++] = i
| 0x80;
359 ref_data
= index
->src_buf
;
360 ref_top
= ref_data
+ index
->src_size
;
362 top
= (const unsigned char *) trg_buf
+ trg_size
;
366 for (i
= 0; i
< RABIN_WINDOW
&& data
< top
; i
++, data
++) {
367 out
[outpos
++] = *data
;
368 val
= ((val
<< 8) | *data
) ^ T
[val
>> RABIN_SHIFT
];
376 struct index_entry
*entry
;
377 val
^= U
[data
[-RABIN_WINDOW
]];
378 val
= ((val
<< 8) | *data
) ^ T
[val
>> RABIN_SHIFT
];
379 i
= val
& index
->hash_mask
;
380 for (entry
= index
->hash
[i
]; entry
< index
->hash
[i
+1]; entry
++) {
381 const unsigned char *ref
= entry
->ptr
;
382 const unsigned char *src
= data
;
383 unsigned int ref_size
= (unsigned int)(ref_top
- ref
);
384 if (entry
->val
!= val
)
386 if (ref_size
> (unsigned int)(top
- src
))
387 ref_size
= (unsigned int)(top
- src
);
388 if (ref_size
<= msize
)
390 while (ref_size
-- && *src
++ == *ref
)
392 if (msize
< (unsigned int)(ref
- entry
->ptr
)) {
393 /* this is our best match so far */
394 msize
= (unsigned int)(ref
- entry
->ptr
);
395 moff
= (unsigned int)(entry
->ptr
- ref_data
);
396 if (msize
>= 4096) /* good enough */
405 out
[outpos
++] = *data
++;
407 if (inscnt
== 0x7f) {
408 out
[outpos
- inscnt
- 1] = inscnt
;
417 while (moff
&& ref_data
[moff
-1] == data
[-1]) {
418 /* we can match one byte back */
425 outpos
--; /* remove count slot */
426 inscnt
--; /* make it -1 */
429 out
[outpos
- inscnt
- 1] = inscnt
;
433 /* A copy op is currently limited to 64KB (pack v2) */
434 left
= (msize
< 0x10000) ? 0 : (msize
- 0x10000);
440 if (moff
& 0x000000ff)
441 out
[outpos
++] = moff
>> 0, i
|= 0x01;
442 if (moff
& 0x0000ff00)
443 out
[outpos
++] = moff
>> 8, i
|= 0x02;
444 if (moff
& 0x00ff0000)
445 out
[outpos
++] = moff
>> 16, i
|= 0x04;
446 if (moff
& 0xff000000)
447 out
[outpos
++] = moff
>> 24, i
|= 0x08;
450 out
[outpos
++] = msize
>> 0, i
|= 0x10;
452 out
[outpos
++] = msize
>> 8, i
|= 0x20;
463 for (j
= -RABIN_WINDOW
; j
< 0; j
++)
464 val
= ((val
<< 8) | data
[j
])
465 ^ T
[val
>> RABIN_SHIFT
];
469 if (outpos
>= outsize
- MAX_OP_SIZE
) {
471 outsize
= outsize
* 3 / 2;
472 if (max_size
&& outsize
>= max_size
)
473 outsize
= max_size
+ MAX_OP_SIZE
+ 1;
474 if (max_size
&& outpos
> max_size
)
476 out
= git__realloc(out
, outsize
);
485 out
[outpos
- inscnt
- 1] = inscnt
;
487 if (max_size
&& outpos
> max_size
) {
492 *delta_size
= outpos
;