]>
git.proxmox.com Git - libgit2.git/blob - src/delta.c
2 * Copyright (C) the libgit2 contributors. All rights reserved.
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
10 /* maximum hash entry list for the same hash bucket */
13 #define RABIN_SHIFT 23
14 #define RABIN_WINDOW 16
16 static const unsigned int T
[256] = {
17 0x00000000, 0xab59b4d1, 0x56b369a2, 0xfdeadd73, 0x063f6795, 0xad66d344,
18 0x508c0e37, 0xfbd5bae6, 0x0c7ecf2a, 0xa7277bfb, 0x5acda688, 0xf1941259,
19 0x0a41a8bf, 0xa1181c6e, 0x5cf2c11d, 0xf7ab75cc, 0x18fd9e54, 0xb3a42a85,
20 0x4e4ef7f6, 0xe5174327, 0x1ec2f9c1, 0xb59b4d10, 0x48719063, 0xe32824b2,
21 0x1483517e, 0xbfdae5af, 0x423038dc, 0xe9698c0d, 0x12bc36eb, 0xb9e5823a,
22 0x440f5f49, 0xef56eb98, 0x31fb3ca8, 0x9aa28879, 0x6748550a, 0xcc11e1db,
23 0x37c45b3d, 0x9c9defec, 0x6177329f, 0xca2e864e, 0x3d85f382, 0x96dc4753,
24 0x6b369a20, 0xc06f2ef1, 0x3bba9417, 0x90e320c6, 0x6d09fdb5, 0xc6504964,
25 0x2906a2fc, 0x825f162d, 0x7fb5cb5e, 0xd4ec7f8f, 0x2f39c569, 0x846071b8,
26 0x798aaccb, 0xd2d3181a, 0x25786dd6, 0x8e21d907, 0x73cb0474, 0xd892b0a5,
27 0x23470a43, 0x881ebe92, 0x75f463e1, 0xdeadd730, 0x63f67950, 0xc8afcd81,
28 0x354510f2, 0x9e1ca423, 0x65c91ec5, 0xce90aa14, 0x337a7767, 0x9823c3b6,
29 0x6f88b67a, 0xc4d102ab, 0x393bdfd8, 0x92626b09, 0x69b7d1ef, 0xc2ee653e,
30 0x3f04b84d, 0x945d0c9c, 0x7b0be704, 0xd05253d5, 0x2db88ea6, 0x86e13a77,
31 0x7d348091, 0xd66d3440, 0x2b87e933, 0x80de5de2, 0x7775282e, 0xdc2c9cff,
32 0x21c6418c, 0x8a9ff55d, 0x714a4fbb, 0xda13fb6a, 0x27f92619, 0x8ca092c8,
33 0x520d45f8, 0xf954f129, 0x04be2c5a, 0xafe7988b, 0x5432226d, 0xff6b96bc,
34 0x02814bcf, 0xa9d8ff1e, 0x5e738ad2, 0xf52a3e03, 0x08c0e370, 0xa39957a1,
35 0x584ced47, 0xf3155996, 0x0eff84e5, 0xa5a63034, 0x4af0dbac, 0xe1a96f7d,
36 0x1c43b20e, 0xb71a06df, 0x4ccfbc39, 0xe79608e8, 0x1a7cd59b, 0xb125614a,
37 0x468e1486, 0xedd7a057, 0x103d7d24, 0xbb64c9f5, 0x40b17313, 0xebe8c7c2,
38 0x16021ab1, 0xbd5bae60, 0x6cb54671, 0xc7ecf2a0, 0x3a062fd3, 0x915f9b02,
39 0x6a8a21e4, 0xc1d39535, 0x3c394846, 0x9760fc97, 0x60cb895b, 0xcb923d8a,
40 0x3678e0f9, 0x9d215428, 0x66f4eece, 0xcdad5a1f, 0x3047876c, 0x9b1e33bd,
41 0x7448d825, 0xdf116cf4, 0x22fbb187, 0x89a20556, 0x7277bfb0, 0xd92e0b61,
42 0x24c4d612, 0x8f9d62c3, 0x7836170f, 0xd36fa3de, 0x2e857ead, 0x85dcca7c,
43 0x7e09709a, 0xd550c44b, 0x28ba1938, 0x83e3ade9, 0x5d4e7ad9, 0xf617ce08,
44 0x0bfd137b, 0xa0a4a7aa, 0x5b711d4c, 0xf028a99d, 0x0dc274ee, 0xa69bc03f,
45 0x5130b5f3, 0xfa690122, 0x0783dc51, 0xacda6880, 0x570fd266, 0xfc5666b7,
46 0x01bcbbc4, 0xaae50f15, 0x45b3e48d, 0xeeea505c, 0x13008d2f, 0xb85939fe,
47 0x438c8318, 0xe8d537c9, 0x153feaba, 0xbe665e6b, 0x49cd2ba7, 0xe2949f76,
48 0x1f7e4205, 0xb427f6d4, 0x4ff24c32, 0xe4abf8e3, 0x19412590, 0xb2189141,
49 0x0f433f21, 0xa41a8bf0, 0x59f05683, 0xf2a9e252, 0x097c58b4, 0xa225ec65,
50 0x5fcf3116, 0xf49685c7, 0x033df00b, 0xa86444da, 0x558e99a9, 0xfed72d78,
51 0x0502979e, 0xae5b234f, 0x53b1fe3c, 0xf8e84aed, 0x17bea175, 0xbce715a4,
52 0x410dc8d7, 0xea547c06, 0x1181c6e0, 0xbad87231, 0x4732af42, 0xec6b1b93,
53 0x1bc06e5f, 0xb099da8e, 0x4d7307fd, 0xe62ab32c, 0x1dff09ca, 0xb6a6bd1b,
54 0x4b4c6068, 0xe015d4b9, 0x3eb80389, 0x95e1b758, 0x680b6a2b, 0xc352defa,
55 0x3887641c, 0x93ded0cd, 0x6e340dbe, 0xc56db96f, 0x32c6cca3, 0x999f7872,
56 0x6475a501, 0xcf2c11d0, 0x34f9ab36, 0x9fa01fe7, 0x624ac294, 0xc9137645,
57 0x26459ddd, 0x8d1c290c, 0x70f6f47f, 0xdbaf40ae, 0x207afa48, 0x8b234e99,
58 0x76c993ea, 0xdd90273b, 0x2a3b52f7, 0x8162e626, 0x7c883b55, 0xd7d18f84,
59 0x2c043562, 0x875d81b3, 0x7ab75cc0, 0xd1eee811
62 static const unsigned int U
[256] = {
63 0x00000000, 0x7eb5200d, 0x5633f4cb, 0x2886d4c6, 0x073e5d47, 0x798b7d4a,
64 0x510da98c, 0x2fb88981, 0x0e7cba8e, 0x70c99a83, 0x584f4e45, 0x26fa6e48,
65 0x0942e7c9, 0x77f7c7c4, 0x5f711302, 0x21c4330f, 0x1cf9751c, 0x624c5511,
66 0x4aca81d7, 0x347fa1da, 0x1bc7285b, 0x65720856, 0x4df4dc90, 0x3341fc9d,
67 0x1285cf92, 0x6c30ef9f, 0x44b63b59, 0x3a031b54, 0x15bb92d5, 0x6b0eb2d8,
68 0x4388661e, 0x3d3d4613, 0x39f2ea38, 0x4747ca35, 0x6fc11ef3, 0x11743efe,
69 0x3eccb77f, 0x40799772, 0x68ff43b4, 0x164a63b9, 0x378e50b6, 0x493b70bb,
70 0x61bda47d, 0x1f088470, 0x30b00df1, 0x4e052dfc, 0x6683f93a, 0x1836d937,
71 0x250b9f24, 0x5bbebf29, 0x73386bef, 0x0d8d4be2, 0x2235c263, 0x5c80e26e,
72 0x740636a8, 0x0ab316a5, 0x2b7725aa, 0x55c205a7, 0x7d44d161, 0x03f1f16c,
73 0x2c4978ed, 0x52fc58e0, 0x7a7a8c26, 0x04cfac2b, 0x73e5d470, 0x0d50f47d,
74 0x25d620bb, 0x5b6300b6, 0x74db8937, 0x0a6ea93a, 0x22e87dfc, 0x5c5d5df1,
75 0x7d996efe, 0x032c4ef3, 0x2baa9a35, 0x551fba38, 0x7aa733b9, 0x041213b4,
76 0x2c94c772, 0x5221e77f, 0x6f1ca16c, 0x11a98161, 0x392f55a7, 0x479a75aa,
77 0x6822fc2b, 0x1697dc26, 0x3e1108e0, 0x40a428ed, 0x61601be2, 0x1fd53bef,
78 0x3753ef29, 0x49e6cf24, 0x665e46a5, 0x18eb66a8, 0x306db26e, 0x4ed89263,
79 0x4a173e48, 0x34a21e45, 0x1c24ca83, 0x6291ea8e, 0x4d29630f, 0x339c4302,
80 0x1b1a97c4, 0x65afb7c9, 0x446b84c6, 0x3adea4cb, 0x1258700d, 0x6ced5000,
81 0x4355d981, 0x3de0f98c, 0x15662d4a, 0x6bd30d47, 0x56ee4b54, 0x285b6b59,
82 0x00ddbf9f, 0x7e689f92, 0x51d01613, 0x2f65361e, 0x07e3e2d8, 0x7956c2d5,
83 0x5892f1da, 0x2627d1d7, 0x0ea10511, 0x7014251c, 0x5facac9d, 0x21198c90,
84 0x099f5856, 0x772a785b, 0x4c921c31, 0x32273c3c, 0x1aa1e8fa, 0x6414c8f7,
85 0x4bac4176, 0x3519617b, 0x1d9fb5bd, 0x632a95b0, 0x42eea6bf, 0x3c5b86b2,
86 0x14dd5274, 0x6a687279, 0x45d0fbf8, 0x3b65dbf5, 0x13e30f33, 0x6d562f3e,
87 0x506b692d, 0x2ede4920, 0x06589de6, 0x78edbdeb, 0x5755346a, 0x29e01467,
88 0x0166c0a1, 0x7fd3e0ac, 0x5e17d3a3, 0x20a2f3ae, 0x08242768, 0x76910765,
89 0x59298ee4, 0x279caee9, 0x0f1a7a2f, 0x71af5a22, 0x7560f609, 0x0bd5d604,
90 0x235302c2, 0x5de622cf, 0x725eab4e, 0x0ceb8b43, 0x246d5f85, 0x5ad87f88,
91 0x7b1c4c87, 0x05a96c8a, 0x2d2fb84c, 0x539a9841, 0x7c2211c0, 0x029731cd,
92 0x2a11e50b, 0x54a4c506, 0x69998315, 0x172ca318, 0x3faa77de, 0x411f57d3,
93 0x6ea7de52, 0x1012fe5f, 0x38942a99, 0x46210a94, 0x67e5399b, 0x19501996,
94 0x31d6cd50, 0x4f63ed5d, 0x60db64dc, 0x1e6e44d1, 0x36e89017, 0x485db01a,
95 0x3f77c841, 0x41c2e84c, 0x69443c8a, 0x17f11c87, 0x38499506, 0x46fcb50b,
96 0x6e7a61cd, 0x10cf41c0, 0x310b72cf, 0x4fbe52c2, 0x67388604, 0x198da609,
97 0x36352f88, 0x48800f85, 0x6006db43, 0x1eb3fb4e, 0x238ebd5d, 0x5d3b9d50,
98 0x75bd4996, 0x0b08699b, 0x24b0e01a, 0x5a05c017, 0x728314d1, 0x0c3634dc,
99 0x2df207d3, 0x534727de, 0x7bc1f318, 0x0574d315, 0x2acc5a94, 0x54797a99,
100 0x7cffae5f, 0x024a8e52, 0x06852279, 0x78300274, 0x50b6d6b2, 0x2e03f6bf,
101 0x01bb7f3e, 0x7f0e5f33, 0x57888bf5, 0x293dabf8, 0x08f998f7, 0x764cb8fa,
102 0x5eca6c3c, 0x207f4c31, 0x0fc7c5b0, 0x7172e5bd, 0x59f4317b, 0x27411176,
103 0x1a7c5765, 0x64c97768, 0x4c4fa3ae, 0x32fa83a3, 0x1d420a22, 0x63f72a2f,
104 0x4b71fee9, 0x35c4dee4, 0x1400edeb, 0x6ab5cde6, 0x42331920, 0x3c86392d,
105 0x133eb0ac, 0x6d8b90a1, 0x450d4467, 0x3bb8646a
109 const unsigned char *ptr
;
111 struct index_entry
*next
;
114 struct git_delta_index
{
115 unsigned long memsize
;
117 unsigned long src_size
;
118 unsigned int hash_mask
;
119 struct index_entry
*hash
[GIT_FLEX_ARRAY
];
122 struct git_delta_index
*
123 git_delta_create_index(const void *buf
, unsigned long bufsize
)
125 unsigned int i
, hsize
, hmask
, entries
, prev_val
, *hash_count
;
126 const unsigned char *data
, *buffer
= buf
;
127 struct git_delta_index
*index
;
128 struct index_entry
*entry
, **hash
;
130 unsigned long memsize
;
132 if (!buf
|| !bufsize
)
135 /* Determine index hash size. Note that indexing skips the
136 first byte to allow for optimizing the rabin polynomial
137 initialization in create_delta(). */
138 entries
= (unsigned int)(bufsize
- 1) / RABIN_WINDOW
;
139 if (bufsize
>= 0xffffffffUL
) {
141 * Current delta format can't encode offsets into
142 * reference buffer with more than 32 bits.
144 entries
= 0xfffffffeU
/ RABIN_WINDOW
;
147 for (i
= 4; (1u << i
) < hsize
&& i
< 31; i
++);
151 /* allocate lookup index */
152 memsize
= sizeof(*index
) +
153 sizeof(*hash
) * hsize
+
154 sizeof(*entry
) * entries
;
155 mem
= git__malloc(memsize
);
164 index
->memsize
= memsize
;
165 index
->src_buf
= buf
;
166 index
->src_size
= bufsize
;
167 index
->hash_mask
= hmask
;
168 memset(hash
, 0, hsize
* sizeof(*hash
));
170 /* allocate an array to count hash entries */
171 hash_count
= git__calloc(hsize
, sizeof(*hash_count
));
177 /* then populate the index */
179 for (data
= buffer
+ entries
* RABIN_WINDOW
- RABIN_WINDOW
;
181 data
-= RABIN_WINDOW
) {
182 unsigned int val
= 0;
183 for (i
= 1; i
<= RABIN_WINDOW
; i
++)
184 val
= ((val
<< 8) | data
[i
]) ^ T
[val
>> RABIN_SHIFT
];
185 if (val
== prev_val
) {
186 /* keep the lowest of consecutive identical blocks */
187 entry
[-1].ptr
= data
+ RABIN_WINDOW
;
191 entry
->ptr
= data
+ RABIN_WINDOW
;
193 entry
->next
= hash
[i
];
200 * Determine a limit on the number of entries in the same hash
201 * bucket. This guard us against patological data sets causing
202 * really bad hash distribution with most entries in the same hash
203 * bucket that would bring us to O(m*n) computing costs (m and n
204 * corresponding to reference and target buffer sizes).
206 * Make sure none of the hash buckets has more entries than
207 * we're willing to test. Otherwise we cull the entry list
208 * uniformly to still preserve a good repartition across
209 * the reference buffer.
211 for (i
= 0; i
< hsize
; i
++) {
212 if (hash_count
[i
] < HASH_LIMIT
)
217 struct index_entry
*keep
= entry
;
218 int skip
= hash_count
[i
] / HASH_LIMIT
/ 2;
221 } while(--skip
&& entry
);
225 git__free(hash_count
);
230 void git_delta_free_index(struct git_delta_index
*index
)
235 unsigned long git_delta_sizeof_index(struct git_delta_index
*index
)
238 return index
->memsize
;
244 * The maximum size for any opcode sequence, including the initial header
245 * plus rabin window plus biggest copy.
247 #define MAX_OP_SIZE (5 + 5 + 1 + RABIN_WINDOW + 7)
251 const struct git_delta_index
*index
,
253 unsigned long trg_size
,
254 unsigned long *delta_size
,
255 unsigned long max_size
)
257 unsigned int i
, outpos
, outsize
, moff
, msize
, val
;
259 const unsigned char *ref_data
, *ref_top
, *data
, *top
;
262 if (!trg_buf
|| !trg_size
)
267 if (max_size
&& outsize
>= max_size
)
268 outsize
= (unsigned int)(max_size
+ MAX_OP_SIZE
+ 1);
269 out
= git__malloc(outsize
);
273 /* store reference buffer size */
276 out
[outpos
++] = i
| 0x80;
281 /* store target buffer size */
284 out
[outpos
++] = i
| 0x80;
289 ref_data
= index
->src_buf
;
290 ref_top
= ref_data
+ index
->src_size
;
292 top
= (const unsigned char *) trg_buf
+ trg_size
;
296 for (i
= 0; i
< RABIN_WINDOW
&& data
< top
; i
++, data
++) {
297 out
[outpos
++] = *data
;
298 val
= ((val
<< 8) | *data
) ^ T
[val
>> RABIN_SHIFT
];
306 struct index_entry
*entry
;
307 val
^= U
[data
[-RABIN_WINDOW
]];
308 val
= ((val
<< 8) | *data
) ^ T
[val
>> RABIN_SHIFT
];
309 i
= val
& index
->hash_mask
;
310 for (entry
= index
->hash
[i
]; entry
; entry
= entry
->next
) {
311 const unsigned char *ref
= entry
->ptr
;
312 const unsigned char *src
= data
;
313 unsigned int ref_size
= (unsigned int)(ref_top
- ref
);
314 if (entry
->val
!= val
)
316 if (ref_size
> (unsigned int)(top
- src
))
317 ref_size
= (unsigned int)(top
- src
);
318 if (ref_size
<= msize
)
320 while (ref_size
-- && *src
++ == *ref
)
322 if (msize
< (unsigned int)(ref
- entry
->ptr
)) {
323 /* this is our best match so far */
324 msize
= (unsigned int)(ref
- entry
->ptr
);
325 moff
= (unsigned int)(entry
->ptr
- ref_data
);
326 if (msize
>= 4096) /* good enough */
335 out
[outpos
++] = *data
++;
337 if (inscnt
== 0x7f) {
338 out
[outpos
- inscnt
- 1] = inscnt
;
347 while (moff
&& ref_data
[moff
-1] == data
[-1]) {
348 /* we can match one byte back */
355 outpos
--; /* remove count slot */
356 inscnt
--; /* make it -1 */
359 out
[outpos
- inscnt
- 1] = inscnt
;
363 /* A copy op is currently limited to 64KB (pack v2) */
364 left
= (msize
< 0x10000) ? 0 : (msize
- 0x10000);
370 if (moff
& 0x000000ff)
371 out
[outpos
++] = moff
>> 0, i
|= 0x01;
372 if (moff
& 0x0000ff00)
373 out
[outpos
++] = moff
>> 8, i
|= 0x02;
374 if (moff
& 0x00ff0000)
375 out
[outpos
++] = moff
>> 16, i
|= 0x04;
376 if (moff
& 0xff000000)
377 out
[outpos
++] = moff
>> 24, i
|= 0x08;
380 out
[outpos
++] = msize
>> 0, i
|= 0x10;
382 out
[outpos
++] = msize
>> 8, i
|= 0x20;
393 for (j
= -RABIN_WINDOW
; j
< 0; j
++)
394 val
= ((val
<< 8) | data
[j
])
395 ^ T
[val
>> RABIN_SHIFT
];
399 if (outpos
>= outsize
- MAX_OP_SIZE
) {
401 outsize
= outsize
* 3 / 2;
402 if (max_size
&& outsize
>= max_size
)
403 outsize
= max_size
+ MAX_OP_SIZE
+ 1;
404 if (max_size
&& outpos
> max_size
)
406 out
= git__realloc(out
, outsize
);
415 out
[outpos
- inscnt
- 1] = inscnt
;
417 if (max_size
&& outpos
> max_size
) {
422 *delta_size
= outpos
;