]>
Commit | Line | Data |
---|---|---|
f67539c2 TL |
1 | /* |
2 | * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. | |
3 | * All rights reserved. | |
4 | * | |
5 | * This source code is licensed under both the BSD-style license (found in the | |
6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found | |
7 | * in the COPYING file in the root directory of this source tree). | |
8 | * You may select, at your option, one of the above-listed licenses. | |
9 | */ | |
10 | ||
11 | /*-************************************* | |
12 | * Dependencies | |
13 | ***************************************/ | |
14 | ||
15 | /* Currently relies on qsort when combining contiguous matches. This can probabily | |
16 | * be avoided but would require changes to the algorithm. The qsort is far from | |
17 | * the bottleneck in this algorithm even for medium sized files so it's probably | |
18 | * not worth trying to address */ | |
19 | #include <stdlib.h> | |
20 | #include <assert.h> | |
21 | ||
22 | #include "zstd_edist.h" | |
23 | #include "mem.h" | |
24 | ||
25 | /*-************************************* | |
26 | * Constants | |
27 | ***************************************/ | |
28 | ||
29 | /* Just a sential for the entires of the diagnomal matrix */ | |
30 | #define ZSTD_EDIST_DIAG_MAX (S32)(1 << 30) | |
31 | ||
32 | /* How large should a snake be to be considered a 'big' snake. | |
33 | * For an explanation of what a 'snake' is with respect to the | |
34 | * edit distance matrix, see the linked paper in zstd_edist.h */ | |
35 | #define ZSTD_EDIST_SNAKE_THRESH 20 | |
36 | ||
37 | /* After how many iterations should we start to use the heuristic | |
38 | * based on 'big' snakes */ | |
39 | #define ZSTD_EDIST_SNAKE_ITER_THRESH 200 | |
40 | ||
41 | /* After how many iterations should be just give up and take | |
42 | * the best availabe edit script for this round */ | |
43 | #define ZSTD_EDIST_EXPENSIVE_THRESH 1024 | |
44 | ||
45 | /*-************************************* | |
46 | * Structures | |
47 | ***************************************/ | |
48 | ||
49 | typedef struct { | |
50 | U32 dictIdx; | |
51 | U32 srcIdx; | |
52 | U32 matchLength; | |
53 | } ZSTD_eDist_match; | |
54 | ||
55 | typedef struct { | |
56 | const BYTE* dict; | |
57 | const BYTE* src; | |
58 | size_t dictSize; | |
59 | size_t srcSize; | |
60 | S32* forwardDiag; /* Entires of the forward diagonal stored here */ | |
61 | S32* backwardDiag; /* Entires of the backward diagonal stored here. | |
62 | * Note: this buffer and the 'forwardDiag' buffer | |
63 | * are contiguous. See the ZSTD_eDist_genSequences */ | |
64 | ZSTD_eDist_match* matches; /* Accumulate matches of length 1 in this buffer. | |
65 | * In a subsequence post-processing step, we combine | |
66 | * contiguous matches. */ | |
67 | U32 nbMatches; | |
68 | } ZSTD_eDist_state; | |
69 | ||
70 | typedef struct { | |
71 | S32 dictMid; /* The mid diagonal for the dictionary */ | |
72 | S32 srcMid; /* The mid diagonal for the source */ | |
73 | int lowUseHeuristics; /* Should we use heuristics for the low part */ | |
74 | int highUseHeuristics; /* Should we use heuristics for the high part */ | |
75 | } ZSTD_eDist_partition; | |
76 | ||
77 | /*-************************************* | |
78 | * Internal | |
79 | ***************************************/ | |
80 | ||
81 | static void ZSTD_eDist_diag(ZSTD_eDist_state* state, | |
82 | ZSTD_eDist_partition* partition, | |
83 | S32 dictLow, S32 dictHigh, S32 srcLow, | |
84 | S32 srcHigh, int useHeuristics) | |
85 | { | |
86 | S32* const forwardDiag = state->forwardDiag; | |
87 | S32* const backwardDiag = state->backwardDiag; | |
88 | const BYTE* const dict = state->dict; | |
89 | const BYTE* const src = state->src; | |
90 | ||
91 | S32 const diagMin = dictLow - srcHigh; | |
92 | S32 const diagMax = dictHigh - srcLow; | |
93 | S32 const forwardMid = dictLow - srcLow; | |
94 | S32 const backwardMid = dictHigh - srcHigh; | |
95 | ||
96 | S32 forwardMin = forwardMid; | |
97 | S32 forwardMax = forwardMid; | |
98 | S32 backwardMin = backwardMid; | |
99 | S32 backwardMax = backwardMid; | |
100 | int odd = (forwardMid - backwardMid) & 1; | |
101 | U32 iterations; | |
102 | ||
103 | forwardDiag[forwardMid] = dictLow; | |
104 | backwardDiag[backwardMid] = dictHigh; | |
105 | ||
106 | /* Main loop for updating diag entries. Unless useHeuristics is | |
107 | * set to false, this loop will run until it finds the minimal | |
108 | * edit script */ | |
109 | for (iterations = 1;;iterations++) { | |
110 | S32 diag; | |
111 | int bigSnake = 0; | |
112 | ||
113 | if (forwardMin > diagMin) { | |
114 | forwardMin--; | |
115 | forwardDiag[forwardMin - 1] = -1; | |
116 | } else { | |
117 | forwardMin++; | |
118 | } | |
119 | ||
120 | if (forwardMax < diagMax) { | |
121 | forwardMax++; | |
122 | forwardDiag[forwardMax + 1] = -1; | |
123 | } else { | |
124 | forwardMax--; | |
125 | } | |
126 | ||
127 | for (diag = forwardMax; diag >= forwardMin; diag -= 2) { | |
128 | S32 dictIdx; | |
129 | S32 srcIdx; | |
130 | S32 low = forwardDiag[diag - 1]; | |
131 | S32 high = forwardDiag[diag + 1]; | |
132 | S32 dictIdx0 = low < high ? high : low + 1; | |
133 | ||
134 | for (dictIdx = dictIdx0, srcIdx = dictIdx0 - diag; | |
135 | dictIdx < dictHigh && srcIdx < srcHigh && dict[dictIdx] == src[srcIdx]; | |
136 | dictIdx++, srcIdx++) continue; | |
137 | ||
138 | if (dictIdx - dictIdx0 > ZSTD_EDIST_SNAKE_THRESH) | |
139 | bigSnake = 1; | |
140 | ||
141 | forwardDiag[diag] = dictIdx; | |
142 | ||
143 | if (odd && backwardMin <= diag && diag <= backwardMax && backwardDiag[diag] <= dictIdx) { | |
144 | partition->dictMid = dictIdx; | |
145 | partition->srcMid = srcIdx; | |
146 | partition->lowUseHeuristics = 0; | |
147 | partition->highUseHeuristics = 0; | |
148 | return; | |
149 | } | |
150 | } | |
151 | ||
152 | if (backwardMin > diagMin) { | |
153 | backwardMin--; | |
154 | backwardDiag[backwardMin - 1] = ZSTD_EDIST_DIAG_MAX; | |
155 | } else { | |
156 | backwardMin++; | |
157 | } | |
158 | ||
159 | if (backwardMax < diagMax) { | |
160 | backwardMax++; | |
161 | backwardDiag[backwardMax + 1] = ZSTD_EDIST_DIAG_MAX; | |
162 | } else { | |
163 | backwardMax--; | |
164 | } | |
165 | ||
166 | ||
167 | for (diag = backwardMax; diag >= backwardMin; diag -= 2) { | |
168 | S32 dictIdx; | |
169 | S32 srcIdx; | |
170 | S32 low = backwardDiag[diag - 1]; | |
171 | S32 high = backwardDiag[diag + 1]; | |
172 | S32 dictIdx0 = low < high ? low : high - 1; | |
173 | ||
174 | for (dictIdx = dictIdx0, srcIdx = dictIdx0 - diag; | |
175 | dictLow < dictIdx && srcLow < srcIdx && dict[dictIdx - 1] == src[srcIdx - 1]; | |
176 | dictIdx--, srcIdx--) continue; | |
177 | ||
178 | if (dictIdx0 - dictIdx > ZSTD_EDIST_SNAKE_THRESH) | |
179 | bigSnake = 1; | |
180 | ||
181 | backwardDiag[diag] = dictIdx; | |
182 | ||
183 | if (!odd && forwardMin <= diag && diag <= forwardMax && dictIdx <= forwardDiag[diag]) { | |
184 | partition->dictMid = dictIdx; | |
185 | partition->srcMid = srcIdx; | |
186 | partition->lowUseHeuristics = 0; | |
187 | partition->highUseHeuristics = 0; | |
188 | return; | |
189 | } | |
190 | } | |
191 | ||
192 | if (!useHeuristics) | |
193 | continue; | |
194 | ||
195 | /* Everything under this point is a heuritic. Using these will | |
196 | * substantially speed up the match finding. In some cases, taking | |
197 | * the total match finding time from several minutes to seconds. | |
198 | * Of course, the caveat is that the edit script found may no longer | |
199 | * be optimal */ | |
200 | ||
201 | /* Big snake heuristic */ | |
202 | if (iterations > ZSTD_EDIST_SNAKE_ITER_THRESH && bigSnake) { | |
203 | { | |
204 | S32 best = 0; | |
205 | ||
206 | for (diag = forwardMax; diag >= forwardMin; diag -= 2) { | |
207 | S32 diagDiag = diag - forwardMid; | |
208 | S32 dictIdx = forwardDiag[diag]; | |
209 | S32 srcIdx = dictIdx - diag; | |
210 | S32 v = (dictIdx - dictLow) * 2 - diagDiag; | |
211 | ||
212 | if (v > 12 * (iterations + (diagDiag < 0 ? -diagDiag : diagDiag))) { | |
213 | if (v > best | |
214 | && dictLow + ZSTD_EDIST_SNAKE_THRESH <= dictIdx && dictIdx <= dictHigh | |
215 | && srcLow + ZSTD_EDIST_SNAKE_THRESH <= srcIdx && srcIdx <= srcHigh) { | |
216 | S32 k; | |
217 | for (k = 1; dict[dictIdx - k] == src[srcIdx - k]; k++) { | |
218 | if (k == ZSTD_EDIST_SNAKE_THRESH) { | |
219 | best = v; | |
220 | partition->dictMid = dictIdx; | |
221 | partition->srcMid = srcIdx; | |
222 | break; | |
223 | } | |
224 | } | |
225 | } | |
226 | } | |
227 | } | |
228 | ||
229 | if (best > 0) { | |
230 | partition->lowUseHeuristics = 0; | |
231 | partition->highUseHeuristics = 1; | |
232 | return; | |
233 | } | |
234 | } | |
235 | ||
236 | { | |
237 | S32 best = 0; | |
238 | ||
239 | for (diag = backwardMax; diag >= backwardMin; diag -= 2) { | |
240 | S32 diagDiag = diag - backwardMid; | |
241 | S32 dictIdx = backwardDiag[diag]; | |
242 | S32 srcIdx = dictIdx - diag; | |
243 | S32 v = (dictHigh - dictIdx) * 2 + diagDiag; | |
244 | ||
245 | if (v > 12 * (iterations + (diagDiag < 0 ? -diagDiag : diagDiag))) { | |
246 | if (v > best | |
247 | && dictLow < dictIdx && dictIdx <= dictHigh - ZSTD_EDIST_SNAKE_THRESH | |
248 | && srcLow < srcIdx && srcIdx <= srcHigh - ZSTD_EDIST_SNAKE_THRESH) { | |
249 | int k; | |
250 | for (k = 0; dict[dictIdx + k] == src[srcIdx + k]; k++) { | |
251 | if (k == ZSTD_EDIST_SNAKE_THRESH - 1) { | |
252 | best = v; | |
253 | partition->dictMid = dictIdx; | |
254 | partition->srcMid = srcIdx; | |
255 | break; | |
256 | } | |
257 | } | |
258 | } | |
259 | } | |
260 | } | |
261 | ||
262 | if (best > 0) { | |
263 | partition->lowUseHeuristics = 1; | |
264 | partition->highUseHeuristics = 0; | |
265 | return; | |
266 | } | |
267 | } | |
268 | } | |
269 | ||
270 | /* More general 'too expensive' heuristic */ | |
271 | if (iterations >= ZSTD_EDIST_EXPENSIVE_THRESH) { | |
272 | S32 forwardDictSrcBest; | |
273 | S32 forwardDictBest = 0; | |
274 | S32 backwardDictSrcBest; | |
275 | S32 backwardDictBest = 0; | |
276 | ||
277 | forwardDictSrcBest = -1; | |
278 | for (diag = forwardMax; diag >= forwardMin; diag -= 2) { | |
279 | S32 dictIdx = MIN(forwardDiag[diag], dictHigh); | |
280 | S32 srcIdx = dictIdx - diag; | |
281 | ||
282 | if (srcHigh < srcIdx) { | |
283 | dictIdx = srcHigh + diag; | |
284 | srcIdx = srcHigh; | |
285 | } | |
286 | ||
287 | if (forwardDictSrcBest < dictIdx + srcIdx) { | |
288 | forwardDictSrcBest = dictIdx + srcIdx; | |
289 | forwardDictBest = dictIdx; | |
290 | } | |
291 | } | |
292 | ||
293 | backwardDictSrcBest = ZSTD_EDIST_DIAG_MAX; | |
294 | for (diag = backwardMax; diag >= backwardMin; diag -= 2) { | |
295 | S32 dictIdx = MAX(dictLow, backwardDiag[diag]); | |
296 | S32 srcIdx = dictIdx - diag; | |
297 | ||
298 | if (srcIdx < srcLow) { | |
299 | dictIdx = srcLow + diag; | |
300 | srcIdx = srcLow; | |
301 | } | |
302 | ||
303 | if (dictIdx + srcIdx < backwardDictSrcBest) { | |
304 | backwardDictSrcBest = dictIdx + srcIdx; | |
305 | backwardDictBest = dictIdx; | |
306 | } | |
307 | } | |
308 | ||
309 | if ((dictHigh + srcHigh) - backwardDictSrcBest < forwardDictSrcBest - (dictLow + srcLow)) { | |
310 | partition->dictMid = forwardDictBest; | |
311 | partition->srcMid = forwardDictSrcBest - forwardDictBest; | |
312 | partition->lowUseHeuristics = 0; | |
313 | partition->highUseHeuristics = 1; | |
314 | } else { | |
315 | partition->dictMid = backwardDictBest; | |
316 | partition->srcMid = backwardDictSrcBest - backwardDictBest; | |
317 | partition->lowUseHeuristics = 1; | |
318 | partition->highUseHeuristics = 0; | |
319 | } | |
320 | return; | |
321 | } | |
322 | } | |
323 | } | |
324 | ||
325 | static void ZSTD_eDist_insertMatch(ZSTD_eDist_state* state, | |
326 | S32 const dictIdx, S32 const srcIdx) | |
327 | { | |
328 | state->matches[state->nbMatches].dictIdx = dictIdx; | |
329 | state->matches[state->nbMatches].srcIdx = srcIdx; | |
330 | state->matches[state->nbMatches].matchLength = 1; | |
331 | state->nbMatches++; | |
332 | } | |
333 | ||
334 | static int ZSTD_eDist_compare(ZSTD_eDist_state* state, | |
335 | S32 dictLow, S32 dictHigh, S32 srcLow, | |
336 | S32 srcHigh, int useHeuristics) | |
337 | { | |
338 | const BYTE* const dict = state->dict; | |
339 | const BYTE* const src = state->src; | |
340 | ||
341 | /* Found matches while traversing from the low end */ | |
342 | while (dictLow < dictHigh && srcLow < srcHigh && dict[dictLow] == src[srcLow]) { | |
343 | ZSTD_eDist_insertMatch(state, dictLow, srcLow); | |
344 | dictLow++; | |
345 | srcLow++; | |
346 | } | |
347 | ||
348 | /* Found matches while traversing from the high end */ | |
349 | while (dictLow < dictHigh && srcLow < srcHigh && dict[dictHigh - 1] == src[srcHigh - 1]) { | |
350 | ZSTD_eDist_insertMatch(state, dictHigh - 1, srcHigh - 1); | |
351 | dictHigh--; | |
352 | srcHigh--; | |
353 | } | |
354 | ||
355 | /* If the low and high end end up touching. If we wanted to make | |
356 | * note of the differences like most diffing algorithms do, we would | |
357 | * do so here. In our case, we're only concerned with matches | |
358 | * Note: if you wanted to find the edit distance of the algorithm, | |
359 | * you could just accumulate the cost for an insertion/deletion | |
360 | * below. */ | |
361 | if (dictLow == dictHigh) { | |
362 | while (srcLow < srcHigh) { | |
363 | /* Reaching this point means inserting src[srcLow] into | |
364 | * the current position of dict */ | |
365 | srcLow++; | |
366 | } | |
367 | } else if (srcLow == srcHigh) { | |
368 | while (dictLow < dictHigh) { | |
369 | /* Reaching this point means deleteing dict[dictLow] from | |
370 | * the current positino of dict */ | |
371 | dictLow++; | |
372 | } | |
373 | } else { | |
374 | ZSTD_eDist_partition partition; | |
375 | partition.dictMid = 0; | |
376 | partition.srcMid = 0; | |
377 | ZSTD_eDist_diag(state, &partition, dictLow, dictHigh, | |
378 | srcLow, srcHigh, useHeuristics); | |
379 | if (ZSTD_eDist_compare(state, dictLow, partition.dictMid, | |
380 | srcLow, partition.srcMid, partition.lowUseHeuristics)) | |
381 | return 1; | |
382 | if (ZSTD_eDist_compare(state, partition.dictMid, dictHigh, | |
383 | partition.srcMid, srcHigh, partition.highUseHeuristics)) | |
384 | return 1; | |
385 | } | |
386 | ||
387 | return 0; | |
388 | } | |
389 | ||
390 | static int ZSTD_eDist_matchComp(const void* p, const void* q) | |
391 | { | |
392 | S32 const l = ((ZSTD_eDist_match*)p)->srcIdx; | |
393 | S32 const r = ((ZSTD_eDist_match*)q)->srcIdx; | |
394 | return (l - r); | |
395 | } | |
396 | ||
397 | /* The matches from the approach above will all be of the form | |
398 | * (dictIdx, srcIdx, 1). this method combines contiguous matches | |
399 | * of length MINMATCH or greater. Matches less than MINMATCH | |
400 | * are discarded */ | |
401 | static void ZSTD_eDist_combineMatches(ZSTD_eDist_state* state) | |
402 | { | |
403 | /* Create a new buffer to put the combined matches into | |
404 | * and memcpy to state->matches after */ | |
405 | ZSTD_eDist_match* combinedMatches = | |
406 | ZSTD_malloc(state->nbMatches * sizeof(ZSTD_eDist_match), | |
407 | ZSTD_defaultCMem); | |
408 | ||
409 | U32 nbCombinedMatches = 1; | |
410 | size_t i; | |
411 | ||
412 | /* Make sure that the srcIdx and dictIdx are in sorted order. | |
413 | * The combination step won't work otherwise */ | |
414 | qsort(state->matches, state->nbMatches, sizeof(ZSTD_eDist_match), ZSTD_eDist_matchComp); | |
415 | ||
416 | memcpy(combinedMatches, state->matches, sizeof(ZSTD_eDist_match)); | |
417 | for (i = 1; i < state->nbMatches; i++) { | |
418 | ZSTD_eDist_match const match = state->matches[i]; | |
419 | ZSTD_eDist_match const combinedMatch = | |
420 | combinedMatches[nbCombinedMatches - 1]; | |
421 | if (combinedMatch.srcIdx + combinedMatch.matchLength == match.srcIdx && | |
422 | combinedMatch.dictIdx + combinedMatch.matchLength == match.dictIdx) { | |
423 | combinedMatches[nbCombinedMatches - 1].matchLength++; | |
424 | } else { | |
425 | /* Discard matches that are less than MINMATCH */ | |
426 | if (combinedMatches[nbCombinedMatches - 1].matchLength < MINMATCH) { | |
427 | nbCombinedMatches--; | |
428 | } | |
429 | ||
430 | memcpy(combinedMatches + nbCombinedMatches, | |
431 | state->matches + i, sizeof(ZSTD_eDist_match)); | |
432 | nbCombinedMatches++; | |
433 | } | |
434 | } | |
435 | memcpy(state->matches, combinedMatches, nbCombinedMatches * sizeof(ZSTD_eDist_match)); | |
436 | state->nbMatches = nbCombinedMatches; | |
437 | ZSTD_free(combinedMatches, ZSTD_defaultCMem); | |
438 | } | |
439 | ||
440 | static size_t ZSTD_eDist_convertMatchesToSequences(ZSTD_Sequence* sequences, | |
441 | ZSTD_eDist_state* state) | |
442 | { | |
443 | const ZSTD_eDist_match* matches = state->matches; | |
444 | size_t const nbMatches = state->nbMatches; | |
445 | size_t const dictSize = state->dictSize; | |
446 | size_t nbSequences = 0; | |
447 | size_t i; | |
448 | for (i = 0; i < nbMatches; i++) { | |
449 | ZSTD_eDist_match const match = matches[i]; | |
450 | U32 const litLength = !i ? match.srcIdx : | |
451 | match.srcIdx - (matches[i - 1].srcIdx + matches[i - 1].matchLength); | |
452 | U32 const offset = (match.srcIdx + dictSize) - match.dictIdx; | |
453 | U32 const matchLength = match.matchLength; | |
454 | sequences[nbSequences].offset = offset; | |
455 | sequences[nbSequences].litLength = litLength; | |
456 | sequences[nbSequences].matchLength = matchLength; | |
457 | nbSequences++; | |
458 | } | |
459 | return nbSequences; | |
460 | } | |
461 | ||
462 | /*-************************************* | |
463 | * Interal utils | |
464 | ***************************************/ | |
465 | ||
466 | static size_t ZSTD_eDist_hamingDist(const BYTE* const a, | |
467 | const BYTE* const b, size_t n) | |
468 | { | |
469 | size_t i; | |
470 | size_t dist = 0; | |
471 | for (i = 0; i < n; i++) | |
472 | dist += a[i] != b[i]; | |
473 | return dist; | |
474 | } | |
475 | ||
476 | /* This is a pretty naive recursive implementation that should only | |
477 | * be used for quick tests obviously. Don't try and run this on a | |
478 | * GB file or something. There are faster implementations. Use those | |
479 | * if you need to run it for large files. */ | |
480 | static size_t ZSTD_eDist_levenshteinDist(const BYTE* const s, | |
481 | size_t const sn, const BYTE* const t, | |
482 | size_t const tn) | |
483 | { | |
484 | size_t a, b, c; | |
485 | ||
486 | if (!sn) | |
487 | return tn; | |
488 | if (!tn) | |
489 | return sn; | |
490 | ||
491 | if (s[sn - 1] == t[tn - 1]) | |
492 | return ZSTD_eDist_levenshteinDist( | |
493 | s, sn - 1, t, tn - 1); | |
494 | ||
495 | a = ZSTD_eDist_levenshteinDist(s, sn - 1, t, tn - 1); | |
496 | b = ZSTD_eDist_levenshteinDist(s, sn, t, tn - 1); | |
497 | c = ZSTD_eDist_levenshteinDist(s, sn - 1, t, tn); | |
498 | ||
499 | if (a > b) | |
500 | a = b; | |
501 | if (a > c) | |
502 | a = c; | |
503 | ||
504 | return a + 1; | |
505 | } | |
506 | ||
507 | static void ZSTD_eDist_validateMatches(ZSTD_eDist_match* matches, | |
508 | size_t const nbMatches, const BYTE* const dict, | |
509 | size_t const dictSize, const BYTE* const src, | |
510 | size_t const srcSize) | |
511 | { | |
512 | size_t i; | |
513 | for (i = 0; i < nbMatches; i++) { | |
514 | ZSTD_eDist_match match = matches[i]; | |
515 | U32 const dictIdx = match.dictIdx; | |
516 | U32 const srcIdx = match.srcIdx; | |
517 | U32 const matchLength = match.matchLength; | |
518 | ||
519 | assert(dictIdx + matchLength < dictSize); | |
520 | assert(srcIdx + matchLength < srcSize); | |
521 | assert(!memcmp(dict + dictIdx, src + srcIdx, matchLength)); | |
522 | } | |
523 | } | |
524 | ||
525 | /*-************************************* | |
526 | * API | |
527 | ***************************************/ | |
528 | ||
529 | size_t ZSTD_eDist_genSequences(ZSTD_Sequence* sequences, | |
530 | const void* dict, size_t dictSize, | |
531 | const void* src, size_t srcSize, | |
532 | int useHeuristics) | |
533 | { | |
534 | size_t const nbDiags = dictSize + srcSize + 3; | |
535 | S32* buffer = ZSTD_malloc(nbDiags * 2 * sizeof(S32), ZSTD_defaultCMem); | |
536 | ZSTD_eDist_state state; | |
537 | size_t nbSequences = 0; | |
538 | ||
539 | state.dict = (const BYTE*)dict; | |
540 | state.src = (const BYTE*)src; | |
541 | state.dictSize = dictSize; | |
542 | state.srcSize = srcSize; | |
543 | state.forwardDiag = buffer; | |
544 | state.backwardDiag = buffer + nbDiags; | |
545 | state.forwardDiag += srcSize + 1; | |
546 | state.backwardDiag += srcSize + 1; | |
547 | state.matches = ZSTD_malloc(srcSize * sizeof(ZSTD_eDist_match), ZSTD_defaultCMem); | |
548 | state.nbMatches = 0; | |
549 | ||
550 | ZSTD_eDist_compare(&state, 0, dictSize, 0, srcSize, 1); | |
551 | ZSTD_eDist_combineMatches(&state); | |
552 | nbSequences = ZSTD_eDist_convertMatchesToSequences(sequences, &state); | |
553 | ||
554 | ZSTD_free(buffer, ZSTD_defaultCMem); | |
555 | ZSTD_free(state.matches, ZSTD_defaultCMem); | |
556 | ||
557 | return nbSequences; | |
558 | } |