/*
 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */
11 #include "zstd_compress_internal.h"
12 #include "zstd_lazy.h"
/*-*************************************
*  Binary Tree search
***************************************/
20 ZSTD_updateDUBT(ZSTD_matchState_t
* ms
,
21 const BYTE
* ip
, const BYTE
* iend
,
24 const ZSTD_compressionParameters
* const cParams
= &ms
->cParams
;
25 U32
* const hashTable
= ms
->hashTable
;
26 U32
const hashLog
= cParams
->hashLog
;
28 U32
* const bt
= ms
->chainTable
;
29 U32
const btLog
= cParams
->chainLog
- 1;
30 U32
const btMask
= (1 << btLog
) - 1;
32 const BYTE
* const base
= ms
->window
.base
;
33 U32
const target
= (U32
)(ip
- base
);
34 U32 idx
= ms
->nextToUpdate
;
37 DEBUGLOG(7, "ZSTD_updateDUBT, from %u to %u (dictLimit:%u)",
38 idx
, target
, ms
->window
.dictLimit
);
39 assert(ip
+ 8 <= iend
); /* condition for ZSTD_hashPtr */
42 assert(idx
>= ms
->window
.dictLimit
); /* condition for valid base+idx */
43 for ( ; idx
< target
; idx
++) {
44 size_t const h
= ZSTD_hashPtr(base
+ idx
, hashLog
, mls
); /* assumption : ip + 8 <= iend */
45 U32
const matchIndex
= hashTable
[h
];
47 U32
* const nextCandidatePtr
= bt
+ 2*(idx
&btMask
);
48 U32
* const sortMarkPtr
= nextCandidatePtr
+ 1;
50 DEBUGLOG(8, "ZSTD_updateDUBT: insert %u", idx
);
51 hashTable
[h
] = idx
; /* Update Hash Table */
52 *nextCandidatePtr
= matchIndex
; /* update BT like a chain */
53 *sortMarkPtr
= ZSTD_DUBT_UNSORTED_MARK
;
55 ms
->nextToUpdate
= target
;
59 /** ZSTD_insertDUBT1() :
60 * sort one already inserted but unsorted position
61 * assumption : current >= btlow == (current - btmask)
64 ZSTD_insertDUBT1(ZSTD_matchState_t
* ms
,
65 U32 current
, const BYTE
* inputEnd
,
66 U32 nbCompares
, U32 btLow
,
67 const ZSTD_dictMode_e dictMode
)
69 const ZSTD_compressionParameters
* const cParams
= &ms
->cParams
;
70 U32
* const bt
= ms
->chainTable
;
71 U32
const btLog
= cParams
->chainLog
- 1;
72 U32
const btMask
= (1 << btLog
) - 1;
73 size_t commonLengthSmaller
=0, commonLengthLarger
=0;
74 const BYTE
* const base
= ms
->window
.base
;
75 const BYTE
* const dictBase
= ms
->window
.dictBase
;
76 const U32 dictLimit
= ms
->window
.dictLimit
;
77 const BYTE
* const ip
= (current
>=dictLimit
) ? base
+ current
: dictBase
+ current
;
78 const BYTE
* const iend
= (current
>=dictLimit
) ? inputEnd
: dictBase
+ dictLimit
;
79 const BYTE
* const dictEnd
= dictBase
+ dictLimit
;
80 const BYTE
* const prefixStart
= base
+ dictLimit
;
82 U32
* smallerPtr
= bt
+ 2*(current
&btMask
);
83 U32
* largerPtr
= smallerPtr
+ 1;
84 U32 matchIndex
= *smallerPtr
; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
85 U32 dummy32
; /* to be nullified at the end */
86 U32
const windowValid
= ms
->window
.lowLimit
;
87 U32
const maxDistance
= 1U << cParams
->windowLog
;
88 U32
const windowLow
= (current
- windowValid
> maxDistance
) ? current
- maxDistance
: windowValid
;
91 DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
92 current
, dictLimit
, windowLow
);
93 assert(current
>= btLow
);
94 assert(ip
< iend
); /* condition for ZSTD_count */
96 while (nbCompares
-- && (matchIndex
> windowLow
)) {
97 U32
* const nextPtr
= bt
+ 2*(matchIndex
& btMask
);
98 size_t matchLength
= MIN(commonLengthSmaller
, commonLengthLarger
); /* guaranteed minimum nb of common bytes */
99 assert(matchIndex
< current
);
100 /* note : all candidates are now supposed sorted,
101 * but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK
102 * when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */
104 if ( (dictMode
!= ZSTD_extDict
)
105 || (matchIndex
+matchLength
>= dictLimit
) /* both in current segment*/
106 || (current
< dictLimit
) /* both in extDict */) {
107 const BYTE
* const mBase
= ( (dictMode
!= ZSTD_extDict
)
108 || (matchIndex
+matchLength
>= dictLimit
)) ?
110 assert( (matchIndex
+matchLength
>= dictLimit
) /* might be wrong if extDict is incorrectly set to 0 */
111 || (current
< dictLimit
) );
112 match
= mBase
+ matchIndex
;
113 matchLength
+= ZSTD_count(ip
+matchLength
, match
+matchLength
, iend
);
115 match
= dictBase
+ matchIndex
;
116 matchLength
+= ZSTD_count_2segments(ip
+matchLength
, match
+matchLength
, iend
, dictEnd
, prefixStart
);
117 if (matchIndex
+matchLength
>= dictLimit
)
118 match
= base
+ matchIndex
; /* preparation for next read of match[matchLength] */
121 DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ",
122 current
, matchIndex
, (U32
)matchLength
);
124 if (ip
+matchLength
== iend
) { /* equal : no way to know if inf or sup */
125 break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
128 if (match
[matchLength
] < ip
[matchLength
]) { /* necessarily within buffer */
129 /* match is smaller than current */
130 *smallerPtr
= matchIndex
; /* update smaller idx */
131 commonLengthSmaller
= matchLength
; /* all smaller will now have at least this guaranteed common length */
132 if (matchIndex
<= btLow
) { smallerPtr
=&dummy32
; break; } /* beyond tree size, stop searching */
133 DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is smaller : next => %u",
134 matchIndex
, btLow
, nextPtr
[1]);
135 smallerPtr
= nextPtr
+1; /* new "candidate" => larger than match, which was smaller than target */
136 matchIndex
= nextPtr
[1]; /* new matchIndex, larger than previous and closer to current */
138 /* match is larger than current */
139 *largerPtr
= matchIndex
;
140 commonLengthLarger
= matchLength
;
141 if (matchIndex
<= btLow
) { largerPtr
=&dummy32
; break; } /* beyond tree size, stop searching */
142 DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is larger => %u",
143 matchIndex
, btLow
, nextPtr
[0]);
145 matchIndex
= nextPtr
[0];
148 *smallerPtr
= *largerPtr
= 0;
153 ZSTD_DUBT_findBetterDictMatch (
154 ZSTD_matchState_t
* ms
,
155 const BYTE
* const ip
, const BYTE
* const iend
,
160 const ZSTD_dictMode_e dictMode
)
162 const ZSTD_matchState_t
* const dms
= ms
->dictMatchState
;
163 const ZSTD_compressionParameters
* const dmsCParams
= &dms
->cParams
;
164 const U32
* const dictHashTable
= dms
->hashTable
;
165 U32
const hashLog
= dmsCParams
->hashLog
;
166 size_t const h
= ZSTD_hashPtr(ip
, hashLog
, mls
);
167 U32 dictMatchIndex
= dictHashTable
[h
];
169 const BYTE
* const base
= ms
->window
.base
;
170 const BYTE
* const prefixStart
= base
+ ms
->window
.dictLimit
;
171 U32
const current
= (U32
)(ip
-base
);
172 const BYTE
* const dictBase
= dms
->window
.base
;
173 const BYTE
* const dictEnd
= dms
->window
.nextSrc
;
174 U32
const dictHighLimit
= (U32
)(dms
->window
.nextSrc
- dms
->window
.base
);
175 U32
const dictLowLimit
= dms
->window
.lowLimit
;
176 U32
const dictIndexDelta
= ms
->window
.lowLimit
- dictHighLimit
;
178 U32
* const dictBt
= dms
->chainTable
;
179 U32
const btLog
= dmsCParams
->chainLog
- 1;
180 U32
const btMask
= (1 << btLog
) - 1;
181 U32
const btLow
= (btMask
>= dictHighLimit
- dictLowLimit
) ? dictLowLimit
: dictHighLimit
- btMask
;
183 size_t commonLengthSmaller
=0, commonLengthLarger
=0;
186 assert(dictMode
== ZSTD_dictMatchState
);
188 while (nbCompares
-- && (dictMatchIndex
> dictLowLimit
)) {
189 U32
* const nextPtr
= dictBt
+ 2*(dictMatchIndex
& btMask
);
190 size_t matchLength
= MIN(commonLengthSmaller
, commonLengthLarger
); /* guaranteed minimum nb of common bytes */
191 const BYTE
* match
= dictBase
+ dictMatchIndex
;
192 matchLength
+= ZSTD_count_2segments(ip
+matchLength
, match
+matchLength
, iend
, dictEnd
, prefixStart
);
193 if (dictMatchIndex
+matchLength
>= dictHighLimit
)
194 match
= base
+ dictMatchIndex
+ dictIndexDelta
; /* to prepare for next usage of match[matchLength] */
196 if (matchLength
> bestLength
) {
197 U32 matchIndex
= dictMatchIndex
+ dictIndexDelta
;
198 if ( (4*(int)(matchLength
-bestLength
)) > (int)(ZSTD_highbit32(current
-matchIndex
+1) - ZSTD_highbit32((U32
)offsetPtr
[0]+1)) ) {
199 DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)",
200 current
, (U32
)bestLength
, (U32
)matchLength
, (U32
)*offsetPtr
, ZSTD_REP_MOVE
+ current
- matchIndex
, dictMatchIndex
, matchIndex
);
201 bestLength
= matchLength
, *offsetPtr
= ZSTD_REP_MOVE
+ current
- matchIndex
;
203 if (ip
+matchLength
== iend
) { /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */
204 break; /* drop, to guarantee consistency (miss a little bit of compression) */
208 if (match
[matchLength
] < ip
[matchLength
]) {
209 if (dictMatchIndex
<= btLow
) { break; } /* beyond tree size, stop the search */
210 commonLengthSmaller
= matchLength
; /* all smaller will now have at least this guaranteed common length */
211 dictMatchIndex
= nextPtr
[1]; /* new matchIndex larger than previous (closer to current) */
213 /* match is larger than current */
214 if (dictMatchIndex
<= btLow
) { break; } /* beyond tree size, stop the search */
215 commonLengthLarger
= matchLength
;
216 dictMatchIndex
= nextPtr
[0];
220 if (bestLength
>= MINMATCH
) {
221 U32
const mIndex
= current
- ((U32
)*offsetPtr
- ZSTD_REP_MOVE
); (void)mIndex
;
222 DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
223 current
, (U32
)bestLength
, (U32
)*offsetPtr
, mIndex
);
231 ZSTD_DUBT_findBestMatch(ZSTD_matchState_t
* ms
,
232 const BYTE
* const ip
, const BYTE
* const iend
,
235 const ZSTD_dictMode_e dictMode
)
237 const ZSTD_compressionParameters
* const cParams
= &ms
->cParams
;
238 U32
* const hashTable
= ms
->hashTable
;
239 U32
const hashLog
= cParams
->hashLog
;
240 size_t const h
= ZSTD_hashPtr(ip
, hashLog
, mls
);
241 U32 matchIndex
= hashTable
[h
];
243 const BYTE
* const base
= ms
->window
.base
;
244 U32
const current
= (U32
)(ip
-base
);
245 U32
const windowLow
= ZSTD_getLowestMatchIndex(ms
, current
, cParams
->windowLog
);
247 U32
* const bt
= ms
->chainTable
;
248 U32
const btLog
= cParams
->chainLog
- 1;
249 U32
const btMask
= (1 << btLog
) - 1;
250 U32
const btLow
= (btMask
>= current
) ? 0 : current
- btMask
;
251 U32
const unsortLimit
= MAX(btLow
, windowLow
);
253 U32
* nextCandidate
= bt
+ 2*(matchIndex
&btMask
);
254 U32
* unsortedMark
= bt
+ 2*(matchIndex
&btMask
) + 1;
255 U32 nbCompares
= 1U << cParams
->searchLog
;
256 U32 nbCandidates
= nbCompares
;
257 U32 previousCandidate
= 0;
259 DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", current
);
260 assert(ip
<= iend
-8); /* required for h calculation */
262 /* reach end of unsorted candidates list */
263 while ( (matchIndex
> unsortLimit
)
264 && (*unsortedMark
== ZSTD_DUBT_UNSORTED_MARK
)
265 && (nbCandidates
> 1) ) {
266 DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted",
268 *unsortedMark
= previousCandidate
; /* the unsortedMark becomes a reversed chain, to move up back to original position */
269 previousCandidate
= matchIndex
;
270 matchIndex
= *nextCandidate
;
271 nextCandidate
= bt
+ 2*(matchIndex
&btMask
);
272 unsortedMark
= bt
+ 2*(matchIndex
&btMask
) + 1;
276 /* nullify last candidate if it's still unsorted
277 * simplification, detrimental to compression ratio, beneficial for speed */
278 if ( (matchIndex
> unsortLimit
)
279 && (*unsortedMark
==ZSTD_DUBT_UNSORTED_MARK
) ) {
280 DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u",
282 *nextCandidate
= *unsortedMark
= 0;
285 /* batch sort stacked candidates */
286 matchIndex
= previousCandidate
;
287 while (matchIndex
) { /* will end on matchIndex == 0 */
288 U32
* const nextCandidateIdxPtr
= bt
+ 2*(matchIndex
&btMask
) + 1;
289 U32
const nextCandidateIdx
= *nextCandidateIdxPtr
;
290 ZSTD_insertDUBT1(ms
, matchIndex
, iend
,
291 nbCandidates
, unsortLimit
, dictMode
);
292 matchIndex
= nextCandidateIdx
;
296 /* find longest match */
297 { size_t commonLengthSmaller
= 0, commonLengthLarger
= 0;
298 const BYTE
* const dictBase
= ms
->window
.dictBase
;
299 const U32 dictLimit
= ms
->window
.dictLimit
;
300 const BYTE
* const dictEnd
= dictBase
+ dictLimit
;
301 const BYTE
* const prefixStart
= base
+ dictLimit
;
302 U32
* smallerPtr
= bt
+ 2*(current
&btMask
);
303 U32
* largerPtr
= bt
+ 2*(current
&btMask
) + 1;
304 U32 matchEndIdx
= current
+ 8 + 1;
305 U32 dummy32
; /* to be nullified at the end */
306 size_t bestLength
= 0;
308 matchIndex
= hashTable
[h
];
309 hashTable
[h
] = current
; /* Update Hash Table */
311 while (nbCompares
-- && (matchIndex
> windowLow
)) {
312 U32
* const nextPtr
= bt
+ 2*(matchIndex
& btMask
);
313 size_t matchLength
= MIN(commonLengthSmaller
, commonLengthLarger
); /* guaranteed minimum nb of common bytes */
316 if ((dictMode
!= ZSTD_extDict
) || (matchIndex
+matchLength
>= dictLimit
)) {
317 match
= base
+ matchIndex
;
318 matchLength
+= ZSTD_count(ip
+matchLength
, match
+matchLength
, iend
);
320 match
= dictBase
+ matchIndex
;
321 matchLength
+= ZSTD_count_2segments(ip
+matchLength
, match
+matchLength
, iend
, dictEnd
, prefixStart
);
322 if (matchIndex
+matchLength
>= dictLimit
)
323 match
= base
+ matchIndex
; /* to prepare for next usage of match[matchLength] */
326 if (matchLength
> bestLength
) {
327 if (matchLength
> matchEndIdx
- matchIndex
)
328 matchEndIdx
= matchIndex
+ (U32
)matchLength
;
329 if ( (4*(int)(matchLength
-bestLength
)) > (int)(ZSTD_highbit32(current
-matchIndex
+1) - ZSTD_highbit32((U32
)offsetPtr
[0]+1)) )
330 bestLength
= matchLength
, *offsetPtr
= ZSTD_REP_MOVE
+ current
- matchIndex
;
331 if (ip
+matchLength
== iend
) { /* equal : no way to know if inf or sup */
332 if (dictMode
== ZSTD_dictMatchState
) {
333 nbCompares
= 0; /* in addition to avoiding checking any
334 * further in this loop, make sure we
335 * skip checking in the dictionary. */
337 break; /* drop, to guarantee consistency (miss a little bit of compression) */
341 if (match
[matchLength
] < ip
[matchLength
]) {
342 /* match is smaller than current */
343 *smallerPtr
= matchIndex
; /* update smaller idx */
344 commonLengthSmaller
= matchLength
; /* all smaller will now have at least this guaranteed common length */
345 if (matchIndex
<= btLow
) { smallerPtr
=&dummy32
; break; } /* beyond tree size, stop the search */
346 smallerPtr
= nextPtr
+1; /* new "smaller" => larger of match */
347 matchIndex
= nextPtr
[1]; /* new matchIndex larger than previous (closer to current) */
349 /* match is larger than current */
350 *largerPtr
= matchIndex
;
351 commonLengthLarger
= matchLength
;
352 if (matchIndex
<= btLow
) { largerPtr
=&dummy32
; break; } /* beyond tree size, stop the search */
354 matchIndex
= nextPtr
[0];
357 *smallerPtr
= *largerPtr
= 0;
359 if (dictMode
== ZSTD_dictMatchState
&& nbCompares
) {
360 bestLength
= ZSTD_DUBT_findBetterDictMatch(
362 offsetPtr
, bestLength
, nbCompares
,
366 assert(matchEndIdx
> current
+8); /* ensure nextToUpdate is increased */
367 ms
->nextToUpdate
= matchEndIdx
- 8; /* skip repetitive patterns */
368 if (bestLength
>= MINMATCH
) {
369 U32
const mIndex
= current
- ((U32
)*offsetPtr
- ZSTD_REP_MOVE
); (void)mIndex
;
370 DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
371 current
, (U32
)bestLength
, (U32
)*offsetPtr
, mIndex
);
378 /** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
379 FORCE_INLINE_TEMPLATE
size_t
380 ZSTD_BtFindBestMatch( ZSTD_matchState_t
* ms
,
381 const BYTE
* const ip
, const BYTE
* const iLimit
,
383 const U32 mls
/* template */,
384 const ZSTD_dictMode_e dictMode
)
386 DEBUGLOG(7, "ZSTD_BtFindBestMatch");
387 if (ip
< ms
->window
.base
+ ms
->nextToUpdate
) return 0; /* skipped area */
388 ZSTD_updateDUBT(ms
, ip
, iLimit
, mls
);
389 return ZSTD_DUBT_findBestMatch(ms
, ip
, iLimit
, offsetPtr
, mls
, dictMode
);
394 ZSTD_BtFindBestMatch_selectMLS ( ZSTD_matchState_t
* ms
,
395 const BYTE
* ip
, const BYTE
* const iLimit
,
398 switch(ms
->cParams
.minMatch
)
400 default : /* includes case 3 */
401 case 4 : return ZSTD_BtFindBestMatch(ms
, ip
, iLimit
, offsetPtr
, 4, ZSTD_noDict
);
402 case 5 : return ZSTD_BtFindBestMatch(ms
, ip
, iLimit
, offsetPtr
, 5, ZSTD_noDict
);
404 case 6 : return ZSTD_BtFindBestMatch(ms
, ip
, iLimit
, offsetPtr
, 6, ZSTD_noDict
);
409 static size_t ZSTD_BtFindBestMatch_dictMatchState_selectMLS (
410 ZSTD_matchState_t
* ms
,
411 const BYTE
* ip
, const BYTE
* const iLimit
,
414 switch(ms
->cParams
.minMatch
)
416 default : /* includes case 3 */
417 case 4 : return ZSTD_BtFindBestMatch(ms
, ip
, iLimit
, offsetPtr
, 4, ZSTD_dictMatchState
);
418 case 5 : return ZSTD_BtFindBestMatch(ms
, ip
, iLimit
, offsetPtr
, 5, ZSTD_dictMatchState
);
420 case 6 : return ZSTD_BtFindBestMatch(ms
, ip
, iLimit
, offsetPtr
, 6, ZSTD_dictMatchState
);
425 static size_t ZSTD_BtFindBestMatch_extDict_selectMLS (
426 ZSTD_matchState_t
* ms
,
427 const BYTE
* ip
, const BYTE
* const iLimit
,
430 switch(ms
->cParams
.minMatch
)
432 default : /* includes case 3 */
433 case 4 : return ZSTD_BtFindBestMatch(ms
, ip
, iLimit
, offsetPtr
, 4, ZSTD_extDict
);
434 case 5 : return ZSTD_BtFindBestMatch(ms
, ip
, iLimit
, offsetPtr
, 5, ZSTD_extDict
);
436 case 6 : return ZSTD_BtFindBestMatch(ms
, ip
, iLimit
, offsetPtr
, 6, ZSTD_extDict
);
/* *********************************
*  Hash Chain
***********************************/
445 #define NEXT_IN_CHAIN(d, mask) chainTable[(d) & (mask)]
447 /* Update chains up to ip (excluded)
448 Assumption : always within prefix (i.e. not within extDict) */
449 static U32
ZSTD_insertAndFindFirstIndex_internal(
450 ZSTD_matchState_t
* ms
,
451 const ZSTD_compressionParameters
* const cParams
,
452 const BYTE
* ip
, U32
const mls
)
454 U32
* const hashTable
= ms
->hashTable
;
455 const U32 hashLog
= cParams
->hashLog
;
456 U32
* const chainTable
= ms
->chainTable
;
457 const U32 chainMask
= (1 << cParams
->chainLog
) - 1;
458 const BYTE
* const base
= ms
->window
.base
;
459 const U32 target
= (U32
)(ip
- base
);
460 U32 idx
= ms
->nextToUpdate
;
462 while(idx
< target
) { /* catch up */
463 size_t const h
= ZSTD_hashPtr(base
+idx
, hashLog
, mls
);
464 NEXT_IN_CHAIN(idx
, chainMask
) = hashTable
[h
];
469 ms
->nextToUpdate
= target
;
470 return hashTable
[ZSTD_hashPtr(ip
, hashLog
, mls
)];
473 U32
ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t
* ms
, const BYTE
* ip
) {
474 const ZSTD_compressionParameters
* const cParams
= &ms
->cParams
;
475 return ZSTD_insertAndFindFirstIndex_internal(ms
, cParams
, ip
, ms
->cParams
.minMatch
);
479 /* inlining is important to hardwire a hot branch (template emulation) */
480 FORCE_INLINE_TEMPLATE
481 size_t ZSTD_HcFindBestMatch_generic (
482 ZSTD_matchState_t
* ms
,
483 const BYTE
* const ip
, const BYTE
* const iLimit
,
485 const U32 mls
, const ZSTD_dictMode_e dictMode
)
487 const ZSTD_compressionParameters
* const cParams
= &ms
->cParams
;
488 U32
* const chainTable
= ms
->chainTable
;
489 const U32 chainSize
= (1 << cParams
->chainLog
);
490 const U32 chainMask
= chainSize
-1;
491 const BYTE
* const base
= ms
->window
.base
;
492 const BYTE
* const dictBase
= ms
->window
.dictBase
;
493 const U32 dictLimit
= ms
->window
.dictLimit
;
494 const BYTE
* const prefixStart
= base
+ dictLimit
;
495 const BYTE
* const dictEnd
= dictBase
+ dictLimit
;
496 const U32 current
= (U32
)(ip
-base
);
497 const U32 maxDistance
= 1U << cParams
->windowLog
;
498 const U32 lowestValid
= ms
->window
.lowLimit
;
499 const U32 withinMaxDistance
= (current
- lowestValid
> maxDistance
) ? current
- maxDistance
: lowestValid
;
500 const U32 isDictionary
= (ms
->loadedDictEnd
!= 0);
501 const U32 lowLimit
= isDictionary
? lowestValid
: withinMaxDistance
;
502 const U32 minChain
= current
> chainSize
? current
- chainSize
: 0;
503 U32 nbAttempts
= 1U << cParams
->searchLog
;
506 /* HC4 match finder */
507 U32 matchIndex
= ZSTD_insertAndFindFirstIndex_internal(ms
, cParams
, ip
, mls
);
509 for ( ; (matchIndex
>lowLimit
) & (nbAttempts
>0) ; nbAttempts
--) {
511 if ((dictMode
!= ZSTD_extDict
) || matchIndex
>= dictLimit
) {
512 const BYTE
* const match
= base
+ matchIndex
;
513 assert(matchIndex
>= dictLimit
); /* ensures this is true if dictMode != ZSTD_extDict */
514 if (match
[ml
] == ip
[ml
]) /* potentially better */
515 currentMl
= ZSTD_count(ip
, match
, iLimit
);
517 const BYTE
* const match
= dictBase
+ matchIndex
;
518 assert(match
+4 <= dictEnd
);
519 if (MEM_read32(match
) == MEM_read32(ip
)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */
520 currentMl
= ZSTD_count_2segments(ip
+4, match
+4, iLimit
, dictEnd
, prefixStart
) + 4;
523 /* save best solution */
524 if (currentMl
> ml
) {
526 *offsetPtr
= current
- matchIndex
+ ZSTD_REP_MOVE
;
527 if (ip
+currentMl
== iLimit
) break; /* best possible, avoids read overflow on next attempt */
530 if (matchIndex
<= minChain
) break;
531 matchIndex
= NEXT_IN_CHAIN(matchIndex
, chainMask
);
534 if (dictMode
== ZSTD_dictMatchState
) {
535 const ZSTD_matchState_t
* const dms
= ms
->dictMatchState
;
536 const U32
* const dmsChainTable
= dms
->chainTable
;
537 const U32 dmsChainSize
= (1 << dms
->cParams
.chainLog
);
538 const U32 dmsChainMask
= dmsChainSize
- 1;
539 const U32 dmsLowestIndex
= dms
->window
.dictLimit
;
540 const BYTE
* const dmsBase
= dms
->window
.base
;
541 const BYTE
* const dmsEnd
= dms
->window
.nextSrc
;
542 const U32 dmsSize
= (U32
)(dmsEnd
- dmsBase
);
543 const U32 dmsIndexDelta
= dictLimit
- dmsSize
;
544 const U32 dmsMinChain
= dmsSize
> dmsChainSize
? dmsSize
- dmsChainSize
: 0;
546 matchIndex
= dms
->hashTable
[ZSTD_hashPtr(ip
, dms
->cParams
.hashLog
, mls
)];
548 for ( ; (matchIndex
>dmsLowestIndex
) & (nbAttempts
>0) ; nbAttempts
--) {
550 const BYTE
* const match
= dmsBase
+ matchIndex
;
551 assert(match
+4 <= dmsEnd
);
552 if (MEM_read32(match
) == MEM_read32(ip
)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */
553 currentMl
= ZSTD_count_2segments(ip
+4, match
+4, iLimit
, dmsEnd
, prefixStart
) + 4;
555 /* save best solution */
556 if (currentMl
> ml
) {
558 *offsetPtr
= current
- (matchIndex
+ dmsIndexDelta
) + ZSTD_REP_MOVE
;
559 if (ip
+currentMl
== iLimit
) break; /* best possible, avoids read overflow on next attempt */
562 if (matchIndex
<= dmsMinChain
) break;
563 matchIndex
= dmsChainTable
[matchIndex
& dmsChainMask
];
571 FORCE_INLINE_TEMPLATE
size_t ZSTD_HcFindBestMatch_selectMLS (
572 ZSTD_matchState_t
* ms
,
573 const BYTE
* ip
, const BYTE
* const iLimit
,
576 switch(ms
->cParams
.minMatch
)
578 default : /* includes case 3 */
579 case 4 : return ZSTD_HcFindBestMatch_generic(ms
, ip
, iLimit
, offsetPtr
, 4, ZSTD_noDict
);
580 case 5 : return ZSTD_HcFindBestMatch_generic(ms
, ip
, iLimit
, offsetPtr
, 5, ZSTD_noDict
);
582 case 6 : return ZSTD_HcFindBestMatch_generic(ms
, ip
, iLimit
, offsetPtr
, 6, ZSTD_noDict
);
587 static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS (
588 ZSTD_matchState_t
* ms
,
589 const BYTE
* ip
, const BYTE
* const iLimit
,
592 switch(ms
->cParams
.minMatch
)
594 default : /* includes case 3 */
595 case 4 : return ZSTD_HcFindBestMatch_generic(ms
, ip
, iLimit
, offsetPtr
, 4, ZSTD_dictMatchState
);
596 case 5 : return ZSTD_HcFindBestMatch_generic(ms
, ip
, iLimit
, offsetPtr
, 5, ZSTD_dictMatchState
);
598 case 6 : return ZSTD_HcFindBestMatch_generic(ms
, ip
, iLimit
, offsetPtr
, 6, ZSTD_dictMatchState
);
603 FORCE_INLINE_TEMPLATE
size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
604 ZSTD_matchState_t
* ms
,
605 const BYTE
* ip
, const BYTE
* const iLimit
,
608 switch(ms
->cParams
.minMatch
)
610 default : /* includes case 3 */
611 case 4 : return ZSTD_HcFindBestMatch_generic(ms
, ip
, iLimit
, offsetPtr
, 4, ZSTD_extDict
);
612 case 5 : return ZSTD_HcFindBestMatch_generic(ms
, ip
, iLimit
, offsetPtr
, 5, ZSTD_extDict
);
614 case 6 : return ZSTD_HcFindBestMatch_generic(ms
, ip
, iLimit
, offsetPtr
, 6, ZSTD_extDict
);
619 /* *******************************
620 * Common parser - lazy strategy
621 *********************************/
/* Selects which match finder the lazy parser uses : hash chain or binary tree. */
typedef enum { search_hashChain, search_binaryTree } searchMethod_e;
624 FORCE_INLINE_TEMPLATE
size_t
625 ZSTD_compressBlock_lazy_generic(
626 ZSTD_matchState_t
* ms
, seqStore_t
* seqStore
,
627 U32 rep
[ZSTD_REP_NUM
],
628 const void* src
, size_t srcSize
,
629 const searchMethod_e searchMethod
, const U32 depth
,
630 ZSTD_dictMode_e
const dictMode
)
632 const BYTE
* const istart
= (const BYTE
*)src
;
633 const BYTE
* ip
= istart
;
634 const BYTE
* anchor
= istart
;
635 const BYTE
* const iend
= istart
+ srcSize
;
636 const BYTE
* const ilimit
= iend
- 8;
637 const BYTE
* const base
= ms
->window
.base
;
638 const U32 prefixLowestIndex
= ms
->window
.dictLimit
;
639 const BYTE
* const prefixLowest
= base
+ prefixLowestIndex
;
641 typedef size_t (*searchMax_f
)(
642 ZSTD_matchState_t
* ms
,
643 const BYTE
* ip
, const BYTE
* iLimit
, size_t* offsetPtr
);
644 searchMax_f
const searchMax
= dictMode
== ZSTD_dictMatchState
?
645 (searchMethod
==search_binaryTree
? ZSTD_BtFindBestMatch_dictMatchState_selectMLS
646 : ZSTD_HcFindBestMatch_dictMatchState_selectMLS
) :
647 (searchMethod
==search_binaryTree
? ZSTD_BtFindBestMatch_selectMLS
648 : ZSTD_HcFindBestMatch_selectMLS
);
649 U32 offset_1
= rep
[0], offset_2
= rep
[1], savedOffset
=0;
651 const ZSTD_matchState_t
* const dms
= ms
->dictMatchState
;
652 const U32 dictLowestIndex
= dictMode
== ZSTD_dictMatchState
?
653 dms
->window
.dictLimit
: 0;
654 const BYTE
* const dictBase
= dictMode
== ZSTD_dictMatchState
?
655 dms
->window
.base
: NULL
;
656 const BYTE
* const dictLowest
= dictMode
== ZSTD_dictMatchState
?
657 dictBase
+ dictLowestIndex
: NULL
;
658 const BYTE
* const dictEnd
= dictMode
== ZSTD_dictMatchState
?
659 dms
->window
.nextSrc
: NULL
;
660 const U32 dictIndexDelta
= dictMode
== ZSTD_dictMatchState
?
661 prefixLowestIndex
- (U32
)(dictEnd
- dictBase
) :
663 const U32 dictAndPrefixLength
= (U32
)((ip
- prefixLowest
) + (dictEnd
- dictLowest
));
665 DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32
)dictMode
);
668 ip
+= (dictAndPrefixLength
== 0);
669 if (dictMode
== ZSTD_noDict
) {
670 U32
const current
= (U32
)(ip
- base
);
671 U32
const windowLow
= ZSTD_getLowestPrefixIndex(ms
, current
, ms
->cParams
.windowLog
);
672 U32
const maxRep
= current
- windowLow
;
673 if (offset_2
> maxRep
) savedOffset
= offset_2
, offset_2
= 0;
674 if (offset_1
> maxRep
) savedOffset
= offset_1
, offset_1
= 0;
676 if (dictMode
== ZSTD_dictMatchState
) {
677 /* dictMatchState repCode checks don't currently handle repCode == 0
679 assert(offset_1
<= dictAndPrefixLength
);
680 assert(offset_2
<= dictAndPrefixLength
);
684 #if defined(__GNUC__) && defined(__x86_64__)
685 /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the
686 * code alignment is perturbed. To fix the instability align the loop on 32-bytes.
688 __asm__(".p2align 5");
690 while (ip
< ilimit
) {
691 size_t matchLength
=0;
693 const BYTE
* start
=ip
+1;
696 if (dictMode
== ZSTD_dictMatchState
) {
697 const U32 repIndex
= (U32
)(ip
- base
) + 1 - offset_1
;
698 const BYTE
* repMatch
= (dictMode
== ZSTD_dictMatchState
699 && repIndex
< prefixLowestIndex
) ?
700 dictBase
+ (repIndex
- dictIndexDelta
) :
702 if (((U32
)((prefixLowestIndex
-1) - repIndex
) >= 3 /* intentional underflow */)
703 && (MEM_read32(repMatch
) == MEM_read32(ip
+1)) ) {
704 const BYTE
* repMatchEnd
= repIndex
< prefixLowestIndex
? dictEnd
: iend
;
705 matchLength
= ZSTD_count_2segments(ip
+1+4, repMatch
+4, iend
, repMatchEnd
, prefixLowest
) + 4;
706 if (depth
==0) goto _storeSequence
;
709 if ( dictMode
== ZSTD_noDict
710 && ((offset_1
> 0) & (MEM_read32(ip
+1-offset_1
) == MEM_read32(ip
+1)))) {
711 matchLength
= ZSTD_count(ip
+1+4, ip
+1+4-offset_1
, iend
) + 4;
712 if (depth
==0) goto _storeSequence
;
715 /* first search (depth 0) */
716 { size_t offsetFound
= 999999999;
717 size_t const ml2
= searchMax(ms
, ip
, iend
, &offsetFound
);
718 if (ml2
> matchLength
)
719 matchLength
= ml2
, start
= ip
, offset
=offsetFound
;
722 if (matchLength
< 4) {
723 ip
+= ((ip
-anchor
) >> kSearchStrength
) + 1; /* jump faster over incompressible sections */
727 /* let's try to find a better solution */
731 if ( (dictMode
== ZSTD_noDict
)
732 && (offset
) && ((offset_1
>0) & (MEM_read32(ip
) == MEM_read32(ip
- offset_1
)))) {
733 size_t const mlRep
= ZSTD_count(ip
+4, ip
+4-offset_1
, iend
) + 4;
734 int const gain2
= (int)(mlRep
* 3);
735 int const gain1
= (int)(matchLength
*3 - ZSTD_highbit32((U32
)offset
+1) + 1);
736 if ((mlRep
>= 4) && (gain2
> gain1
))
737 matchLength
= mlRep
, offset
= 0, start
= ip
;
739 if (dictMode
== ZSTD_dictMatchState
) {
740 const U32 repIndex
= (U32
)(ip
- base
) - offset_1
;
741 const BYTE
* repMatch
= repIndex
< prefixLowestIndex
?
742 dictBase
+ (repIndex
- dictIndexDelta
) :
744 if (((U32
)((prefixLowestIndex
-1) - repIndex
) >= 3 /* intentional underflow */)
745 && (MEM_read32(repMatch
) == MEM_read32(ip
)) ) {
746 const BYTE
* repMatchEnd
= repIndex
< prefixLowestIndex
? dictEnd
: iend
;
747 size_t const mlRep
= ZSTD_count_2segments(ip
+4, repMatch
+4, iend
, repMatchEnd
, prefixLowest
) + 4;
748 int const gain2
= (int)(mlRep
* 3);
749 int const gain1
= (int)(matchLength
*3 - ZSTD_highbit32((U32
)offset
+1) + 1);
750 if ((mlRep
>= 4) && (gain2
> gain1
))
751 matchLength
= mlRep
, offset
= 0, start
= ip
;
754 { size_t offset2
=999999999;
755 size_t const ml2
= searchMax(ms
, ip
, iend
, &offset2
);
756 int const gain2
= (int)(ml2
*4 - ZSTD_highbit32((U32
)offset2
+1)); /* raw approx */
757 int const gain1
= (int)(matchLength
*4 - ZSTD_highbit32((U32
)offset
+1) + 4);
758 if ((ml2
>= 4) && (gain2
> gain1
)) {
759 matchLength
= ml2
, offset
= offset2
, start
= ip
;
760 continue; /* search a better one */
763 /* let's find an even better one */
764 if ((depth
==2) && (ip
<ilimit
)) {
766 if ( (dictMode
== ZSTD_noDict
)
767 && (offset
) && ((offset_1
>0) & (MEM_read32(ip
) == MEM_read32(ip
- offset_1
)))) {
768 size_t const mlRep
= ZSTD_count(ip
+4, ip
+4-offset_1
, iend
) + 4;
769 int const gain2
= (int)(mlRep
* 4);
770 int const gain1
= (int)(matchLength
*4 - ZSTD_highbit32((U32
)offset
+1) + 1);
771 if ((mlRep
>= 4) && (gain2
> gain1
))
772 matchLength
= mlRep
, offset
= 0, start
= ip
;
774 if (dictMode
== ZSTD_dictMatchState
) {
775 const U32 repIndex
= (U32
)(ip
- base
) - offset_1
;
776 const BYTE
* repMatch
= repIndex
< prefixLowestIndex
?
777 dictBase
+ (repIndex
- dictIndexDelta
) :
779 if (((U32
)((prefixLowestIndex
-1) - repIndex
) >= 3 /* intentional underflow */)
780 && (MEM_read32(repMatch
) == MEM_read32(ip
)) ) {
781 const BYTE
* repMatchEnd
= repIndex
< prefixLowestIndex
? dictEnd
: iend
;
782 size_t const mlRep
= ZSTD_count_2segments(ip
+4, repMatch
+4, iend
, repMatchEnd
, prefixLowest
) + 4;
783 int const gain2
= (int)(mlRep
* 4);
784 int const gain1
= (int)(matchLength
*4 - ZSTD_highbit32((U32
)offset
+1) + 1);
785 if ((mlRep
>= 4) && (gain2
> gain1
))
786 matchLength
= mlRep
, offset
= 0, start
= ip
;
789 { size_t offset2
=999999999;
790 size_t const ml2
= searchMax(ms
, ip
, iend
, &offset2
);
791 int const gain2
= (int)(ml2
*4 - ZSTD_highbit32((U32
)offset2
+1)); /* raw approx */
792 int const gain1
= (int)(matchLength
*4 - ZSTD_highbit32((U32
)offset
+1) + 7);
793 if ((ml2
>= 4) && (gain2
> gain1
)) {
794 matchLength
= ml2
, offset
= offset2
, start
= ip
;
797 break; /* nothing found : store previous solution */
801 * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior.
802 * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which
803 * overflows the pointer, which is undefined behavior.
807 if (dictMode
== ZSTD_noDict
) {
808 while ( ((start
> anchor
) & (start
- (offset
-ZSTD_REP_MOVE
) > prefixLowest
))
809 && (start
[-1] == (start
-(offset
-ZSTD_REP_MOVE
))[-1]) ) /* only search for offset within prefix */
810 { start
--; matchLength
++; }
812 if (dictMode
== ZSTD_dictMatchState
) {
813 U32
const matchIndex
= (U32
)((start
-base
) - (offset
- ZSTD_REP_MOVE
));
814 const BYTE
* match
= (matchIndex
< prefixLowestIndex
) ? dictBase
+ matchIndex
- dictIndexDelta
: base
+ matchIndex
;
815 const BYTE
* const mStart
= (matchIndex
< prefixLowestIndex
) ? dictLowest
: prefixLowest
;
816 while ((start
>anchor
) && (match
>mStart
) && (start
[-1] == match
[-1])) { start
--; match
--; matchLength
++; } /* catch up */
818 offset_2
= offset_1
; offset_1
= (U32
)(offset
- ZSTD_REP_MOVE
);
822 { size_t const litLength
= start
- anchor
;
823 ZSTD_storeSeq(seqStore
, litLength
, anchor
, iend
, (U32
)offset
, matchLength
-MINMATCH
);
824 anchor
= ip
= start
+ matchLength
;
827 /* check immediate repcode */
828 if (dictMode
== ZSTD_dictMatchState
) {
829 while (ip
<= ilimit
) {
830 U32
const current2
= (U32
)(ip
-base
);
831 U32
const repIndex
= current2
- offset_2
;
832 const BYTE
* repMatch
= dictMode
== ZSTD_dictMatchState
833 && repIndex
< prefixLowestIndex
?
834 dictBase
- dictIndexDelta
+ repIndex
:
836 if ( ((U32
)((prefixLowestIndex
-1) - (U32
)repIndex
) >= 3 /* intentional overflow */)
837 && (MEM_read32(repMatch
) == MEM_read32(ip
)) ) {
838 const BYTE
* const repEnd2
= repIndex
< prefixLowestIndex
? dictEnd
: iend
;
839 matchLength
= ZSTD_count_2segments(ip
+4, repMatch
+4, iend
, repEnd2
, prefixLowest
) + 4;
840 offset
= offset_2
; offset_2
= offset_1
; offset_1
= (U32
)offset
; /* swap offset_2 <=> offset_1 */
841 ZSTD_storeSeq(seqStore
, 0, anchor
, iend
, 0, matchLength
-MINMATCH
);
850 if (dictMode
== ZSTD_noDict
) {
851 while ( ((ip
<= ilimit
) & (offset_2
>0))
852 && (MEM_read32(ip
) == MEM_read32(ip
- offset_2
)) ) {
854 matchLength
= ZSTD_count(ip
+4, ip
+4-offset_2
, iend
) + 4;
855 offset
= offset_2
; offset_2
= offset_1
; offset_1
= (U32
)offset
; /* swap repcodes */
856 ZSTD_storeSeq(seqStore
, 0, anchor
, iend
, 0, matchLength
-MINMATCH
);
859 continue; /* faster when present ... (?) */
862 /* Save reps for next block */
863 rep
[0] = offset_1
? offset_1
: savedOffset
;
864 rep
[1] = offset_2
? offset_2
: savedOffset
;
866 /* Return the last literals size */
867 return (size_t)(iend
- anchor
);
871 size_t ZSTD_compressBlock_btlazy2(
872 ZSTD_matchState_t
* ms
, seqStore_t
* seqStore
, U32 rep
[ZSTD_REP_NUM
],
873 void const* src
, size_t srcSize
)
875 return ZSTD_compressBlock_lazy_generic(ms
, seqStore
, rep
, src
, srcSize
, search_binaryTree
, 2, ZSTD_noDict
);
878 size_t ZSTD_compressBlock_lazy2(
879 ZSTD_matchState_t
* ms
, seqStore_t
* seqStore
, U32 rep
[ZSTD_REP_NUM
],
880 void const* src
, size_t srcSize
)
882 return ZSTD_compressBlock_lazy_generic(ms
, seqStore
, rep
, src
, srcSize
, search_hashChain
, 2, ZSTD_noDict
);
885 size_t ZSTD_compressBlock_lazy(
886 ZSTD_matchState_t
* ms
, seqStore_t
* seqStore
, U32 rep
[ZSTD_REP_NUM
],
887 void const* src
, size_t srcSize
)
889 return ZSTD_compressBlock_lazy_generic(ms
, seqStore
, rep
, src
, srcSize
, search_hashChain
, 1, ZSTD_noDict
);
892 size_t ZSTD_compressBlock_greedy(
893 ZSTD_matchState_t
* ms
, seqStore_t
* seqStore
, U32 rep
[ZSTD_REP_NUM
],
894 void const* src
, size_t srcSize
)
896 return ZSTD_compressBlock_lazy_generic(ms
, seqStore
, rep
, src
, srcSize
, search_hashChain
, 0, ZSTD_noDict
);
899 size_t ZSTD_compressBlock_btlazy2_dictMatchState(
900 ZSTD_matchState_t
* ms
, seqStore_t
* seqStore
, U32 rep
[ZSTD_REP_NUM
],
901 void const* src
, size_t srcSize
)
903 return ZSTD_compressBlock_lazy_generic(ms
, seqStore
, rep
, src
, srcSize
, search_binaryTree
, 2, ZSTD_dictMatchState
);
906 size_t ZSTD_compressBlock_lazy2_dictMatchState(
907 ZSTD_matchState_t
* ms
, seqStore_t
* seqStore
, U32 rep
[ZSTD_REP_NUM
],
908 void const* src
, size_t srcSize
)
910 return ZSTD_compressBlock_lazy_generic(ms
, seqStore
, rep
, src
, srcSize
, search_hashChain
, 2, ZSTD_dictMatchState
);
913 size_t ZSTD_compressBlock_lazy_dictMatchState(
914 ZSTD_matchState_t
* ms
, seqStore_t
* seqStore
, U32 rep
[ZSTD_REP_NUM
],
915 void const* src
, size_t srcSize
)
917 return ZSTD_compressBlock_lazy_generic(ms
, seqStore
, rep
, src
, srcSize
, search_hashChain
, 1, ZSTD_dictMatchState
);
920 size_t ZSTD_compressBlock_greedy_dictMatchState(
921 ZSTD_matchState_t
* ms
, seqStore_t
* seqStore
, U32 rep
[ZSTD_REP_NUM
],
922 void const* src
, size_t srcSize
)
924 return ZSTD_compressBlock_lazy_generic(ms
, seqStore
, rep
, src
, srcSize
, search_hashChain
, 0, ZSTD_dictMatchState
);
928 FORCE_INLINE_TEMPLATE
929 size_t ZSTD_compressBlock_lazy_extDict_generic(
930 ZSTD_matchState_t
* ms
, seqStore_t
* seqStore
,
931 U32 rep
[ZSTD_REP_NUM
],
932 const void* src
, size_t srcSize
,
933 const searchMethod_e searchMethod
, const U32 depth
)
935 const BYTE
* const istart
= (const BYTE
*)src
;
936 const BYTE
* ip
= istart
;
937 const BYTE
* anchor
= istart
;
938 const BYTE
* const iend
= istart
+ srcSize
;
939 const BYTE
* const ilimit
= iend
- 8;
940 const BYTE
* const base
= ms
->window
.base
;
941 const U32 dictLimit
= ms
->window
.dictLimit
;
942 const BYTE
* const prefixStart
= base
+ dictLimit
;
943 const BYTE
* const dictBase
= ms
->window
.dictBase
;
944 const BYTE
* const dictEnd
= dictBase
+ dictLimit
;
945 const BYTE
* const dictStart
= dictBase
+ ms
->window
.lowLimit
;
946 const U32 windowLog
= ms
->cParams
.windowLog
;
948 typedef size_t (*searchMax_f
)(
949 ZSTD_matchState_t
* ms
,
950 const BYTE
* ip
, const BYTE
* iLimit
, size_t* offsetPtr
);
951 searchMax_f searchMax
= searchMethod
==search_binaryTree
? ZSTD_BtFindBestMatch_extDict_selectMLS
: ZSTD_HcFindBestMatch_extDict_selectMLS
;
953 U32 offset_1
= rep
[0], offset_2
= rep
[1];
955 DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic");
958 ip
+= (ip
== prefixStart
);
961 #if defined(__GNUC__) && defined(__x86_64__)
962 /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the
963 * code alignment is perturbed. To fix the instability align the loop on 32-bytes.
965 __asm__(".p2align 5");
967 while (ip
< ilimit
) {
968 size_t matchLength
=0;
970 const BYTE
* start
=ip
+1;
971 U32 current
= (U32
)(ip
-base
);
974 { const U32 windowLow
= ZSTD_getLowestMatchIndex(ms
, current
+1, windowLog
);
975 const U32 repIndex
= (U32
)(current
+1 - offset_1
);
976 const BYTE
* const repBase
= repIndex
< dictLimit
? dictBase
: base
;
977 const BYTE
* const repMatch
= repBase
+ repIndex
;
978 if (((U32
)((dictLimit
-1) - repIndex
) >= 3) & (repIndex
> windowLow
)) /* intentional overflow */
979 if (MEM_read32(ip
+1) == MEM_read32(repMatch
)) {
980 /* repcode detected we should take it */
981 const BYTE
* const repEnd
= repIndex
< dictLimit
? dictEnd
: iend
;
982 matchLength
= ZSTD_count_2segments(ip
+1+4, repMatch
+4, iend
, repEnd
, prefixStart
) + 4;
983 if (depth
==0) goto _storeSequence
;
986 /* first search (depth 0) */
987 { size_t offsetFound
= 999999999;
988 size_t const ml2
= searchMax(ms
, ip
, iend
, &offsetFound
);
989 if (ml2
> matchLength
)
990 matchLength
= ml2
, start
= ip
, offset
=offsetFound
;
993 if (matchLength
< 4) {
994 ip
+= ((ip
-anchor
) >> kSearchStrength
) + 1; /* jump faster over incompressible sections */
998 /* let's try to find a better solution */
1005 const U32 windowLow
= ZSTD_getLowestMatchIndex(ms
, current
, windowLog
);
1006 const U32 repIndex
= (U32
)(current
- offset_1
);
1007 const BYTE
* const repBase
= repIndex
< dictLimit
? dictBase
: base
;
1008 const BYTE
* const repMatch
= repBase
+ repIndex
;
1009 if (((U32
)((dictLimit
-1) - repIndex
) >= 3) & (repIndex
> windowLow
)) /* intentional overflow */
1010 if (MEM_read32(ip
) == MEM_read32(repMatch
)) {
1011 /* repcode detected */
1012 const BYTE
* const repEnd
= repIndex
< dictLimit
? dictEnd
: iend
;
1013 size_t const repLength
= ZSTD_count_2segments(ip
+4, repMatch
+4, iend
, repEnd
, prefixStart
) + 4;
1014 int const gain2
= (int)(repLength
* 3);
1015 int const gain1
= (int)(matchLength
*3 - ZSTD_highbit32((U32
)offset
+1) + 1);
1016 if ((repLength
>= 4) && (gain2
> gain1
))
1017 matchLength
= repLength
, offset
= 0, start
= ip
;
1020 /* search match, depth 1 */
1021 { size_t offset2
=999999999;
1022 size_t const ml2
= searchMax(ms
, ip
, iend
, &offset2
);
1023 int const gain2
= (int)(ml2
*4 - ZSTD_highbit32((U32
)offset2
+1)); /* raw approx */
1024 int const gain1
= (int)(matchLength
*4 - ZSTD_highbit32((U32
)offset
+1) + 4);
1025 if ((ml2
>= 4) && (gain2
> gain1
)) {
1026 matchLength
= ml2
, offset
= offset2
, start
= ip
;
1027 continue; /* search a better one */
1030 /* let's find an even better one */
1031 if ((depth
==2) && (ip
<ilimit
)) {
1036 const U32 windowLow
= ZSTD_getLowestMatchIndex(ms
, current
, windowLog
);
1037 const U32 repIndex
= (U32
)(current
- offset_1
);
1038 const BYTE
* const repBase
= repIndex
< dictLimit
? dictBase
: base
;
1039 const BYTE
* const repMatch
= repBase
+ repIndex
;
1040 if (((U32
)((dictLimit
-1) - repIndex
) >= 3) & (repIndex
> windowLow
)) /* intentional overflow */
1041 if (MEM_read32(ip
) == MEM_read32(repMatch
)) {
1042 /* repcode detected */
1043 const BYTE
* const repEnd
= repIndex
< dictLimit
? dictEnd
: iend
;
1044 size_t const repLength
= ZSTD_count_2segments(ip
+4, repMatch
+4, iend
, repEnd
, prefixStart
) + 4;
1045 int const gain2
= (int)(repLength
* 4);
1046 int const gain1
= (int)(matchLength
*4 - ZSTD_highbit32((U32
)offset
+1) + 1);
1047 if ((repLength
>= 4) && (gain2
> gain1
))
1048 matchLength
= repLength
, offset
= 0, start
= ip
;
1051 /* search match, depth 2 */
1052 { size_t offset2
=999999999;
1053 size_t const ml2
= searchMax(ms
, ip
, iend
, &offset2
);
1054 int const gain2
= (int)(ml2
*4 - ZSTD_highbit32((U32
)offset2
+1)); /* raw approx */
1055 int const gain1
= (int)(matchLength
*4 - ZSTD_highbit32((U32
)offset
+1) + 7);
1056 if ((ml2
>= 4) && (gain2
> gain1
)) {
1057 matchLength
= ml2
, offset
= offset2
, start
= ip
;
1060 break; /* nothing found : store previous solution */
1065 U32
const matchIndex
= (U32
)((start
-base
) - (offset
- ZSTD_REP_MOVE
));
1066 const BYTE
* match
= (matchIndex
< dictLimit
) ? dictBase
+ matchIndex
: base
+ matchIndex
;
1067 const BYTE
* const mStart
= (matchIndex
< dictLimit
) ? dictStart
: prefixStart
;
1068 while ((start
>anchor
) && (match
>mStart
) && (start
[-1] == match
[-1])) { start
--; match
--; matchLength
++; } /* catch up */
1069 offset_2
= offset_1
; offset_1
= (U32
)(offset
- ZSTD_REP_MOVE
);
1072 /* store sequence */
1074 { size_t const litLength
= start
- anchor
;
1075 ZSTD_storeSeq(seqStore
, litLength
, anchor
, iend
, (U32
)offset
, matchLength
-MINMATCH
);
1076 anchor
= ip
= start
+ matchLength
;
1079 /* check immediate repcode */
1080 while (ip
<= ilimit
) {
1081 const U32 repCurrent
= (U32
)(ip
-base
);
1082 const U32 windowLow
= ZSTD_getLowestMatchIndex(ms
, repCurrent
, windowLog
);
1083 const U32 repIndex
= repCurrent
- offset_2
;
1084 const BYTE
* const repBase
= repIndex
< dictLimit
? dictBase
: base
;
1085 const BYTE
* const repMatch
= repBase
+ repIndex
;
1086 if (((U32
)((dictLimit
-1) - repIndex
) >= 3) & (repIndex
> windowLow
)) /* intentional overflow */
1087 if (MEM_read32(ip
) == MEM_read32(repMatch
)) {
1088 /* repcode detected we should take it */
1089 const BYTE
* const repEnd
= repIndex
< dictLimit
? dictEnd
: iend
;
1090 matchLength
= ZSTD_count_2segments(ip
+4, repMatch
+4, iend
, repEnd
, prefixStart
) + 4;
1091 offset
= offset_2
; offset_2
= offset_1
; offset_1
= (U32
)offset
; /* swap offset history */
1092 ZSTD_storeSeq(seqStore
, 0, anchor
, iend
, 0, matchLength
-MINMATCH
);
1095 continue; /* faster when present ... (?) */
1100 /* Save reps for next block */
1104 /* Return the last literals size */
1105 return (size_t)(iend
- anchor
);
1109 size_t ZSTD_compressBlock_greedy_extDict(
1110 ZSTD_matchState_t
* ms
, seqStore_t
* seqStore
, U32 rep
[ZSTD_REP_NUM
],
1111 void const* src
, size_t srcSize
)
1113 return ZSTD_compressBlock_lazy_extDict_generic(ms
, seqStore
, rep
, src
, srcSize
, search_hashChain
, 0);
1116 size_t ZSTD_compressBlock_lazy_extDict(
1117 ZSTD_matchState_t
* ms
, seqStore_t
* seqStore
, U32 rep
[ZSTD_REP_NUM
],
1118 void const* src
, size_t srcSize
)
1121 return ZSTD_compressBlock_lazy_extDict_generic(ms
, seqStore
, rep
, src
, srcSize
, search_hashChain
, 1);
1124 size_t ZSTD_compressBlock_lazy2_extDict(
1125 ZSTD_matchState_t
* ms
, seqStore_t
* seqStore
, U32 rep
[ZSTD_REP_NUM
],
1126 void const* src
, size_t srcSize
)
1129 return ZSTD_compressBlock_lazy_extDict_generic(ms
, seqStore
, rep
, src
, srcSize
, search_hashChain
, 2);
1132 size_t ZSTD_compressBlock_btlazy2_extDict(
1133 ZSTD_matchState_t
* ms
, seqStore_t
* seqStore
, U32 rep
[ZSTD_REP_NUM
],
1134 void const* src
, size_t srcSize
)
1137 return ZSTD_compressBlock_lazy_extDict_generic(ms
, seqStore
, rep
, src
, srcSize
, search_binaryTree
, 2);