]>
Commit | Line | Data |
---|---|---|
11fdf7f2 | 1 | /* |
f67539c2 | 2 | * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. |
11fdf7f2 TL |
3 | * All rights reserved. |
4 | * | |
5 | * This source code is licensed under both the BSD-style license (found in the | |
6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found | |
7 | * in the COPYING file in the root directory of this source tree). | |
8 | * You may select, at your option, one of the above-listed licenses. | |
9 | */ | |
10 | ||
f67539c2 | 11 | #include "zstd_compress_internal.h" /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */ |
11fdf7f2 TL |
12 | #include "zstd_fast.h" |
13 | ||
14 | ||
9f95a23c | 15 | void ZSTD_fillHashTable(ZSTD_matchState_t* ms, |
f67539c2 TL |
16 | const void* const end, |
17 | ZSTD_dictTableLoadMethod_e dtlm) | |
11fdf7f2 | 18 | { |
9f95a23c TL |
19 | const ZSTD_compressionParameters* const cParams = &ms->cParams; |
20 | U32* const hashTable = ms->hashTable; | |
21 | U32 const hBits = cParams->hashLog; | |
22 | U32 const mls = cParams->minMatch; | |
23 | const BYTE* const base = ms->window.base; | |
24 | const BYTE* ip = base + ms->nextToUpdate; | |
11fdf7f2 | 25 | const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; |
9f95a23c | 26 | const U32 fastHashFillStep = 3; |
11fdf7f2 | 27 | |
9f95a23c TL |
28 | /* Always insert every fastHashFillStep position into the hash table. |
29 | * Insert the other positions if their hash entry is empty. | |
30 | */ | |
31 | for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) { | |
32 | U32 const current = (U32)(ip - base); | |
33 | size_t const hash0 = ZSTD_hashPtr(ip, hBits, mls); | |
34 | hashTable[hash0] = current; | |
35 | if (dtlm == ZSTD_dtlm_fast) continue; | |
36 | /* Only load extra positions for ZSTD_dtlm_full */ | |
37 | { U32 p; | |
38 | for (p = 1; p < fastHashFillStep; ++p) { | |
39 | size_t const hash = ZSTD_hashPtr(ip + p, hBits, mls); | |
40 | if (hashTable[hash] == 0) { /* not yet filled */ | |
41 | hashTable[hash] = current + p; | |
42 | } } } } | |
11fdf7f2 TL |
43 | } |
44 | ||
f67539c2 TL |
45 | |
46 | FORCE_INLINE_TEMPLATE size_t | |
47 | ZSTD_compressBlock_fast_generic( | |
9f95a23c TL |
48 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
49 | void const* src, size_t srcSize, | |
50 | U32 const mls) | |
11fdf7f2 | 51 | { |
9f95a23c TL |
52 | const ZSTD_compressionParameters* const cParams = &ms->cParams; |
53 | U32* const hashTable = ms->hashTable; | |
54 | U32 const hlog = cParams->hashLog; | |
55 | /* support stepSize of 0 */ | |
56 | size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1; | |
57 | const BYTE* const base = ms->window.base; | |
11fdf7f2 | 58 | const BYTE* const istart = (const BYTE*)src; |
9f95a23c TL |
59 | /* We check ip0 (ip + 0) and ip1 (ip + 1) each loop */ |
60 | const BYTE* ip0 = istart; | |
61 | const BYTE* ip1; | |
11fdf7f2 | 62 | const BYTE* anchor = istart; |
f67539c2 TL |
63 | const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); |
64 | const U32 prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog); | |
9f95a23c | 65 | const BYTE* const prefixStart = base + prefixStartIndex; |
11fdf7f2 TL |
66 | const BYTE* const iend = istart + srcSize; |
67 | const BYTE* const ilimit = iend - HASH_READ_SIZE; | |
9f95a23c | 68 | U32 offset_1=rep[0], offset_2=rep[1]; |
11fdf7f2 TL |
69 | U32 offsetSaved = 0; |
70 | ||
71 | /* init */ | |
f67539c2 | 72 | DEBUGLOG(5, "ZSTD_compressBlock_fast_generic"); |
9f95a23c TL |
73 | ip0 += (ip0 == prefixStart); |
74 | ip1 = ip0 + 1; | |
f67539c2 TL |
75 | { U32 const current = (U32)(ip0 - base); |
76 | U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog); | |
77 | U32 const maxRep = current - windowLow; | |
11fdf7f2 TL |
78 | if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; |
79 | if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; | |
80 | } | |
81 | ||
9f95a23c | 82 | /* Main Search Loop */ |
f67539c2 TL |
83 | #ifdef __INTEL_COMPILER |
84 | /* From intel 'The vector pragma indicates that the loop should be | |
85 | * vectorized if it is legal to do so'. Can be used together with | |
86 | * #pragma ivdep (but have opted to exclude that because intel | |
87 | * warns against using it).*/ | |
88 | #pragma vector always | |
89 | #endif | |
9f95a23c TL |
90 | while (ip1 < ilimit) { /* < instead of <=, because check at ip0+2 */ |
91 | size_t mLength; | |
92 | BYTE const* ip2 = ip0 + 2; | |
93 | size_t const h0 = ZSTD_hashPtr(ip0, hlog, mls); | |
94 | U32 const val0 = MEM_read32(ip0); | |
95 | size_t const h1 = ZSTD_hashPtr(ip1, hlog, mls); | |
96 | U32 const val1 = MEM_read32(ip1); | |
97 | U32 const current0 = (U32)(ip0-base); | |
98 | U32 const current1 = (U32)(ip1-base); | |
99 | U32 const matchIndex0 = hashTable[h0]; | |
100 | U32 const matchIndex1 = hashTable[h1]; | |
f67539c2 | 101 | BYTE const* repMatch = ip2 - offset_1; |
9f95a23c TL |
102 | const BYTE* match0 = base + matchIndex0; |
103 | const BYTE* match1 = base + matchIndex1; | |
104 | U32 offcode; | |
f67539c2 TL |
105 | |
106 | #if defined(__aarch64__) | |
107 | PREFETCH_L1(ip0+256); | |
108 | #endif | |
109 | ||
9f95a23c TL |
110 | hashTable[h0] = current0; /* update hash table */ |
111 | hashTable[h1] = current1; /* update hash table */ | |
112 | ||
113 | assert(ip0 + 1 == ip1); | |
114 | ||
115 | if ((offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip2))) { | |
f67539c2 | 116 | mLength = (ip2[-1] == repMatch[-1]) ? 1 : 0; |
9f95a23c TL |
117 | ip0 = ip2 - mLength; |
118 | match0 = repMatch - mLength; | |
f67539c2 | 119 | mLength += 4; |
9f95a23c TL |
120 | offcode = 0; |
121 | goto _match; | |
122 | } | |
123 | if ((matchIndex0 > prefixStartIndex) && MEM_read32(match0) == val0) { | |
124 | /* found a regular match */ | |
125 | goto _offset; | |
126 | } | |
127 | if ((matchIndex1 > prefixStartIndex) && MEM_read32(match1) == val1) { | |
128 | /* found a regular match after one literal */ | |
129 | ip0 = ip1; | |
130 | match0 = match1; | |
131 | goto _offset; | |
132 | } | |
f67539c2 | 133 | { size_t const step = ((size_t)(ip0-anchor) >> (kSearchStrength - 1)) + stepSize; |
9f95a23c TL |
134 | assert(step >= 2); |
135 | ip0 += step; | |
136 | ip1 += step; | |
137 | continue; | |
138 | } | |
139 | _offset: /* Requires: ip0, match0 */ | |
140 | /* Compute the offset code */ | |
141 | offset_2 = offset_1; | |
142 | offset_1 = (U32)(ip0-match0); | |
143 | offcode = offset_1 + ZSTD_REP_MOVE; | |
f67539c2 | 144 | mLength = 4; |
9f95a23c TL |
145 | /* Count the backwards match length */ |
146 | while (((ip0>anchor) & (match0>prefixStart)) | |
147 | && (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; } /* catch up */ | |
148 | ||
149 | _match: /* Requires: ip0, match0, offcode */ | |
150 | /* Count the forward length */ | |
f67539c2 TL |
151 | mLength += ZSTD_count(ip0+mLength, match0+mLength, iend); |
152 | ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH); | |
9f95a23c TL |
153 | /* match found */ |
154 | ip0 += mLength; | |
155 | anchor = ip0; | |
9f95a23c TL |
156 | |
157 | if (ip0 <= ilimit) { | |
158 | /* Fill Table */ | |
159 | assert(base+current0+2 > istart); /* check base overflow */ | |
160 | hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */ | |
161 | hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base); | |
162 | ||
f67539c2 TL |
163 | if (offset_2 > 0) { /* offset_2==0 means offset_2 is invalidated */ |
164 | while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) { | |
165 | /* store sequence */ | |
166 | size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4; | |
167 | { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */ | |
168 | hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base); | |
169 | ip0 += rLength; | |
170 | ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH); | |
171 | anchor = ip0; | |
172 | continue; /* faster when present (confirmed on gcc-8) ... (?) */ | |
173 | } } } | |
174 | ip1 = ip0 + 1; | |
9f95a23c TL |
175 | } |
176 | ||
177 | /* save reps for next block */ | |
178 | rep[0] = offset_1 ? offset_1 : offsetSaved; | |
179 | rep[1] = offset_2 ? offset_2 : offsetSaved; | |
180 | ||
181 | /* Return the last literals size */ | |
f67539c2 | 182 | return (size_t)(iend - anchor); |
9f95a23c TL |
183 | } |
184 | ||
185 | ||
186 | size_t ZSTD_compressBlock_fast( | |
187 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |
188 | void const* src, size_t srcSize) | |
189 | { | |
f67539c2 | 190 | U32 const mls = ms->cParams.minMatch; |
9f95a23c TL |
191 | assert(ms->dictMatchState == NULL); |
192 | switch(mls) | |
193 | { | |
194 | default: /* includes case 3 */ | |
195 | case 4 : | |
196 | return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4); | |
197 | case 5 : | |
198 | return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5); | |
199 | case 6 : | |
200 | return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6); | |
201 | case 7 : | |
202 | return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7); | |
203 | } | |
204 | } | |
205 | ||
206 | FORCE_INLINE_TEMPLATE | |
207 | size_t ZSTD_compressBlock_fast_dictMatchState_generic( | |
208 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |
209 | void const* src, size_t srcSize, U32 const mls) | |
210 | { | |
211 | const ZSTD_compressionParameters* const cParams = &ms->cParams; | |
212 | U32* const hashTable = ms->hashTable; | |
213 | U32 const hlog = cParams->hashLog; | |
214 | /* support stepSize of 0 */ | |
215 | U32 const stepSize = cParams->targetLength + !(cParams->targetLength); | |
216 | const BYTE* const base = ms->window.base; | |
217 | const BYTE* const istart = (const BYTE*)src; | |
218 | const BYTE* ip = istart; | |
219 | const BYTE* anchor = istart; | |
220 | const U32 prefixStartIndex = ms->window.dictLimit; | |
221 | const BYTE* const prefixStart = base + prefixStartIndex; | |
222 | const BYTE* const iend = istart + srcSize; | |
223 | const BYTE* const ilimit = iend - HASH_READ_SIZE; | |
224 | U32 offset_1=rep[0], offset_2=rep[1]; | |
225 | U32 offsetSaved = 0; | |
226 | ||
227 | const ZSTD_matchState_t* const dms = ms->dictMatchState; | |
228 | const ZSTD_compressionParameters* const dictCParams = &dms->cParams ; | |
229 | const U32* const dictHashTable = dms->hashTable; | |
230 | const U32 dictStartIndex = dms->window.dictLimit; | |
231 | const BYTE* const dictBase = dms->window.base; | |
232 | const BYTE* const dictStart = dictBase + dictStartIndex; | |
233 | const BYTE* const dictEnd = dms->window.nextSrc; | |
234 | const U32 dictIndexDelta = prefixStartIndex - (U32)(dictEnd - dictBase); | |
235 | const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart); | |
236 | const U32 dictHLog = dictCParams->hashLog; | |
237 | ||
f67539c2 TL |
238 | /* if a dictionary is still attached, it necessarily means that |
239 | * it is within window size. So we just check it. */ | |
240 | const U32 maxDistance = 1U << cParams->windowLog; | |
241 | const U32 endIndex = (U32)((size_t)(ip - base) + srcSize); | |
242 | assert(endIndex - prefixStartIndex <= maxDistance); | |
243 | (void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */ | |
244 | ||
245 | /* ensure there will be no no underflow | |
246 | * when translating a dict index into a local index */ | |
9f95a23c TL |
247 | assert(prefixStartIndex >= (U32)(dictEnd - dictBase)); |
248 | ||
249 | /* init */ | |
f67539c2 | 250 | DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic"); |
9f95a23c TL |
251 | ip += (dictAndPrefixLength == 0); |
252 | /* dictMatchState repCode checks don't currently handle repCode == 0 | |
253 | * disabling. */ | |
254 | assert(offset_1 <= dictAndPrefixLength); | |
255 | assert(offset_2 <= dictAndPrefixLength); | |
256 | ||
11fdf7f2 TL |
257 | /* Main Search Loop */ |
258 | while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ | |
259 | size_t mLength; | |
9f95a23c | 260 | size_t const h = ZSTD_hashPtr(ip, hlog, mls); |
11fdf7f2 TL |
261 | U32 const current = (U32)(ip-base); |
262 | U32 const matchIndex = hashTable[h]; | |
263 | const BYTE* match = base + matchIndex; | |
9f95a23c TL |
264 | const U32 repIndex = current + 1 - offset_1; |
265 | const BYTE* repMatch = (repIndex < prefixStartIndex) ? | |
266 | dictBase + (repIndex - dictIndexDelta) : | |
267 | base + repIndex; | |
11fdf7f2 TL |
268 | hashTable[h] = current; /* update hash table */ |
269 | ||
9f95a23c TL |
270 | if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */ |
271 | && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { | |
272 | const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; | |
273 | mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; | |
11fdf7f2 | 274 | ip++; |
f67539c2 | 275 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); |
9f95a23c TL |
276 | } else if ( (matchIndex <= prefixStartIndex) ) { |
277 | size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls); | |
278 | U32 const dictMatchIndex = dictHashTable[dictHash]; | |
279 | const BYTE* dictMatch = dictBase + dictMatchIndex; | |
280 | if (dictMatchIndex <= dictStartIndex || | |
281 | MEM_read32(dictMatch) != MEM_read32(ip)) { | |
282 | assert(stepSize >= 1); | |
283 | ip += ((ip-anchor) >> kSearchStrength) + stepSize; | |
11fdf7f2 | 284 | continue; |
9f95a23c TL |
285 | } else { |
286 | /* found a dict match */ | |
287 | U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta); | |
288 | mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4; | |
289 | while (((ip>anchor) & (dictMatch>dictStart)) | |
290 | && (ip[-1] == dictMatch[-1])) { | |
291 | ip--; dictMatch--; mLength++; | |
292 | } /* catch up */ | |
293 | offset_2 = offset_1; | |
294 | offset_1 = offset; | |
f67539c2 | 295 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); |
11fdf7f2 | 296 | } |
9f95a23c TL |
297 | } else if (MEM_read32(match) != MEM_read32(ip)) { |
298 | /* it's not a match, and we're not going to check the dictionary */ | |
299 | assert(stepSize >= 1); | |
300 | ip += ((ip-anchor) >> kSearchStrength) + stepSize; | |
301 | continue; | |
302 | } else { | |
303 | /* found a regular match */ | |
304 | U32 const offset = (U32)(ip-match); | |
11fdf7f2 | 305 | mLength = ZSTD_count(ip+4, match+4, iend) + 4; |
9f95a23c TL |
306 | while (((ip>anchor) & (match>prefixStart)) |
307 | && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ | |
11fdf7f2 TL |
308 | offset_2 = offset_1; |
309 | offset_1 = offset; | |
f67539c2 | 310 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); |
11fdf7f2 TL |
311 | } |
312 | ||
313 | /* match found */ | |
314 | ip += mLength; | |
315 | anchor = ip; | |
316 | ||
317 | if (ip <= ilimit) { | |
318 | /* Fill Table */ | |
9f95a23c TL |
319 | assert(base+current+2 > istart); /* check base overflow */ |
320 | hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2; /* here because current+2 could be > iend-8 */ | |
321 | hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); | |
322 | ||
11fdf7f2 | 323 | /* check immediate repcode */ |
9f95a23c TL |
324 | while (ip <= ilimit) { |
325 | U32 const current2 = (U32)(ip-base); | |
326 | U32 const repIndex2 = current2 - offset_2; | |
327 | const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? | |
328 | dictBase - dictIndexDelta + repIndex2 : | |
329 | base + repIndex2; | |
330 | if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) | |
331 | && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { | |
332 | const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; | |
333 | size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; | |
334 | U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ | |
f67539c2 | 335 | ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); |
9f95a23c TL |
336 | hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; |
337 | ip += repLength2; | |
338 | anchor = ip; | |
339 | continue; | |
340 | } | |
341 | break; | |
342 | } | |
343 | } | |
344 | } | |
11fdf7f2 TL |
345 | |
346 | /* save reps for next block */ | |
9f95a23c TL |
347 | rep[0] = offset_1 ? offset_1 : offsetSaved; |
348 | rep[1] = offset_2 ? offset_2 : offsetSaved; | |
11fdf7f2 TL |
349 | |
350 | /* Return the last literals size */ | |
f67539c2 | 351 | return (size_t)(iend - anchor); |
11fdf7f2 TL |
352 | } |
353 | ||
9f95a23c TL |
354 | size_t ZSTD_compressBlock_fast_dictMatchState( |
355 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |
356 | void const* src, size_t srcSize) | |
11fdf7f2 | 357 | { |
f67539c2 | 358 | U32 const mls = ms->cParams.minMatch; |
9f95a23c | 359 | assert(ms->dictMatchState != NULL); |
11fdf7f2 TL |
360 | switch(mls) |
361 | { | |
362 | default: /* includes case 3 */ | |
363 | case 4 : | |
9f95a23c | 364 | return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 4); |
11fdf7f2 | 365 | case 5 : |
9f95a23c | 366 | return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 5); |
11fdf7f2 | 367 | case 6 : |
9f95a23c | 368 | return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 6); |
11fdf7f2 | 369 | case 7 : |
9f95a23c | 370 | return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 7); |
11fdf7f2 TL |
371 | } |
372 | } | |
373 | ||
374 | ||
9f95a23c TL |
375 | static size_t ZSTD_compressBlock_fast_extDict_generic( |
376 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |
377 | void const* src, size_t srcSize, U32 const mls) | |
11fdf7f2 | 378 | { |
9f95a23c TL |
379 | const ZSTD_compressionParameters* const cParams = &ms->cParams; |
380 | U32* const hashTable = ms->hashTable; | |
381 | U32 const hlog = cParams->hashLog; | |
382 | /* support stepSize of 0 */ | |
383 | U32 const stepSize = cParams->targetLength + !(cParams->targetLength); | |
384 | const BYTE* const base = ms->window.base; | |
385 | const BYTE* const dictBase = ms->window.dictBase; | |
11fdf7f2 TL |
386 | const BYTE* const istart = (const BYTE*)src; |
387 | const BYTE* ip = istart; | |
388 | const BYTE* anchor = istart; | |
f67539c2 TL |
389 | const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); |
390 | const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog); | |
391 | const U32 dictStartIndex = lowLimit; | |
9f95a23c | 392 | const BYTE* const dictStart = dictBase + dictStartIndex; |
f67539c2 TL |
393 | const U32 dictLimit = ms->window.dictLimit; |
394 | const U32 prefixStartIndex = dictLimit < lowLimit ? lowLimit : dictLimit; | |
9f95a23c TL |
395 | const BYTE* const prefixStart = base + prefixStartIndex; |
396 | const BYTE* const dictEnd = dictBase + prefixStartIndex; | |
11fdf7f2 TL |
397 | const BYTE* const iend = istart + srcSize; |
398 | const BYTE* const ilimit = iend - 8; | |
9f95a23c | 399 | U32 offset_1=rep[0], offset_2=rep[1]; |
11fdf7f2 | 400 | |
f67539c2 TL |
401 | DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1); |
402 | ||
403 | /* switch to "regular" variant if extDict is invalidated due to maxDistance */ | |
404 | if (prefixStartIndex == dictStartIndex) | |
405 | return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls); | |
406 | ||
11fdf7f2 TL |
407 | /* Search Loop */ |
408 | while (ip < ilimit) { /* < instead of <=, because (ip+1) */ | |
9f95a23c TL |
409 | const size_t h = ZSTD_hashPtr(ip, hlog, mls); |
410 | const U32 matchIndex = hashTable[h]; | |
411 | const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base; | |
412 | const BYTE* match = matchBase + matchIndex; | |
413 | const U32 current = (U32)(ip-base); | |
414 | const U32 repIndex = current + 1 - offset_1; | |
415 | const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base; | |
416 | const BYTE* const repMatch = repBase + repIndex; | |
11fdf7f2 | 417 | hashTable[h] = current; /* update hash table */ |
f67539c2 | 418 | DEBUGLOG(7, "offset_1 = %u , current = %u", offset_1, current); |
9f95a23c | 419 | assert(offset_1 <= current +1); /* check repIndex */ |
11fdf7f2 | 420 | |
9f95a23c | 421 | if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex)) |
11fdf7f2 | 422 | && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { |
f67539c2 TL |
423 | const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; |
424 | size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4; | |
11fdf7f2 | 425 | ip++; |
f67539c2 TL |
426 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, rLength-MINMATCH); |
427 | ip += rLength; | |
428 | anchor = ip; | |
11fdf7f2 | 429 | } else { |
9f95a23c | 430 | if ( (matchIndex < dictStartIndex) || |
11fdf7f2 | 431 | (MEM_read32(match) != MEM_read32(ip)) ) { |
9f95a23c TL |
432 | assert(stepSize >= 1); |
433 | ip += ((ip-anchor) >> kSearchStrength) + stepSize; | |
11fdf7f2 TL |
434 | continue; |
435 | } | |
f67539c2 TL |
436 | { const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; |
437 | const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart; | |
438 | U32 const offset = current - matchIndex; | |
439 | size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; | |
11fdf7f2 | 440 | while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ |
f67539c2 TL |
441 | offset_2 = offset_1; offset_1 = offset; /* update offset history */ |
442 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); | |
443 | ip += mLength; | |
444 | anchor = ip; | |
11fdf7f2 TL |
445 | } } |
446 | ||
11fdf7f2 TL |
447 | if (ip <= ilimit) { |
448 | /* Fill Table */ | |
9f95a23c TL |
449 | hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2; |
450 | hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); | |
11fdf7f2 TL |
451 | /* check immediate repcode */ |
452 | while (ip <= ilimit) { | |
453 | U32 const current2 = (U32)(ip-base); | |
454 | U32 const repIndex2 = current2 - offset_2; | |
f67539c2 | 455 | const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; |
9f95a23c | 456 | if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex)) /* intentional overflow */ |
11fdf7f2 | 457 | && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { |
9f95a23c TL |
458 | const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; |
459 | size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; | |
f67539c2 TL |
460 | { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */ |
461 | ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, 0 /*offcode*/, repLength2-MINMATCH); | |
9f95a23c | 462 | hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; |
11fdf7f2 TL |
463 | ip += repLength2; |
464 | anchor = ip; | |
465 | continue; | |
466 | } | |
467 | break; | |
468 | } } } | |
469 | ||
470 | /* save reps for next block */ | |
9f95a23c TL |
471 | rep[0] = offset_1; |
472 | rep[1] = offset_2; | |
11fdf7f2 TL |
473 | |
474 | /* Return the last literals size */ | |
f67539c2 | 475 | return (size_t)(iend - anchor); |
11fdf7f2 TL |
476 | } |
477 | ||
478 | ||
9f95a23c TL |
479 | size_t ZSTD_compressBlock_fast_extDict( |
480 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |
481 | void const* src, size_t srcSize) | |
11fdf7f2 | 482 | { |
f67539c2 | 483 | U32 const mls = ms->cParams.minMatch; |
11fdf7f2 TL |
484 | switch(mls) |
485 | { | |
486 | default: /* includes case 3 */ | |
487 | case 4 : | |
9f95a23c | 488 | return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 4); |
11fdf7f2 | 489 | case 5 : |
9f95a23c | 490 | return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 5); |
11fdf7f2 | 491 | case 6 : |
9f95a23c | 492 | return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 6); |
11fdf7f2 | 493 | case 7 : |
9f95a23c | 494 | return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 7); |
11fdf7f2 TL |
495 | } |
496 | } |