2 * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
11 #include "zstd_compress_internal.h"
16 #define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
17 #define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */
18 #define ZSTD_MAX_PRICE (1<<30)
20 #define ZSTD_PREDEF_THRESHOLD 1024 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
23 /*-*************************************
24 * Price functions for optimal parser
25 ***************************************/
27 #if 0 /* approximation at bit level */
28 # define BITCOST_ACCURACY 0
29 # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
30 # define WEIGHT(stat) ((void)opt, ZSTD_bitWeight(stat))
31 #elif 0 /* fractional bit accuracy */
32 # define BITCOST_ACCURACY 8
33 # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
34 # define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))
35 #else /* opt==approx, ultra==accurate */
36 # define BITCOST_ACCURACY 8
37 # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
38 # define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
41 MEM_STATIC U32
ZSTD_bitWeight(U32 stat
)
43 return (ZSTD_highbit32(stat
+1) * BITCOST_MULTIPLIER
);
46 MEM_STATIC U32
ZSTD_fracWeight(U32 rawStat
)
48 U32
const stat
= rawStat
+ 1;
49 U32
const hb
= ZSTD_highbit32(stat
);
50 U32
const BWeight
= hb
* BITCOST_MULTIPLIER
;
51 U32
const FWeight
= (stat
<< BITCOST_ACCURACY
) >> hb
;
52 U32
const weight
= BWeight
+ FWeight
;
53 assert(hb
+ BITCOST_ACCURACY
< 31);
58 /* debugging function,
59 * @return price in bytes as fractional value
60 * for debug messages only */
61 MEM_STATIC
double ZSTD_fCost(U32 price
)
63 return (double)price
/ (BITCOST_MULTIPLIER
*8);
67 static int ZSTD_compressedLiterals(optState_t
const* const optPtr
)
69 return optPtr
->literalCompressionMode
!= ZSTD_lcm_uncompressed
;
72 static void ZSTD_setBasePrices(optState_t
* optPtr
, int optLevel
)
74 if (ZSTD_compressedLiterals(optPtr
))
75 optPtr
->litSumBasePrice
= WEIGHT(optPtr
->litSum
, optLevel
);
76 optPtr
->litLengthSumBasePrice
= WEIGHT(optPtr
->litLengthSum
, optLevel
);
77 optPtr
->matchLengthSumBasePrice
= WEIGHT(optPtr
->matchLengthSum
, optLevel
);
78 optPtr
->offCodeSumBasePrice
= WEIGHT(optPtr
->offCodeSum
, optLevel
);
82 /* ZSTD_downscaleStat() :
83 * reduce all elements in table by a factor 2^(ZSTD_FREQ_DIV+malus)
84 * return the resulting sum of elements */
85 static U32
ZSTD_downscaleStat(unsigned* table
, U32 lastEltIndex
, int malus
)
88 DEBUGLOG(5, "ZSTD_downscaleStat (nbElts=%u)", (unsigned)lastEltIndex
+1);
89 assert(ZSTD_FREQ_DIV
+malus
> 0 && ZSTD_FREQ_DIV
+malus
< 31);
90 for (s
=0; s
<lastEltIndex
+1; s
++) {
91 table
[s
] = 1 + (table
[s
] >> (ZSTD_FREQ_DIV
+malus
));
97 /* ZSTD_rescaleFreqs() :
98 * if first block (detected by optPtr->litLengthSum == 0) : init statistics
99 * take hints from dictionary if there is one
100 * or init from zero, using src for literals stats, or flat 1 for match symbols
101 * otherwise downscale existing stats, to be used as seed for next block.
104 ZSTD_rescaleFreqs(optState_t
* const optPtr
,
105 const BYTE
* const src
, size_t const srcSize
,
108 int const compressedLiterals
= ZSTD_compressedLiterals(optPtr
);
109 DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize
);
110 optPtr
->priceType
= zop_dynamic
;
112 if (optPtr
->litLengthSum
== 0) { /* first block : init */
113 if (srcSize
<= ZSTD_PREDEF_THRESHOLD
) { /* heuristic */
114 DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef");
115 optPtr
->priceType
= zop_predef
;
118 assert(optPtr
->symbolCosts
!= NULL
);
119 if (optPtr
->symbolCosts
->huf
.repeatMode
== HUF_repeat_valid
) {
120 /* huffman table presumed generated by dictionary */
121 optPtr
->priceType
= zop_dynamic
;
123 if (compressedLiterals
) {
125 assert(optPtr
->litFreq
!= NULL
);
127 for (lit
=0; lit
<=MaxLit
; lit
++) {
128 U32
const scaleLog
= 11; /* scale to 2K */
129 U32
const bitCost
= HUF_getNbBits(optPtr
->symbolCosts
->huf
.CTable
, lit
);
130 assert(bitCost
<= scaleLog
);
131 optPtr
->litFreq
[lit
] = bitCost
? 1 << (scaleLog
-bitCost
) : 1 /*minimum to calculate cost*/;
132 optPtr
->litSum
+= optPtr
->litFreq
[lit
];
136 FSE_CState_t llstate
;
137 FSE_initCState(&llstate
, optPtr
->symbolCosts
->fse
.litlengthCTable
);
138 optPtr
->litLengthSum
= 0;
139 for (ll
=0; ll
<=MaxLL
; ll
++) {
140 U32
const scaleLog
= 10; /* scale to 1K */
141 U32
const bitCost
= FSE_getMaxNbBits(llstate
.symbolTT
, ll
);
142 assert(bitCost
< scaleLog
);
143 optPtr
->litLengthFreq
[ll
] = bitCost
? 1 << (scaleLog
-bitCost
) : 1 /*minimum to calculate cost*/;
144 optPtr
->litLengthSum
+= optPtr
->litLengthFreq
[ll
];
148 FSE_CState_t mlstate
;
149 FSE_initCState(&mlstate
, optPtr
->symbolCosts
->fse
.matchlengthCTable
);
150 optPtr
->matchLengthSum
= 0;
151 for (ml
=0; ml
<=MaxML
; ml
++) {
152 U32
const scaleLog
= 10;
153 U32
const bitCost
= FSE_getMaxNbBits(mlstate
.symbolTT
, ml
);
154 assert(bitCost
< scaleLog
);
155 optPtr
->matchLengthFreq
[ml
] = bitCost
? 1 << (scaleLog
-bitCost
) : 1 /*minimum to calculate cost*/;
156 optPtr
->matchLengthSum
+= optPtr
->matchLengthFreq
[ml
];
160 FSE_CState_t ofstate
;
161 FSE_initCState(&ofstate
, optPtr
->symbolCosts
->fse
.offcodeCTable
);
162 optPtr
->offCodeSum
= 0;
163 for (of
=0; of
<=MaxOff
; of
++) {
164 U32
const scaleLog
= 10;
165 U32
const bitCost
= FSE_getMaxNbBits(ofstate
.symbolTT
, of
);
166 assert(bitCost
< scaleLog
);
167 optPtr
->offCodeFreq
[of
] = bitCost
? 1 << (scaleLog
-bitCost
) : 1 /*minimum to calculate cost*/;
168 optPtr
->offCodeSum
+= optPtr
->offCodeFreq
[of
];
171 } else { /* not a dictionary */
173 assert(optPtr
->litFreq
!= NULL
);
174 if (compressedLiterals
) {
175 unsigned lit
= MaxLit
;
176 HIST_count_simple(optPtr
->litFreq
, &lit
, src
, srcSize
); /* use raw first block to init statistics */
177 optPtr
->litSum
= ZSTD_downscaleStat(optPtr
->litFreq
, MaxLit
, 1);
181 for (ll
=0; ll
<=MaxLL
; ll
++)
182 optPtr
->litLengthFreq
[ll
] = 1;
184 optPtr
->litLengthSum
= MaxLL
+1;
187 for (ml
=0; ml
<=MaxML
; ml
++)
188 optPtr
->matchLengthFreq
[ml
] = 1;
190 optPtr
->matchLengthSum
= MaxML
+1;
193 for (of
=0; of
<=MaxOff
; of
++)
194 optPtr
->offCodeFreq
[of
] = 1;
196 optPtr
->offCodeSum
= MaxOff
+1;
200 } else { /* new block : re-use previous statistics, scaled down */
202 if (compressedLiterals
)
203 optPtr
->litSum
= ZSTD_downscaleStat(optPtr
->litFreq
, MaxLit
, 1);
204 optPtr
->litLengthSum
= ZSTD_downscaleStat(optPtr
->litLengthFreq
, MaxLL
, 0);
205 optPtr
->matchLengthSum
= ZSTD_downscaleStat(optPtr
->matchLengthFreq
, MaxML
, 0);
206 optPtr
->offCodeSum
= ZSTD_downscaleStat(optPtr
->offCodeFreq
, MaxOff
, 0);
209 ZSTD_setBasePrices(optPtr
, optLevel
);
212 /* ZSTD_rawLiteralsCost() :
213 * price of literals (only) in specified segment (which length can be 0).
214 * does not include price of literalLength symbol */
215 static U32
ZSTD_rawLiteralsCost(const BYTE
* const literals
, U32
const litLength
,
216 const optState_t
* const optPtr
,
219 if (litLength
== 0) return 0;
221 if (!ZSTD_compressedLiterals(optPtr
))
222 return (litLength
<< 3) * BITCOST_MULTIPLIER
; /* Uncompressed - 8 bytes per literal. */
224 if (optPtr
->priceType
== zop_predef
)
225 return (litLength
*6) * BITCOST_MULTIPLIER
; /* 6 bit per literal - no statistic used */
227 /* dynamic statistics */
228 { U32 price
= litLength
* optPtr
->litSumBasePrice
;
230 for (u
=0; u
< litLength
; u
++) {
231 assert(WEIGHT(optPtr
->litFreq
[literals
[u
]], optLevel
) <= optPtr
->litSumBasePrice
); /* literal cost should never be negative */
232 price
-= WEIGHT(optPtr
->litFreq
[literals
[u
]], optLevel
);
238 /* ZSTD_litLengthPrice() :
239 * cost of literalLength symbol */
240 static U32
ZSTD_litLengthPrice(U32
const litLength
, const optState_t
* const optPtr
, int optLevel
)
242 if (optPtr
->priceType
== zop_predef
) return WEIGHT(litLength
, optLevel
);
244 /* dynamic statistics */
245 { U32
const llCode
= ZSTD_LLcode(litLength
);
246 return (LL_bits
[llCode
] * BITCOST_MULTIPLIER
)
247 + optPtr
->litLengthSumBasePrice
248 - WEIGHT(optPtr
->litLengthFreq
[llCode
], optLevel
);
252 /* ZSTD_litLengthContribution() :
253 * @return ( cost(litlength) - cost(0) )
254 * this value can then be added to rawLiteralsCost()
255 * to provide a cost which is directly comparable to a match ending at same position */
256 static int ZSTD_litLengthContribution(U32
const litLength
, const optState_t
* const optPtr
, int optLevel
)
258 if (optPtr
->priceType
>= zop_predef
) return WEIGHT(litLength
, optLevel
);
260 /* dynamic statistics */
261 { U32
const llCode
= ZSTD_LLcode(litLength
);
262 int const contribution
= (LL_bits
[llCode
] * BITCOST_MULTIPLIER
)
263 + WEIGHT(optPtr
->litLengthFreq
[0], optLevel
) /* note: log2litLengthSum cancel out */
264 - WEIGHT(optPtr
->litLengthFreq
[llCode
], optLevel
);
268 return MAX(0, contribution
); /* sometimes better, sometimes not ... */
273 /* ZSTD_literalsContribution() :
274 * creates a fake cost for the literals part of a sequence
275 * which can be compared to the ending cost of a match
276 * should a new match start at this position */
277 static int ZSTD_literalsContribution(const BYTE
* const literals
, U32
const litLength
,
278 const optState_t
* const optPtr
,
281 int const contribution
= ZSTD_rawLiteralsCost(literals
, litLength
, optPtr
, optLevel
)
282 + ZSTD_litLengthContribution(litLength
, optPtr
, optLevel
);
286 /* ZSTD_getMatchPrice() :
287 * Provides the cost of the match part (offset + matchLength) of a sequence
288 * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
289 * optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */
290 FORCE_INLINE_TEMPLATE U32
291 ZSTD_getMatchPrice(U32
const offset
,
292 U32
const matchLength
,
293 const optState_t
* const optPtr
,
297 U32
const offCode
= ZSTD_highbit32(offset
+1);
298 U32
const mlBase
= matchLength
- MINMATCH
;
299 assert(matchLength
>= MINMATCH
);
301 if (optPtr
->priceType
== zop_predef
) /* fixed scheme, do not use statistics */
302 return WEIGHT(mlBase
, optLevel
) + ((16 + offCode
) * BITCOST_MULTIPLIER
);
304 /* dynamic statistics */
305 price
= (offCode
* BITCOST_MULTIPLIER
) + (optPtr
->offCodeSumBasePrice
- WEIGHT(optPtr
->offCodeFreq
[offCode
], optLevel
));
306 if ((optLevel
<2) /*static*/ && offCode
>= 20)
307 price
+= (offCode
-19)*2 * BITCOST_MULTIPLIER
; /* handicap for long distance offsets, favor decompression speed */
310 { U32
const mlCode
= ZSTD_MLcode(mlBase
);
311 price
+= (ML_bits
[mlCode
] * BITCOST_MULTIPLIER
) + (optPtr
->matchLengthSumBasePrice
- WEIGHT(optPtr
->matchLengthFreq
[mlCode
], optLevel
));
314 price
+= BITCOST_MULTIPLIER
/ 5; /* heuristic : make matches a bit more costly to favor less sequences -> faster decompression speed */
316 DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength
, price
);
320 /* ZSTD_updateStats() :
321 * assumption : literals + litLengtn <= iend */
322 static void ZSTD_updateStats(optState_t
* const optPtr
,
323 U32 litLength
, const BYTE
* literals
,
324 U32 offsetCode
, U32 matchLength
)
327 if (ZSTD_compressedLiterals(optPtr
)) {
329 for (u
=0; u
< litLength
; u
++)
330 optPtr
->litFreq
[literals
[u
]] += ZSTD_LITFREQ_ADD
;
331 optPtr
->litSum
+= litLength
*ZSTD_LITFREQ_ADD
;
335 { U32
const llCode
= ZSTD_LLcode(litLength
);
336 optPtr
->litLengthFreq
[llCode
]++;
337 optPtr
->litLengthSum
++;
340 /* match offset code (0-2=>repCode; 3+=>offset+2) */
341 { U32
const offCode
= ZSTD_highbit32(offsetCode
+1);
342 assert(offCode
<= MaxOff
);
343 optPtr
->offCodeFreq
[offCode
]++;
344 optPtr
->offCodeSum
++;
348 { U32
const mlBase
= matchLength
- MINMATCH
;
349 U32
const mlCode
= ZSTD_MLcode(mlBase
);
350 optPtr
->matchLengthFreq
[mlCode
]++;
351 optPtr
->matchLengthSum
++;
356 /* ZSTD_readMINMATCH() :
357 * function safe only for comparisons
358 * assumption : memPtr must be at least 4 bytes before end of buffer */
359 MEM_STATIC U32
ZSTD_readMINMATCH(const void* memPtr
, U32 length
)
364 case 4 : return MEM_read32(memPtr
);
365 case 3 : if (MEM_isLittleEndian())
366 return MEM_read32(memPtr
)<<8;
368 return MEM_read32(memPtr
)>>8;
373 /* Update hashTable3 up to ip (excluded)
374 Assumption : always within prefix (i.e. not within extDict) */
375 static U32
ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t
* ms
, const BYTE
* const ip
)
377 U32
* const hashTable3
= ms
->hashTable3
;
378 U32
const hashLog3
= ms
->hashLog3
;
379 const BYTE
* const base
= ms
->window
.base
;
380 U32 idx
= ms
->nextToUpdate3
;
381 U32
const target
= ms
->nextToUpdate3
= (U32
)(ip
- base
);
382 size_t const hash3
= ZSTD_hash3Ptr(ip
, hashLog3
);
383 assert(hashLog3
> 0);
385 while(idx
< target
) {
386 hashTable3
[ZSTD_hash3Ptr(base
+idx
, hashLog3
)] = idx
;
390 return hashTable3
[hash3
];
394 /*-*************************************
396 ***************************************/
397 /** ZSTD_insertBt1() : add one or multiple positions to tree.
398 * ip : assumed <= iend-8 .
399 * @return : nb of positions added */
400 static U32
ZSTD_insertBt1(
401 ZSTD_matchState_t
* ms
,
402 const BYTE
* const ip
, const BYTE
* const iend
,
403 U32
const mls
, const int extDict
)
405 const ZSTD_compressionParameters
* const cParams
= &ms
->cParams
;
406 U32
* const hashTable
= ms
->hashTable
;
407 U32
const hashLog
= cParams
->hashLog
;
408 size_t const h
= ZSTD_hashPtr(ip
, hashLog
, mls
);
409 U32
* const bt
= ms
->chainTable
;
410 U32
const btLog
= cParams
->chainLog
- 1;
411 U32
const btMask
= (1 << btLog
) - 1;
412 U32 matchIndex
= hashTable
[h
];
413 size_t commonLengthSmaller
=0, commonLengthLarger
=0;
414 const BYTE
* const base
= ms
->window
.base
;
415 const BYTE
* const dictBase
= ms
->window
.dictBase
;
416 const U32 dictLimit
= ms
->window
.dictLimit
;
417 const BYTE
* const dictEnd
= dictBase
+ dictLimit
;
418 const BYTE
* const prefixStart
= base
+ dictLimit
;
420 const U32 current
= (U32
)(ip
-base
);
421 const U32 btLow
= btMask
>= current
? 0 : current
- btMask
;
422 U32
* smallerPtr
= bt
+ 2*(current
&btMask
);
423 U32
* largerPtr
= smallerPtr
+ 1;
424 U32 dummy32
; /* to be nullified at the end */
425 U32
const windowLow
= ms
->window
.lowLimit
;
426 U32 matchEndIdx
= current
+8+1;
427 size_t bestLength
= 8;
428 U32 nbCompares
= 1U << cParams
->searchLog
;
429 #ifdef ZSTD_C_PREDICT
430 U32 predictedSmall
= *(bt
+ 2*((current
-1)&btMask
) + 0);
431 U32 predictedLarge
= *(bt
+ 2*((current
-1)&btMask
) + 1);
432 predictedSmall
+= (predictedSmall
>0);
433 predictedLarge
+= (predictedLarge
>0);
434 #endif /* ZSTD_C_PREDICT */
436 DEBUGLOG(8, "ZSTD_insertBt1 (%u)", current
);
438 assert(ip
<= iend
-8); /* required for h calculation */
439 hashTable
[h
] = current
; /* Update Hash Table */
441 assert(windowLow
> 0);
442 while (nbCompares
-- && (matchIndex
>= windowLow
)) {
443 U32
* const nextPtr
= bt
+ 2*(matchIndex
& btMask
);
444 size_t matchLength
= MIN(commonLengthSmaller
, commonLengthLarger
); /* guaranteed minimum nb of common bytes */
445 assert(matchIndex
< current
);
447 #ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */
448 const U32
* predictPtr
= bt
+ 2*((matchIndex
-1) & btMask
); /* written this way, as bt is a roll buffer */
449 if (matchIndex
== predictedSmall
) {
450 /* no need to check length, result known */
451 *smallerPtr
= matchIndex
;
452 if (matchIndex
<= btLow
) { smallerPtr
=&dummy32
; break; } /* beyond tree size, stop the search */
453 smallerPtr
= nextPtr
+1; /* new "smaller" => larger of match */
454 matchIndex
= nextPtr
[1]; /* new matchIndex larger than previous (closer to current) */
455 predictedSmall
= predictPtr
[1] + (predictPtr
[1]>0);
458 if (matchIndex
== predictedLarge
) {
459 *largerPtr
= matchIndex
;
460 if (matchIndex
<= btLow
) { largerPtr
=&dummy32
; break; } /* beyond tree size, stop the search */
462 matchIndex
= nextPtr
[0];
463 predictedLarge
= predictPtr
[0] + (predictPtr
[0]>0);
468 if (!extDict
|| (matchIndex
+matchLength
>= dictLimit
)) {
469 assert(matchIndex
+matchLength
>= dictLimit
); /* might be wrong if actually extDict */
470 match
= base
+ matchIndex
;
471 matchLength
+= ZSTD_count(ip
+matchLength
, match
+matchLength
, iend
);
473 match
= dictBase
+ matchIndex
;
474 matchLength
+= ZSTD_count_2segments(ip
+matchLength
, match
+matchLength
, iend
, dictEnd
, prefixStart
);
475 if (matchIndex
+matchLength
>= dictLimit
)
476 match
= base
+ matchIndex
; /* to prepare for next usage of match[matchLength] */
479 if (matchLength
> bestLength
) {
480 bestLength
= matchLength
;
481 if (matchLength
> matchEndIdx
- matchIndex
)
482 matchEndIdx
= matchIndex
+ (U32
)matchLength
;
485 if (ip
+matchLength
== iend
) { /* equal : no way to know if inf or sup */
486 break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
489 if (match
[matchLength
] < ip
[matchLength
]) { /* necessarily within buffer */
490 /* match is smaller than current */
491 *smallerPtr
= matchIndex
; /* update smaller idx */
492 commonLengthSmaller
= matchLength
; /* all smaller will now have at least this guaranteed common length */
493 if (matchIndex
<= btLow
) { smallerPtr
=&dummy32
; break; } /* beyond tree size, stop searching */
494 smallerPtr
= nextPtr
+1; /* new "candidate" => larger than match, which was smaller than target */
495 matchIndex
= nextPtr
[1]; /* new matchIndex, larger than previous and closer to current */
497 /* match is larger than current */
498 *largerPtr
= matchIndex
;
499 commonLengthLarger
= matchLength
;
500 if (matchIndex
<= btLow
) { largerPtr
=&dummy32
; break; } /* beyond tree size, stop searching */
502 matchIndex
= nextPtr
[0];
505 *smallerPtr
= *largerPtr
= 0;
506 if (bestLength
> 384) return MIN(192, (U32
)(bestLength
- 384)); /* speed optimization */
507 assert(matchEndIdx
> current
+ 8);
508 return matchEndIdx
- (current
+ 8);
511 FORCE_INLINE_TEMPLATE
512 void ZSTD_updateTree_internal(
513 ZSTD_matchState_t
* ms
,
514 const BYTE
* const ip
, const BYTE
* const iend
,
515 const U32 mls
, const ZSTD_dictMode_e dictMode
)
517 const BYTE
* const base
= ms
->window
.base
;
518 U32
const target
= (U32
)(ip
- base
);
519 U32 idx
= ms
->nextToUpdate
;
520 DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)",
521 idx
, target
, dictMode
);
524 idx
+= ZSTD_insertBt1(ms
, base
+idx
, iend
, mls
, dictMode
== ZSTD_extDict
);
525 ms
->nextToUpdate
= target
;
528 void ZSTD_updateTree(ZSTD_matchState_t
* ms
, const BYTE
* ip
, const BYTE
* iend
) {
529 ZSTD_updateTree_internal(ms
, ip
, iend
, ms
->cParams
.minMatch
, ZSTD_noDict
);
532 FORCE_INLINE_TEMPLATE
533 U32
ZSTD_insertBtAndGetAllMatches (
534 ZSTD_matchState_t
* ms
,
535 const BYTE
* const ip
, const BYTE
* const iLimit
, const ZSTD_dictMode_e dictMode
,
536 U32 rep
[ZSTD_REP_NUM
],
537 U32
const ll0
, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
538 ZSTD_match_t
* matches
,
539 const U32 lengthToBeat
,
540 U32
const mls
/* template */)
542 const ZSTD_compressionParameters
* const cParams
= &ms
->cParams
;
543 U32
const sufficient_len
= MIN(cParams
->targetLength
, ZSTD_OPT_NUM
-1);
544 const BYTE
* const base
= ms
->window
.base
;
545 U32
const current
= (U32
)(ip
-base
);
546 U32
const hashLog
= cParams
->hashLog
;
547 U32
const minMatch
= (mls
==3) ? 3 : 4;
548 U32
* const hashTable
= ms
->hashTable
;
549 size_t const h
= ZSTD_hashPtr(ip
, hashLog
, mls
);
550 U32 matchIndex
= hashTable
[h
];
551 U32
* const bt
= ms
->chainTable
;
552 U32
const btLog
= cParams
->chainLog
- 1;
553 U32
const btMask
= (1U << btLog
) - 1;
554 size_t commonLengthSmaller
=0, commonLengthLarger
=0;
555 const BYTE
* const dictBase
= ms
->window
.dictBase
;
556 U32
const dictLimit
= ms
->window
.dictLimit
;
557 const BYTE
* const dictEnd
= dictBase
+ dictLimit
;
558 const BYTE
* const prefixStart
= base
+ dictLimit
;
559 U32
const btLow
= btMask
>= current
? 0 : current
- btMask
;
560 U32
const windowLow
= ms
->window
.lowLimit
;
561 U32
const matchLow
= windowLow
? windowLow
: 1;
562 U32
* smallerPtr
= bt
+ 2*(current
&btMask
);
563 U32
* largerPtr
= bt
+ 2*(current
&btMask
) + 1;
564 U32 matchEndIdx
= current
+8+1; /* farthest referenced position of any match => detects repetitive patterns */
565 U32 dummy32
; /* to be nullified at the end */
567 U32 nbCompares
= 1U << cParams
->searchLog
;
569 const ZSTD_matchState_t
* dms
= dictMode
== ZSTD_dictMatchState
? ms
->dictMatchState
: NULL
;
570 const ZSTD_compressionParameters
* const dmsCParams
=
571 dictMode
== ZSTD_dictMatchState
? &dms
->cParams
: NULL
;
572 const BYTE
* const dmsBase
= dictMode
== ZSTD_dictMatchState
? dms
->window
.base
: NULL
;
573 const BYTE
* const dmsEnd
= dictMode
== ZSTD_dictMatchState
? dms
->window
.nextSrc
: NULL
;
574 U32
const dmsHighLimit
= dictMode
== ZSTD_dictMatchState
? (U32
)(dmsEnd
- dmsBase
) : 0;
575 U32
const dmsLowLimit
= dictMode
== ZSTD_dictMatchState
? dms
->window
.lowLimit
: 0;
576 U32
const dmsIndexDelta
= dictMode
== ZSTD_dictMatchState
? windowLow
- dmsHighLimit
: 0;
577 U32
const dmsHashLog
= dictMode
== ZSTD_dictMatchState
? dmsCParams
->hashLog
: hashLog
;
578 U32
const dmsBtLog
= dictMode
== ZSTD_dictMatchState
? dmsCParams
->chainLog
- 1 : btLog
;
579 U32
const dmsBtMask
= dictMode
== ZSTD_dictMatchState
? (1U << dmsBtLog
) - 1 : 0;
580 U32
const dmsBtLow
= dictMode
== ZSTD_dictMatchState
&& dmsBtMask
< dmsHighLimit
- dmsLowLimit
? dmsHighLimit
- dmsBtMask
: dmsLowLimit
;
582 size_t bestLength
= lengthToBeat
-1;
583 DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", current
);
586 assert(ll0
<= 1); /* necessarily 1 or 0 */
587 { U32
const lastR
= ZSTD_REP_NUM
+ ll0
;
589 for (repCode
= ll0
; repCode
< lastR
; repCode
++) {
590 U32
const repOffset
= (repCode
==ZSTD_REP_NUM
) ? (rep
[0] - 1) : rep
[repCode
];
591 U32
const repIndex
= current
- repOffset
;
593 assert(current
>= dictLimit
);
594 if (repOffset
-1 /* intentional overflow, discards 0 and -1 */ < current
-dictLimit
) { /* equivalent to `current > repIndex >= dictLimit` */
595 if (ZSTD_readMINMATCH(ip
, minMatch
) == ZSTD_readMINMATCH(ip
- repOffset
, minMatch
)) {
596 repLen
= (U32
)ZSTD_count(ip
+minMatch
, ip
+minMatch
-repOffset
, iLimit
) + minMatch
;
598 } else { /* repIndex < dictLimit || repIndex >= current */
599 const BYTE
* const repMatch
= dictMode
== ZSTD_dictMatchState
?
600 dmsBase
+ repIndex
- dmsIndexDelta
:
602 assert(current
>= windowLow
);
603 if ( dictMode
== ZSTD_extDict
604 && ( ((repOffset
-1) /*intentional overflow*/ < current
- windowLow
) /* equivalent to `current > repIndex >= windowLow` */
605 & (((U32
)((dictLimit
-1) - repIndex
) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
606 && (ZSTD_readMINMATCH(ip
, minMatch
) == ZSTD_readMINMATCH(repMatch
, minMatch
)) ) {
607 repLen
= (U32
)ZSTD_count_2segments(ip
+minMatch
, repMatch
+minMatch
, iLimit
, dictEnd
, prefixStart
) + minMatch
;
609 if (dictMode
== ZSTD_dictMatchState
610 && ( ((repOffset
-1) /*intentional overflow*/ < current
- (dmsLowLimit
+ dmsIndexDelta
)) /* equivalent to `current > repIndex >= dmsLowLimit` */
611 & ((U32
)((dictLimit
-1) - repIndex
) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */
612 && (ZSTD_readMINMATCH(ip
, minMatch
) == ZSTD_readMINMATCH(repMatch
, minMatch
)) ) {
613 repLen
= (U32
)ZSTD_count_2segments(ip
+minMatch
, repMatch
+minMatch
, iLimit
, dmsEnd
, prefixStart
) + minMatch
;
615 /* save longer solution */
616 if (repLen
> bestLength
) {
617 DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
618 repCode
, ll0
, repOffset
, repLen
);
620 matches
[mnum
].off
= repCode
- ll0
;
621 matches
[mnum
].len
= (U32
)repLen
;
623 if ( (repLen
> sufficient_len
)
624 | (ip
+repLen
== iLimit
) ) { /* best possible */
628 /* HC3 match finder */
629 if ((mls
== 3) /*static*/ && (bestLength
< mls
)) {
630 U32
const matchIndex3
= ZSTD_insertAndFindFirstIndexHash3(ms
, ip
);
631 if ((matchIndex3
>= matchLow
)
632 & (current
- matchIndex3
< (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
634 if ((dictMode
== ZSTD_noDict
) /*static*/ || (dictMode
== ZSTD_dictMatchState
) /*static*/ || (matchIndex3
>= dictLimit
)) {
635 const BYTE
* const match
= base
+ matchIndex3
;
636 mlen
= ZSTD_count(ip
, match
, iLimit
);
638 const BYTE
* const match
= dictBase
+ matchIndex3
;
639 mlen
= ZSTD_count_2segments(ip
, match
, iLimit
, dictEnd
, prefixStart
);
642 /* save best solution */
643 if (mlen
>= mls
/* == 3 > bestLength */) {
644 DEBUGLOG(8, "found small match with hlog3, of length %u",
647 assert(current
> matchIndex3
);
648 assert(mnum
==0); /* no prior solution */
649 matches
[0].off
= (current
- matchIndex3
) + ZSTD_REP_MOVE
;
650 matches
[0].len
= (U32
)mlen
;
652 if ( (mlen
> sufficient_len
) |
653 (ip
+mlen
== iLimit
) ) { /* best possible length */
654 ms
->nextToUpdate
= current
+1; /* skip insertion */
659 /* no dictMatchState lookup: dicts don't have a populated HC3 table */
662 hashTable
[h
] = current
; /* Update Hash Table */
664 while (nbCompares
-- && (matchIndex
>= matchLow
)) {
665 U32
* const nextPtr
= bt
+ 2*(matchIndex
& btMask
);
666 size_t matchLength
= MIN(commonLengthSmaller
, commonLengthLarger
); /* guaranteed minimum nb of common bytes */
668 assert(current
> matchIndex
);
670 if ((dictMode
== ZSTD_noDict
) || (dictMode
== ZSTD_dictMatchState
) || (matchIndex
+matchLength
>= dictLimit
)) {
671 assert(matchIndex
+matchLength
>= dictLimit
); /* ensure the condition is correct when !extDict */
672 match
= base
+ matchIndex
;
673 matchLength
+= ZSTD_count(ip
+matchLength
, match
+matchLength
, iLimit
);
675 match
= dictBase
+ matchIndex
;
676 matchLength
+= ZSTD_count_2segments(ip
+matchLength
, match
+matchLength
, iLimit
, dictEnd
, prefixStart
);
677 if (matchIndex
+matchLength
>= dictLimit
)
678 match
= base
+ matchIndex
; /* prepare for match[matchLength] */
681 if (matchLength
> bestLength
) {
682 DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)",
683 (U32
)matchLength
, current
- matchIndex
, current
- matchIndex
+ ZSTD_REP_MOVE
);
684 assert(matchEndIdx
> matchIndex
);
685 if (matchLength
> matchEndIdx
- matchIndex
)
686 matchEndIdx
= matchIndex
+ (U32
)matchLength
;
687 bestLength
= matchLength
;
688 matches
[mnum
].off
= (current
- matchIndex
) + ZSTD_REP_MOVE
;
689 matches
[mnum
].len
= (U32
)matchLength
;
691 if ( (matchLength
> ZSTD_OPT_NUM
)
692 | (ip
+matchLength
== iLimit
) /* equal : no way to know if inf or sup */) {
693 if (dictMode
== ZSTD_dictMatchState
) nbCompares
= 0; /* break should also skip searching dms */
694 break; /* drop, to preserve bt consistency (miss a little bit of compression) */
698 if (match
[matchLength
] < ip
[matchLength
]) {
699 /* match smaller than current */
700 *smallerPtr
= matchIndex
; /* update smaller idx */
701 commonLengthSmaller
= matchLength
; /* all smaller will now have at least this guaranteed common length */
702 if (matchIndex
<= btLow
) { smallerPtr
=&dummy32
; break; } /* beyond tree size, stop the search */
703 smallerPtr
= nextPtr
+1; /* new candidate => larger than match, which was smaller than current */
704 matchIndex
= nextPtr
[1]; /* new matchIndex, larger than previous, closer to current */
706 *largerPtr
= matchIndex
;
707 commonLengthLarger
= matchLength
;
708 if (matchIndex
<= btLow
) { largerPtr
=&dummy32
; break; } /* beyond tree size, stop the search */
710 matchIndex
= nextPtr
[0];
713 *smallerPtr
= *largerPtr
= 0;
715 if (dictMode
== ZSTD_dictMatchState
&& nbCompares
) {
716 size_t const dmsH
= ZSTD_hashPtr(ip
, dmsHashLog
, mls
);
717 U32 dictMatchIndex
= dms
->hashTable
[dmsH
];
718 const U32
* const dmsBt
= dms
->chainTable
;
719 commonLengthSmaller
= commonLengthLarger
= 0;
720 while (nbCompares
-- && (dictMatchIndex
> dmsLowLimit
)) {
721 const U32
* const nextPtr
= dmsBt
+ 2*(dictMatchIndex
& dmsBtMask
);
722 size_t matchLength
= MIN(commonLengthSmaller
, commonLengthLarger
); /* guaranteed minimum nb of common bytes */
723 const BYTE
* match
= dmsBase
+ dictMatchIndex
;
724 matchLength
+= ZSTD_count_2segments(ip
+matchLength
, match
+matchLength
, iLimit
, dmsEnd
, prefixStart
);
725 if (dictMatchIndex
+matchLength
>= dmsHighLimit
)
726 match
= base
+ dictMatchIndex
+ dmsIndexDelta
; /* to prepare for next usage of match[matchLength] */
728 if (matchLength
> bestLength
) {
729 matchIndex
= dictMatchIndex
+ dmsIndexDelta
;
730 DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)",
731 (U32
)matchLength
, current
- matchIndex
, current
- matchIndex
+ ZSTD_REP_MOVE
);
732 if (matchLength
> matchEndIdx
- matchIndex
)
733 matchEndIdx
= matchIndex
+ (U32
)matchLength
;
734 bestLength
= matchLength
;
735 matches
[mnum
].off
= (current
- matchIndex
) + ZSTD_REP_MOVE
;
736 matches
[mnum
].len
= (U32
)matchLength
;
738 if ( (matchLength
> ZSTD_OPT_NUM
)
739 | (ip
+matchLength
== iLimit
) /* equal : no way to know if inf or sup */) {
740 break; /* drop, to guarantee consistency (miss a little bit of compression) */
744 if (dictMatchIndex
<= dmsBtLow
) { break; } /* beyond tree size, stop the search */
745 if (match
[matchLength
] < ip
[matchLength
]) {
746 commonLengthSmaller
= matchLength
; /* all smaller will now have at least this guaranteed common length */
747 dictMatchIndex
= nextPtr
[1]; /* new matchIndex larger than previous (closer to current) */
749 /* match is larger than current */
750 commonLengthLarger
= matchLength
;
751 dictMatchIndex
= nextPtr
[0];
756 assert(matchEndIdx
> current
+8);
757 ms
->nextToUpdate
= matchEndIdx
- 8; /* skip repetitive patterns */
762 FORCE_INLINE_TEMPLATE U32
ZSTD_BtGetAllMatches (
763 ZSTD_matchState_t
* ms
,
764 const BYTE
* ip
, const BYTE
* const iHighLimit
, const ZSTD_dictMode_e dictMode
,
765 U32 rep
[ZSTD_REP_NUM
], U32
const ll0
,
766 ZSTD_match_t
* matches
, U32
const lengthToBeat
)
768 const ZSTD_compressionParameters
* const cParams
= &ms
->cParams
;
769 U32
const matchLengthSearch
= cParams
->minMatch
;
770 DEBUGLOG(8, "ZSTD_BtGetAllMatches");
771 if (ip
< ms
->window
.base
+ ms
->nextToUpdate
) return 0; /* skipped area */
772 ZSTD_updateTree_internal(ms
, ip
, iHighLimit
, matchLengthSearch
, dictMode
);
773 switch(matchLengthSearch
)
775 case 3 : return ZSTD_insertBtAndGetAllMatches(ms
, ip
, iHighLimit
, dictMode
, rep
, ll0
, matches
, lengthToBeat
, 3);
777 case 4 : return ZSTD_insertBtAndGetAllMatches(ms
, ip
, iHighLimit
, dictMode
, rep
, ll0
, matches
, lengthToBeat
, 4);
778 case 5 : return ZSTD_insertBtAndGetAllMatches(ms
, ip
, iHighLimit
, dictMode
, rep
, ll0
, matches
, lengthToBeat
, 5);
780 case 6 : return ZSTD_insertBtAndGetAllMatches(ms
, ip
, iHighLimit
, dictMode
, rep
, ll0
, matches
, lengthToBeat
, 6);
785 /*-*******************************
787 *********************************/
788 typedef struct repcodes_s
{
792 static repcodes_t
ZSTD_updateRep(U32
const rep
[3], U32
const offset
, U32
const ll0
)
795 if (offset
>= ZSTD_REP_NUM
) { /* full offset */
796 newReps
.rep
[2] = rep
[1];
797 newReps
.rep
[1] = rep
[0];
798 newReps
.rep
[0] = offset
- ZSTD_REP_MOVE
;
799 } else { /* repcode */
800 U32
const repCode
= offset
+ ll0
;
801 if (repCode
> 0) { /* note : if repCode==0, no change */
802 U32
const currentOffset
= (repCode
==ZSTD_REP_NUM
) ? (rep
[0] - 1) : rep
[repCode
];
803 newReps
.rep
[2] = (repCode
>= 2) ? rep
[1] : rep
[2];
804 newReps
.rep
[1] = rep
[0];
805 newReps
.rep
[0] = currentOffset
;
806 } else { /* repCode == 0 */
807 memcpy(&newReps
, rep
, sizeof(newReps
));
814 static U32
ZSTD_totalLen(ZSTD_optimal_t sol
)
816 return sol
.litlen
+ sol
.mlen
;
822 listStats(const U32
* table
, int lastEltID
)
824 int const nbElts
= lastEltID
+ 1;
826 for (enb
=0; enb
< nbElts
; enb
++) {
828 //RAWLOG(2, "%3i:%3i, ", enb, table[enb]);
829 RAWLOG(2, "%4i,", table
[enb
]);
836 FORCE_INLINE_TEMPLATE
size_t
837 ZSTD_compressBlock_opt_generic(ZSTD_matchState_t
* ms
,
838 seqStore_t
* seqStore
,
839 U32 rep
[ZSTD_REP_NUM
],
840 const void* src
, size_t srcSize
,
842 const ZSTD_dictMode_e dictMode
)
844 optState_t
* const optStatePtr
= &ms
->opt
;
845 const BYTE
* const istart
= (const BYTE
*)src
;
846 const BYTE
* ip
= istart
;
847 const BYTE
* anchor
= istart
;
848 const BYTE
* const iend
= istart
+ srcSize
;
849 const BYTE
* const ilimit
= iend
- 8;
850 const BYTE
* const base
= ms
->window
.base
;
851 const BYTE
* const prefixStart
= base
+ ms
->window
.dictLimit
;
852 const ZSTD_compressionParameters
* const cParams
= &ms
->cParams
;
854 U32
const sufficient_len
= MIN(cParams
->targetLength
, ZSTD_OPT_NUM
-1);
855 U32
const minMatch
= (cParams
->minMatch
== 3) ? 3 : 4;
857 ZSTD_optimal_t
* const opt
= optStatePtr
->priceTable
;
858 ZSTD_match_t
* const matches
= optStatePtr
->matchTable
;
859 ZSTD_optimal_t lastSequence
;
862 DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
863 (U32
)(ip
- base
), ms
->window
.dictLimit
, ms
->nextToUpdate
);
864 assert(optLevel
<= 2);
865 ms
->nextToUpdate3
= ms
->nextToUpdate
;
866 ZSTD_rescaleFreqs(optStatePtr
, (const BYTE
*)src
, srcSize
, optLevel
);
867 ip
+= (ip
==prefixStart
);
870 while (ip
< ilimit
) {
871 U32 cur
, last_pos
= 0;
873 /* find first match */
874 { U32
const litlen
= (U32
)(ip
- anchor
);
875 U32
const ll0
= !litlen
;
876 U32
const nbMatches
= ZSTD_BtGetAllMatches(ms
, ip
, iend
, dictMode
, rep
, ll0
, matches
, minMatch
);
877 if (!nbMatches
) { ip
++; continue; }
879 /* initialize opt[0] */
880 { U32 i
; for (i
=0; i
<ZSTD_REP_NUM
; i
++) opt
[0].rep
[i
] = rep
[i
]; }
881 opt
[0].mlen
= 0; /* means is_a_literal */
882 opt
[0].litlen
= litlen
;
883 opt
[0].price
= ZSTD_literalsContribution(anchor
, litlen
, optStatePtr
, optLevel
);
885 /* large match -> immediate encoding */
886 { U32
const maxML
= matches
[nbMatches
-1].len
;
887 U32
const maxOffset
= matches
[nbMatches
-1].off
;
888 DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series",
889 nbMatches
, maxML
, maxOffset
, (U32
)(ip
-prefixStart
));
891 if (maxML
> sufficient_len
) {
892 lastSequence
.litlen
= litlen
;
893 lastSequence
.mlen
= maxML
;
894 lastSequence
.off
= maxOffset
;
895 DEBUGLOG(6, "large match (%u>%u), immediate encoding",
896 maxML
, sufficient_len
);
898 last_pos
= ZSTD_totalLen(lastSequence
);
902 /* set prices for first matches starting position == 0 */
903 { U32
const literalsPrice
= opt
[0].price
+ ZSTD_litLengthPrice(0, optStatePtr
, optLevel
);
906 for (pos
= 1; pos
< minMatch
; pos
++) {
907 opt
[pos
].price
= ZSTD_MAX_PRICE
; /* mlen, litlen and price will be fixed during forward scanning */
909 for (matchNb
= 0; matchNb
< nbMatches
; matchNb
++) {
910 U32
const offset
= matches
[matchNb
].off
;
911 U32
const end
= matches
[matchNb
].len
;
912 repcodes_t
const repHistory
= ZSTD_updateRep(rep
, offset
, ll0
);
913 for ( ; pos
<= end
; pos
++ ) {
914 U32
const matchPrice
= ZSTD_getMatchPrice(offset
, pos
, optStatePtr
, optLevel
);
915 U32
const sequencePrice
= literalsPrice
+ matchPrice
;
916 DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
917 pos
, ZSTD_fCost(sequencePrice
));
919 opt
[pos
].off
= offset
;
920 opt
[pos
].litlen
= litlen
;
921 opt
[pos
].price
= sequencePrice
;
922 ZSTD_STATIC_ASSERT(sizeof(opt
[pos
].rep
) == sizeof(repHistory
));
923 memcpy(opt
[pos
].rep
, &repHistory
, sizeof(repHistory
));
929 /* check further positions */
930 for (cur
= 1; cur
<= last_pos
; cur
++) {
931 const BYTE
* const inr
= ip
+ cur
;
932 assert(cur
< ZSTD_OPT_NUM
);
933 DEBUGLOG(7, "cPos:%zi==rPos:%u", inr
-istart
, cur
)
935 /* Fix current position with one literal if cheaper */
936 { U32
const litlen
= (opt
[cur
-1].mlen
== 0) ? opt
[cur
-1].litlen
+ 1 : 1;
937 int const price
= opt
[cur
-1].price
938 + ZSTD_rawLiteralsCost(ip
+cur
-1, 1, optStatePtr
, optLevel
)
939 + ZSTD_litLengthPrice(litlen
, optStatePtr
, optLevel
)
940 - ZSTD_litLengthPrice(litlen
-1, optStatePtr
, optLevel
);
941 assert(price
< 1000000000); /* overflow check */
942 if (price
<= opt
[cur
].price
) {
943 DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
944 inr
-istart
, cur
, ZSTD_fCost(price
), ZSTD_fCost(opt
[cur
].price
), litlen
,
945 opt
[cur
-1].rep
[0], opt
[cur
-1].rep
[1], opt
[cur
-1].rep
[2]);
948 opt
[cur
].litlen
= litlen
;
949 opt
[cur
].price
= price
;
950 memcpy(opt
[cur
].rep
, opt
[cur
-1].rep
, sizeof(opt
[cur
].rep
));
952 DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)",
953 inr
-istart
, cur
, ZSTD_fCost(price
), ZSTD_fCost(opt
[cur
].price
),
954 opt
[cur
].rep
[0], opt
[cur
].rep
[1], opt
[cur
].rep
[2]);
958 /* last match must start at a minimum distance of 8 from oend */
959 if (inr
> ilimit
) continue;
961 if (cur
== last_pos
) break;
963 if ( (optLevel
==0) /*static_test*/
964 && (opt
[cur
+1].price
<= opt
[cur
].price
+ (BITCOST_MULTIPLIER
/2)) ) {
965 DEBUGLOG(7, "move to next rPos:%u : price is <=", cur
+1);
966 continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
969 { U32
const ll0
= (opt
[cur
].mlen
!= 0);
970 U32
const litlen
= (opt
[cur
].mlen
== 0) ? opt
[cur
].litlen
: 0;
971 U32
const previousPrice
= opt
[cur
].price
;
972 U32
const basePrice
= previousPrice
+ ZSTD_litLengthPrice(0, optStatePtr
, optLevel
);
973 U32
const nbMatches
= ZSTD_BtGetAllMatches(ms
, inr
, iend
, dictMode
, opt
[cur
].rep
, ll0
, matches
, minMatch
);
976 DEBUGLOG(7, "rPos:%u : no match found", cur
);
980 { U32
const maxML
= matches
[nbMatches
-1].len
;
981 DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u",
982 inr
-istart
, cur
, nbMatches
, maxML
);
984 if ( (maxML
> sufficient_len
)
985 || (cur
+ maxML
>= ZSTD_OPT_NUM
) ) {
986 lastSequence
.mlen
= maxML
;
987 lastSequence
.off
= matches
[nbMatches
-1].off
;
988 lastSequence
.litlen
= litlen
;
989 cur
-= (opt
[cur
].mlen
==0) ? opt
[cur
].litlen
: 0; /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */
990 last_pos
= cur
+ ZSTD_totalLen(lastSequence
);
991 if (cur
> ZSTD_OPT_NUM
) cur
= 0; /* underflow => first match */
995 /* set prices using matches found at position == cur */
996 for (matchNb
= 0; matchNb
< nbMatches
; matchNb
++) {
997 U32
const offset
= matches
[matchNb
].off
;
998 repcodes_t
const repHistory
= ZSTD_updateRep(opt
[cur
].rep
, offset
, ll0
);
999 U32
const lastML
= matches
[matchNb
].len
;
1000 U32
const startML
= (matchNb
>0) ? matches
[matchNb
-1].len
+1 : minMatch
;
1003 DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u",
1004 matchNb
, matches
[matchNb
].off
, lastML
, litlen
);
1006 for (mlen
= lastML
; mlen
>= startML
; mlen
--) { /* scan downward */
1007 U32
const pos
= cur
+ mlen
;
1008 int const price
= basePrice
+ ZSTD_getMatchPrice(offset
, mlen
, optStatePtr
, optLevel
);
1010 if ((pos
> last_pos
) || (price
< opt
[pos
].price
)) {
1011 DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
1012 pos
, mlen
, ZSTD_fCost(price
), ZSTD_fCost(opt
[pos
].price
));
1013 while (last_pos
< pos
) { opt
[last_pos
+1].price
= ZSTD_MAX_PRICE
; last_pos
++; } /* fill empty positions */
1014 opt
[pos
].mlen
= mlen
;
1015 opt
[pos
].off
= offset
;
1016 opt
[pos
].litlen
= litlen
;
1017 opt
[pos
].price
= price
;
1018 ZSTD_STATIC_ASSERT(sizeof(opt
[pos
].rep
) == sizeof(repHistory
));
1019 memcpy(opt
[pos
].rep
, &repHistory
, sizeof(repHistory
));
1021 DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
1022 pos
, mlen
, ZSTD_fCost(price
), ZSTD_fCost(opt
[pos
].price
));
1023 if (optLevel
==0) break; /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
1026 } /* for (cur = 1; cur <= last_pos; cur++) */
1028 lastSequence
= opt
[last_pos
];
1029 cur
= last_pos
> ZSTD_totalLen(lastSequence
) ? last_pos
- ZSTD_totalLen(lastSequence
) : 0; /* single sequence, and it starts before `ip` */
1030 assert(cur
< ZSTD_OPT_NUM
); /* control overflow*/
1032 _shortestPath
: /* cur, last_pos, best_mlen, best_off have to be set */
1033 assert(opt
[0].mlen
== 0);
1035 { U32
const storeEnd
= cur
+ 1;
1036 U32 storeStart
= storeEnd
;
1039 DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
1040 last_pos
, cur
); (void)last_pos
;
1041 assert(storeEnd
< ZSTD_OPT_NUM
);
1042 DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
1043 storeEnd
, lastSequence
.litlen
, lastSequence
.mlen
, lastSequence
.off
);
1044 opt
[storeEnd
] = lastSequence
;
1045 while (seqPos
> 0) {
1046 U32
const backDist
= ZSTD_totalLen(opt
[seqPos
]);
1048 DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
1049 seqPos
, storeStart
, opt
[seqPos
].litlen
, opt
[seqPos
].mlen
, opt
[seqPos
].off
);
1050 opt
[storeStart
] = opt
[seqPos
];
1051 seqPos
= (seqPos
> backDist
) ? seqPos
- backDist
: 0;
1054 /* save sequences */
1055 DEBUGLOG(6, "sending selected sequences into seqStore")
1057 for (storePos
=storeStart
; storePos
<= storeEnd
; storePos
++) {
1058 U32
const llen
= opt
[storePos
].litlen
;
1059 U32
const mlen
= opt
[storePos
].mlen
;
1060 U32
const offCode
= opt
[storePos
].off
;
1061 U32
const advance
= llen
+ mlen
;
1062 DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
1063 anchor
- istart
, (unsigned)llen
, (unsigned)mlen
);
1065 if (mlen
==0) { /* only literals => must be last "sequence", actually starting a new stream of sequences */
1066 assert(storePos
== storeEnd
); /* must be last sequence */
1067 ip
= anchor
+ llen
; /* last "sequence" is a bunch of literals => don't progress anchor */
1068 continue; /* will finish */
1071 /* repcodes update : like ZSTD_updateRep(), but update in place */
1072 if (offCode
>= ZSTD_REP_NUM
) { /* full offset */
1075 rep
[0] = offCode
- ZSTD_REP_MOVE
;
1076 } else { /* repcode */
1077 U32
const repCode
= offCode
+ (llen
==0);
1078 if (repCode
) { /* note : if repCode==0, no change */
1079 U32
const currentOffset
= (repCode
==ZSTD_REP_NUM
) ? (rep
[0] - 1) : rep
[repCode
];
1080 if (repCode
>= 2) rep
[2] = rep
[1];
1082 rep
[0] = currentOffset
;
1085 assert(anchor
+ llen
<= iend
);
1086 ZSTD_updateStats(optStatePtr
, llen
, anchor
, offCode
, mlen
);
1087 ZSTD_storeSeq(seqStore
, llen
, anchor
, offCode
, mlen
-MINMATCH
);
1091 ZSTD_setBasePrices(optStatePtr
, optLevel
);
1094 } /* while (ip < ilimit) */
1096 /* Return the last literals size */
1097 return iend
- anchor
;
1101 size_t ZSTD_compressBlock_btopt(
1102 ZSTD_matchState_t
* ms
, seqStore_t
* seqStore
, U32 rep
[ZSTD_REP_NUM
],
1103 const void* src
, size_t srcSize
)
1105 DEBUGLOG(5, "ZSTD_compressBlock_btopt");
1106 return ZSTD_compressBlock_opt_generic(ms
, seqStore
, rep
, src
, srcSize
, 0 /*optLevel*/, ZSTD_noDict
);
1110 /* used in 2-pass strategy */
1111 static U32
ZSTD_upscaleStat(unsigned* table
, U32 lastEltIndex
, int bonus
)
1114 assert(ZSTD_FREQ_DIV
+bonus
>= 0);
1115 for (s
=0; s
<lastEltIndex
+1; s
++) {
1116 table
[s
] <<= ZSTD_FREQ_DIV
+bonus
;
1123 /* used in 2-pass strategy */
1124 MEM_STATIC
void ZSTD_upscaleStats(optState_t
* optPtr
)
1126 if (ZSTD_compressedLiterals(optPtr
))
1127 optPtr
->litSum
= ZSTD_upscaleStat(optPtr
->litFreq
, MaxLit
, 0);
1128 optPtr
->litLengthSum
= ZSTD_upscaleStat(optPtr
->litLengthFreq
, MaxLL
, 0);
1129 optPtr
->matchLengthSum
= ZSTD_upscaleStat(optPtr
->matchLengthFreq
, MaxML
, 0);
1130 optPtr
->offCodeSum
= ZSTD_upscaleStat(optPtr
->offCodeFreq
, MaxOff
, 0);
1133 /* ZSTD_initStats_ultra():
1134 * make a first compression pass, just to seed stats with more accurate starting values.
1135 * only works on first block, with no dictionary and no ldm.
1136 * this function cannot error, hence its contract must be respected.
1139 ZSTD_initStats_ultra(ZSTD_matchState_t
* ms
,
1140 seqStore_t
* seqStore
,
1141 U32 rep
[ZSTD_REP_NUM
],
1142 const void* src
, size_t srcSize
)
1144 U32 tmpRep
[ZSTD_REP_NUM
]; /* updated rep codes will sink here */
1145 memcpy(tmpRep
, rep
, sizeof(tmpRep
));
1147 DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize
);
1148 assert(ms
->opt
.litLengthSum
== 0); /* first block */
1149 assert(seqStore
->sequences
== seqStore
->sequencesStart
); /* no ldm */
1150 assert(ms
->window
.dictLimit
== ms
->window
.lowLimit
); /* no dictionary */
1151 assert(ms
->window
.dictLimit
- ms
->nextToUpdate
<= 1); /* no prefix (note: intentional overflow, defined as 2-complement) */
1153 ZSTD_compressBlock_opt_generic(ms
, seqStore
, tmpRep
, src
, srcSize
, 2 /*optLevel*/, ZSTD_noDict
); /* generate stats into ms->opt*/
1155 /* invalidate first scan from history */
1156 ZSTD_resetSeqStore(seqStore
);
1157 ms
->window
.base
-= srcSize
;
1158 ms
->window
.dictLimit
+= (U32
)srcSize
;
1159 ms
->window
.lowLimit
= ms
->window
.dictLimit
;
1160 ms
->nextToUpdate
= ms
->window
.dictLimit
;
1161 ms
->nextToUpdate3
= ms
->window
.dictLimit
;
1163 /* re-inforce weight of collected statistics */
1164 ZSTD_upscaleStats(&ms
->opt
);
1167 size_t ZSTD_compressBlock_btultra(
1168 ZSTD_matchState_t
* ms
, seqStore_t
* seqStore
, U32 rep
[ZSTD_REP_NUM
],
1169 const void* src
, size_t srcSize
)
1171 DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize
);
1172 return ZSTD_compressBlock_opt_generic(ms
, seqStore
, rep
, src
, srcSize
, 2 /*optLevel*/, ZSTD_noDict
);
1175 size_t ZSTD_compressBlock_btultra2(
1176 ZSTD_matchState_t
* ms
, seqStore_t
* seqStore
, U32 rep
[ZSTD_REP_NUM
],
1177 const void* src
, size_t srcSize
)
1179 U32
const current
= (U32
)((const BYTE
*)src
- ms
->window
.base
);
1180 DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize
);
1183 * this strategy makes a first pass over first block to collect statistics
1184 * and seed next round's statistics with it.
1185 * After 1st pass, function forgets everything, and starts a new block.
1186 * Consequently, this can only work if no data has been previously loaded in tables,
1187 * aka, no dictionary, no prefix, no ldm preprocessing.
1188 * The compression ratio gain is generally small (~0.5% on first block),
1189 * the cost is 2x cpu time on first block. */
1190 assert(srcSize
<= ZSTD_BLOCKSIZE_MAX
);
1191 if ( (ms
->opt
.litLengthSum
==0) /* first block */
1192 && (seqStore
->sequences
== seqStore
->sequencesStart
) /* no ldm */
1193 && (ms
->window
.dictLimit
== ms
->window
.lowLimit
) /* no dictionary */
1194 && (current
== ms
->window
.dictLimit
) /* start of frame, nothing already loaded nor skipped */
1195 && (srcSize
> ZSTD_PREDEF_THRESHOLD
)
1197 ZSTD_initStats_ultra(ms
, seqStore
, rep
, src
, srcSize
);
1200 return ZSTD_compressBlock_opt_generic(ms
, seqStore
, rep
, src
, srcSize
, 2 /*optLevel*/, ZSTD_noDict
);
1203 size_t ZSTD_compressBlock_btopt_dictMatchState(
1204 ZSTD_matchState_t
* ms
, seqStore_t
* seqStore
, U32 rep
[ZSTD_REP_NUM
],
1205 const void* src
, size_t srcSize
)
1207 return ZSTD_compressBlock_opt_generic(ms
, seqStore
, rep
, src
, srcSize
, 0 /*optLevel*/, ZSTD_dictMatchState
);
1210 size_t ZSTD_compressBlock_btultra_dictMatchState(
1211 ZSTD_matchState_t
* ms
, seqStore_t
* seqStore
, U32 rep
[ZSTD_REP_NUM
],
1212 const void* src
, size_t srcSize
)
1214 return ZSTD_compressBlock_opt_generic(ms
, seqStore
, rep
, src
, srcSize
, 2 /*optLevel*/, ZSTD_dictMatchState
);
1217 size_t ZSTD_compressBlock_btopt_extDict(
1218 ZSTD_matchState_t
* ms
, seqStore_t
* seqStore
, U32 rep
[ZSTD_REP_NUM
],
1219 const void* src
, size_t srcSize
)
1221 return ZSTD_compressBlock_opt_generic(ms
, seqStore
, rep
, src
, srcSize
, 0 /*optLevel*/, ZSTD_extDict
);
1224 size_t ZSTD_compressBlock_btultra_extDict(
1225 ZSTD_matchState_t
* ms
, seqStore_t
* seqStore
, U32 rep
[ZSTD_REP_NUM
],
1226 const void* src
, size_t srcSize
)
1228 return ZSTD_compressBlock_opt_generic(ms
, seqStore
, rep
, src
, srcSize
, 2 /*optLevel*/, ZSTD_extDict
);
1231 /* note : no btultra2 variant for extDict nor dictMatchState,
1232 * because btultra2 is not meant to work with dictionaries
1233 * and is only specific for the first block (no prefix) */