2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
11 /* zstd_decompress_block :
12 * this module takes care of decompressing _compressed_ block */
14 /*-*******************************************************
16 *********************************************************/
17 #include <string.h> /* memcpy, memmove, memset */
18 #include "compiler.h" /* prefetch */
19 #include "cpu.h" /* bmi2 */
20 #include "mem.h" /* low level memory routines */
21 #define FSE_STATIC_LINKING_ONLY
23 #define HUF_STATIC_LINKING_ONLY
25 #include "zstd_internal.h"
26 #include "zstd_decompress_internal.h" /* ZSTD_DCtx */
27 #include "zstd_ddict.h" /* ZSTD_DDictDictContent */
28 #include "zstd_decompress_block.h"
30 /*_*******************************************************
32 **********************************************************/
34 /* These two optional macros force the use one way or another of the two
35 * ZSTD_decompressSequences implementations. You can't force in both directions
38 #if defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
39 defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
40 #error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!"
44 /*_*******************************************************
46 **********************************************************/
47 static void ZSTD_copy4(void* dst
, const void* src
) { memcpy(dst
, src
, 4); }
50 /*-*************************************************************
52 ***************************************************************/
54 /*! ZSTD_getcBlockSize() :
55 * Provides the size of compressed block from block header `src` */
56 size_t ZSTD_getcBlockSize(const void* src
, size_t srcSize
,
57 blockProperties_t
* bpPtr
)
59 RETURN_ERROR_IF(srcSize
< ZSTD_blockHeaderSize
, srcSize_wrong
);
61 { U32
const cBlockHeader
= MEM_readLE24(src
);
62 U32
const cSize
= cBlockHeader
>> 3;
63 bpPtr
->lastBlock
= cBlockHeader
& 1;
64 bpPtr
->blockType
= (blockType_e
)((cBlockHeader
>> 1) & 3);
65 bpPtr
->origSize
= cSize
; /* only useful for RLE */
66 if (bpPtr
->blockType
== bt_rle
) return 1;
67 RETURN_ERROR_IF(bpPtr
->blockType
== bt_reserved
, corruption_detected
);
73 /* Hidden declaration for fullbench */
74 size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx
* dctx
,
75 const void* src
, size_t srcSize
);
76 /*! ZSTD_decodeLiteralsBlock() :
77 * @return : nb of bytes read from src (< srcSize )
78 * note : symbol not declared but exposed for fullbench */
79 size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx
* dctx
,
80 const void* src
, size_t srcSize
) /* note : srcSize < BLOCKSIZE */
82 RETURN_ERROR_IF(srcSize
< MIN_CBLOCK_SIZE
, corruption_detected
);
84 { const BYTE
* const istart
= (const BYTE
*) src
;
85 symbolEncodingType_e
const litEncType
= (symbolEncodingType_e
)(istart
[0] & 3);
90 RETURN_ERROR_IF(dctx
->litEntropy
==0, dictionary_corrupted
);
94 RETURN_ERROR_IF(srcSize
< 5, corruption_detected
, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3");
95 { size_t lhSize
, litSize
, litCSize
;
97 U32
const lhlCode
= (istart
[0] >> 2) & 3;
98 U32
const lhc
= MEM_readLE32(istart
);
102 case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */
103 /* 2 - 2 - 10 - 10 */
104 singleStream
= !lhlCode
;
106 litSize
= (lhc
>> 4) & 0x3FF;
107 litCSize
= (lhc
>> 14) & 0x3FF;
110 /* 2 - 2 - 14 - 14 */
112 litSize
= (lhc
>> 4) & 0x3FFF;
113 litCSize
= lhc
>> 18;
116 /* 2 - 2 - 18 - 18 */
118 litSize
= (lhc
>> 4) & 0x3FFFF;
119 litCSize
= (lhc
>> 22) + (istart
[4] << 10);
122 RETURN_ERROR_IF(litSize
> ZSTD_BLOCKSIZE_MAX
, corruption_detected
);
123 RETURN_ERROR_IF(litCSize
+ lhSize
> srcSize
, corruption_detected
);
125 /* prefetch huffman table if cold */
126 if (dctx
->ddictIsCold
&& (litSize
> 768 /* heuristic */)) {
127 PREFETCH_AREA(dctx
->HUFptr
, sizeof(dctx
->entropy
.hufTable
));
130 if (litEncType
==set_repeat
) {
132 hufSuccess
= HUF_decompress1X_usingDTable_bmi2(
133 dctx
->litBuffer
, litSize
, istart
+lhSize
, litCSize
,
134 dctx
->HUFptr
, dctx
->bmi2
);
136 hufSuccess
= HUF_decompress4X_usingDTable_bmi2(
137 dctx
->litBuffer
, litSize
, istart
+lhSize
, litCSize
,
138 dctx
->HUFptr
, dctx
->bmi2
);
142 #if defined(HUF_FORCE_DECOMPRESS_X2)
143 hufSuccess
= HUF_decompress1X_DCtx_wksp(
144 dctx
->entropy
.hufTable
, dctx
->litBuffer
, litSize
,
145 istart
+lhSize
, litCSize
, dctx
->workspace
,
146 sizeof(dctx
->workspace
));
148 hufSuccess
= HUF_decompress1X1_DCtx_wksp_bmi2(
149 dctx
->entropy
.hufTable
, dctx
->litBuffer
, litSize
,
150 istart
+lhSize
, litCSize
, dctx
->workspace
,
151 sizeof(dctx
->workspace
), dctx
->bmi2
);
154 hufSuccess
= HUF_decompress4X_hufOnly_wksp_bmi2(
155 dctx
->entropy
.hufTable
, dctx
->litBuffer
, litSize
,
156 istart
+lhSize
, litCSize
, dctx
->workspace
,
157 sizeof(dctx
->workspace
), dctx
->bmi2
);
161 RETURN_ERROR_IF(HUF_isError(hufSuccess
), corruption_detected
);
163 dctx
->litPtr
= dctx
->litBuffer
;
164 dctx
->litSize
= litSize
;
165 dctx
->litEntropy
= 1;
166 if (litEncType
==set_compressed
) dctx
->HUFptr
= dctx
->entropy
.hufTable
;
167 memset(dctx
->litBuffer
+ dctx
->litSize
, 0, WILDCOPY_OVERLENGTH
);
168 return litCSize
+ lhSize
;
172 { size_t litSize
, lhSize
;
173 U32
const lhlCode
= ((istart
[0]) >> 2) & 3;
176 case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */
178 litSize
= istart
[0] >> 3;
182 litSize
= MEM_readLE16(istart
) >> 4;
186 litSize
= MEM_readLE24(istart
) >> 4;
190 if (lhSize
+litSize
+WILDCOPY_OVERLENGTH
> srcSize
) { /* risk reading beyond src buffer with wildcopy */
191 RETURN_ERROR_IF(litSize
+lhSize
> srcSize
, corruption_detected
);
192 memcpy(dctx
->litBuffer
, istart
+lhSize
, litSize
);
193 dctx
->litPtr
= dctx
->litBuffer
;
194 dctx
->litSize
= litSize
;
195 memset(dctx
->litBuffer
+ dctx
->litSize
, 0, WILDCOPY_OVERLENGTH
);
196 return lhSize
+litSize
;
198 /* direct reference into compressed stream */
199 dctx
->litPtr
= istart
+lhSize
;
200 dctx
->litSize
= litSize
;
201 return lhSize
+litSize
;
205 { U32
const lhlCode
= ((istart
[0]) >> 2) & 3;
206 size_t litSize
, lhSize
;
209 case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */
211 litSize
= istart
[0] >> 3;
215 litSize
= MEM_readLE16(istart
) >> 4;
219 litSize
= MEM_readLE24(istart
) >> 4;
220 RETURN_ERROR_IF(srcSize
<4, corruption_detected
, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
223 RETURN_ERROR_IF(litSize
> ZSTD_BLOCKSIZE_MAX
, corruption_detected
);
224 memset(dctx
->litBuffer
, istart
[lhSize
], litSize
+ WILDCOPY_OVERLENGTH
);
225 dctx
->litPtr
= dctx
->litBuffer
;
226 dctx
->litSize
= litSize
;
230 RETURN_ERROR(corruption_detected
, "impossible");
235 /* Default FSE distribution tables.
236 * These are pre-calculated FSE decoding tables using default distributions as defined in specification :
237 * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#default-distributions
238 * They were generated programmatically with following method :
239 * - start from default distributions, present in /lib/common/zstd_internal.h
240 * - generate tables normally, using ZSTD_buildFSETable()
241 * - printout the content of tables
242 * - pretify output, report below, test with fuzzer to ensure it's correct */
244 /* Default FSE distribution table for Literal Lengths */
245 static const ZSTD_seqSymbol LL_defaultDTable
[(1<<LL_DEFAULTNORMLOG
)+1] = {
246 { 1, 1, 1, LL_DEFAULTNORMLOG
}, /* header : fastMode, tableLog */
247 /* nextState, nbAddBits, nbBits, baseVal */
248 { 0, 0, 4, 0}, { 16, 0, 4, 0},
249 { 32, 0, 5, 1}, { 0, 0, 5, 3},
250 { 0, 0, 5, 4}, { 0, 0, 5, 6},
251 { 0, 0, 5, 7}, { 0, 0, 5, 9},
252 { 0, 0, 5, 10}, { 0, 0, 5, 12},
253 { 0, 0, 6, 14}, { 0, 1, 5, 16},
254 { 0, 1, 5, 20}, { 0, 1, 5, 22},
255 { 0, 2, 5, 28}, { 0, 3, 5, 32},
256 { 0, 4, 5, 48}, { 32, 6, 5, 64},
257 { 0, 7, 5, 128}, { 0, 8, 6, 256},
258 { 0, 10, 6, 1024}, { 0, 12, 6, 4096},
259 { 32, 0, 4, 0}, { 0, 0, 4, 1},
260 { 0, 0, 5, 2}, { 32, 0, 5, 4},
261 { 0, 0, 5, 5}, { 32, 0, 5, 7},
262 { 0, 0, 5, 8}, { 32, 0, 5, 10},
263 { 0, 0, 5, 11}, { 0, 0, 6, 13},
264 { 32, 1, 5, 16}, { 0, 1, 5, 18},
265 { 32, 1, 5, 22}, { 0, 2, 5, 24},
266 { 32, 3, 5, 32}, { 0, 3, 5, 40},
267 { 0, 6, 4, 64}, { 16, 6, 4, 64},
268 { 32, 7, 5, 128}, { 0, 9, 6, 512},
269 { 0, 11, 6, 2048}, { 48, 0, 4, 0},
270 { 16, 0, 4, 1}, { 32, 0, 5, 2},
271 { 32, 0, 5, 3}, { 32, 0, 5, 5},
272 { 32, 0, 5, 6}, { 32, 0, 5, 8},
273 { 32, 0, 5, 9}, { 32, 0, 5, 11},
274 { 32, 0, 5, 12}, { 0, 0, 6, 15},
275 { 32, 1, 5, 18}, { 32, 1, 5, 20},
276 { 32, 2, 5, 24}, { 32, 2, 5, 28},
277 { 32, 3, 5, 40}, { 32, 4, 5, 48},
278 { 0, 16, 6,65536}, { 0, 15, 6,32768},
279 { 0, 14, 6,16384}, { 0, 13, 6, 8192},
280 }; /* LL_defaultDTable */
282 /* Default FSE distribution table for Offset Codes */
283 static const ZSTD_seqSymbol OF_defaultDTable
[(1<<OF_DEFAULTNORMLOG
)+1] = {
284 { 1, 1, 1, OF_DEFAULTNORMLOG
}, /* header : fastMode, tableLog */
285 /* nextState, nbAddBits, nbBits, baseVal */
286 { 0, 0, 5, 0}, { 0, 6, 4, 61},
287 { 0, 9, 5, 509}, { 0, 15, 5,32765},
288 { 0, 21, 5,2097149}, { 0, 3, 5, 5},
289 { 0, 7, 4, 125}, { 0, 12, 5, 4093},
290 { 0, 18, 5,262141}, { 0, 23, 5,8388605},
291 { 0, 5, 5, 29}, { 0, 8, 4, 253},
292 { 0, 14, 5,16381}, { 0, 20, 5,1048573},
293 { 0, 2, 5, 1}, { 16, 7, 4, 125},
294 { 0, 11, 5, 2045}, { 0, 17, 5,131069},
295 { 0, 22, 5,4194301}, { 0, 4, 5, 13},
296 { 16, 8, 4, 253}, { 0, 13, 5, 8189},
297 { 0, 19, 5,524285}, { 0, 1, 5, 1},
298 { 16, 6, 4, 61}, { 0, 10, 5, 1021},
299 { 0, 16, 5,65533}, { 0, 28, 5,268435453},
300 { 0, 27, 5,134217725}, { 0, 26, 5,67108861},
301 { 0, 25, 5,33554429}, { 0, 24, 5,16777213},
302 }; /* OF_defaultDTable */
305 /* Default FSE distribution table for Match Lengths */
306 static const ZSTD_seqSymbol ML_defaultDTable
[(1<<ML_DEFAULTNORMLOG
)+1] = {
307 { 1, 1, 1, ML_DEFAULTNORMLOG
}, /* header : fastMode, tableLog */
308 /* nextState, nbAddBits, nbBits, baseVal */
309 { 0, 0, 6, 3}, { 0, 0, 4, 4},
310 { 32, 0, 5, 5}, { 0, 0, 5, 6},
311 { 0, 0, 5, 8}, { 0, 0, 5, 9},
312 { 0, 0, 5, 11}, { 0, 0, 6, 13},
313 { 0, 0, 6, 16}, { 0, 0, 6, 19},
314 { 0, 0, 6, 22}, { 0, 0, 6, 25},
315 { 0, 0, 6, 28}, { 0, 0, 6, 31},
316 { 0, 0, 6, 34}, { 0, 1, 6, 37},
317 { 0, 1, 6, 41}, { 0, 2, 6, 47},
318 { 0, 3, 6, 59}, { 0, 4, 6, 83},
319 { 0, 7, 6, 131}, { 0, 9, 6, 515},
320 { 16, 0, 4, 4}, { 0, 0, 4, 5},
321 { 32, 0, 5, 6}, { 0, 0, 5, 7},
322 { 32, 0, 5, 9}, { 0, 0, 5, 10},
323 { 0, 0, 6, 12}, { 0, 0, 6, 15},
324 { 0, 0, 6, 18}, { 0, 0, 6, 21},
325 { 0, 0, 6, 24}, { 0, 0, 6, 27},
326 { 0, 0, 6, 30}, { 0, 0, 6, 33},
327 { 0, 1, 6, 35}, { 0, 1, 6, 39},
328 { 0, 2, 6, 43}, { 0, 3, 6, 51},
329 { 0, 4, 6, 67}, { 0, 5, 6, 99},
330 { 0, 8, 6, 259}, { 32, 0, 4, 4},
331 { 48, 0, 4, 4}, { 16, 0, 4, 5},
332 { 32, 0, 5, 7}, { 32, 0, 5, 8},
333 { 32, 0, 5, 10}, { 32, 0, 5, 11},
334 { 0, 0, 6, 14}, { 0, 0, 6, 17},
335 { 0, 0, 6, 20}, { 0, 0, 6, 23},
336 { 0, 0, 6, 26}, { 0, 0, 6, 29},
337 { 0, 0, 6, 32}, { 0, 16, 6,65539},
338 { 0, 15, 6,32771}, { 0, 14, 6,16387},
339 { 0, 13, 6, 8195}, { 0, 12, 6, 4099},
340 { 0, 11, 6, 2051}, { 0, 10, 6, 1027},
341 }; /* ML_defaultDTable */
344 static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol
* dt
, U32 baseValue
, U32 nbAddBits
)
347 ZSTD_seqSymbol_header
* const DTableH
= (ZSTD_seqSymbol_header
*)ptr
;
348 ZSTD_seqSymbol
* const cell
= dt
+ 1;
350 DTableH
->tableLog
= 0;
351 DTableH
->fastMode
= 0;
355 assert(nbAddBits
< 255);
356 cell
->nbAdditionalBits
= (BYTE
)nbAddBits
;
357 cell
->baseValue
= baseValue
;
361 /* ZSTD_buildFSETable() :
362 * generate FSE decoding table for one symbol (ll, ml or off)
363 * cannot fail if input is valid =>
364 * all inputs are presumed validated at this stage */
366 ZSTD_buildFSETable(ZSTD_seqSymbol
* dt
,
367 const short* normalizedCounter
, unsigned maxSymbolValue
,
368 const U32
* baseValue
, const U32
* nbAdditionalBits
,
371 ZSTD_seqSymbol
* const tableDecode
= dt
+1;
372 U16 symbolNext
[MaxSeq
+1];
374 U32
const maxSV1
= maxSymbolValue
+ 1;
375 U32
const tableSize
= 1 << tableLog
;
376 U32 highThreshold
= tableSize
-1;
379 assert(maxSymbolValue
<= MaxSeq
);
380 assert(tableLog
<= MaxFSELog
);
382 /* Init, lay down lowprob symbols */
383 { ZSTD_seqSymbol_header DTableH
;
384 DTableH
.tableLog
= tableLog
;
385 DTableH
.fastMode
= 1;
386 { S16
const largeLimit
= (S16
)(1 << (tableLog
-1));
388 for (s
=0; s
<maxSV1
; s
++) {
389 if (normalizedCounter
[s
]==-1) {
390 tableDecode
[highThreshold
--].baseValue
= s
;
393 if (normalizedCounter
[s
] >= largeLimit
) DTableH
.fastMode
=0;
394 symbolNext
[s
] = normalizedCounter
[s
];
396 memcpy(dt
, &DTableH
, sizeof(DTableH
));
400 { U32
const tableMask
= tableSize
-1;
401 U32
const step
= FSE_TABLESTEP(tableSize
);
403 for (s
=0; s
<maxSV1
; s
++) {
405 for (i
=0; i
<normalizedCounter
[s
]; i
++) {
406 tableDecode
[position
].baseValue
= s
;
407 position
= (position
+ step
) & tableMask
;
408 while (position
> highThreshold
) position
= (position
+ step
) & tableMask
; /* lowprob area */
410 assert(position
== 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
413 /* Build Decoding table */
415 for (u
=0; u
<tableSize
; u
++) {
416 U32
const symbol
= tableDecode
[u
].baseValue
;
417 U32
const nextState
= symbolNext
[symbol
]++;
418 tableDecode
[u
].nbBits
= (BYTE
) (tableLog
- BIT_highbit32(nextState
) );
419 tableDecode
[u
].nextState
= (U16
) ( (nextState
<< tableDecode
[u
].nbBits
) - tableSize
);
420 assert(nbAdditionalBits
[symbol
] < 255);
421 tableDecode
[u
].nbAdditionalBits
= (BYTE
)nbAdditionalBits
[symbol
];
422 tableDecode
[u
].baseValue
= baseValue
[symbol
];
427 /*! ZSTD_buildSeqTable() :
428 * @return : nb bytes read from src,
429 * or an error code if it fails */
430 static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol
* DTableSpace
, const ZSTD_seqSymbol
** DTablePtr
,
431 symbolEncodingType_e type
, unsigned max
, U32 maxLog
,
432 const void* src
, size_t srcSize
,
433 const U32
* baseValue
, const U32
* nbAdditionalBits
,
434 const ZSTD_seqSymbol
* defaultTable
, U32 flagRepeatTable
,
435 int ddictIsCold
, int nbSeq
)
440 RETURN_ERROR_IF(!srcSize
, srcSize_wrong
);
441 RETURN_ERROR_IF((*(const BYTE
*)src
) > max
, corruption_detected
);
442 { U32
const symbol
= *(const BYTE
*)src
;
443 U32
const baseline
= baseValue
[symbol
];
444 U32
const nbBits
= nbAdditionalBits
[symbol
];
445 ZSTD_buildSeqTable_rle(DTableSpace
, baseline
, nbBits
);
447 *DTablePtr
= DTableSpace
;
450 *DTablePtr
= defaultTable
;
453 RETURN_ERROR_IF(!flagRepeatTable
, corruption_detected
);
454 /* prefetch FSE table if used */
455 if (ddictIsCold
&& (nbSeq
> 24 /* heuristic */)) {
456 const void* const pStart
= *DTablePtr
;
457 size_t const pSize
= sizeof(ZSTD_seqSymbol
) * (SEQSYMBOL_TABLE_SIZE(maxLog
));
458 PREFETCH_AREA(pStart
, pSize
);
461 case set_compressed
:
464 size_t const headerSize
= FSE_readNCount(norm
, &max
, &tableLog
, src
, srcSize
);
465 RETURN_ERROR_IF(FSE_isError(headerSize
), corruption_detected
);
466 RETURN_ERROR_IF(tableLog
> maxLog
, corruption_detected
);
467 ZSTD_buildFSETable(DTableSpace
, norm
, max
, baseValue
, nbAdditionalBits
, tableLog
);
468 *DTablePtr
= DTableSpace
;
473 RETURN_ERROR(GENERIC
, "impossible");
477 size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx
* dctx
, int* nbSeqPtr
,
478 const void* src
, size_t srcSize
)
480 const BYTE
* const istart
= (const BYTE
* const)src
;
481 const BYTE
* const iend
= istart
+ srcSize
;
482 const BYTE
* ip
= istart
;
484 DEBUGLOG(5, "ZSTD_decodeSeqHeaders");
487 RETURN_ERROR_IF(srcSize
< MIN_SEQUENCES_SIZE
, srcSize_wrong
);
493 RETURN_ERROR_IF(srcSize
!= 1, srcSize_wrong
);
498 RETURN_ERROR_IF(ip
+2 > iend
, srcSize_wrong
);
499 nbSeq
= MEM_readLE16(ip
) + LONGNBSEQ
, ip
+=2;
501 RETURN_ERROR_IF(ip
>= iend
, srcSize_wrong
);
502 nbSeq
= ((nbSeq
-0x80)<<8) + *ip
++;
507 /* FSE table descriptors */
508 RETURN_ERROR_IF(ip
+4 > iend
, srcSize_wrong
); /* minimum possible size */
509 { symbolEncodingType_e
const LLtype
= (symbolEncodingType_e
)(*ip
>> 6);
510 symbolEncodingType_e
const OFtype
= (symbolEncodingType_e
)((*ip
>> 4) & 3);
511 symbolEncodingType_e
const MLtype
= (symbolEncodingType_e
)((*ip
>> 2) & 3);
515 { size_t const llhSize
= ZSTD_buildSeqTable(dctx
->entropy
.LLTable
, &dctx
->LLTptr
,
516 LLtype
, MaxLL
, LLFSELog
,
519 LL_defaultDTable
, dctx
->fseEntropy
,
520 dctx
->ddictIsCold
, nbSeq
);
521 RETURN_ERROR_IF(ZSTD_isError(llhSize
), corruption_detected
);
525 { size_t const ofhSize
= ZSTD_buildSeqTable(dctx
->entropy
.OFTable
, &dctx
->OFTptr
,
526 OFtype
, MaxOff
, OffFSELog
,
529 OF_defaultDTable
, dctx
->fseEntropy
,
530 dctx
->ddictIsCold
, nbSeq
);
531 RETURN_ERROR_IF(ZSTD_isError(ofhSize
), corruption_detected
);
535 { size_t const mlhSize
= ZSTD_buildSeqTable(dctx
->entropy
.MLTable
, &dctx
->MLTptr
,
536 MLtype
, MaxML
, MLFSELog
,
539 ML_defaultDTable
, dctx
->fseEntropy
,
540 dctx
->ddictIsCold
, nbSeq
);
541 RETURN_ERROR_IF(ZSTD_isError(mlhSize
), corruption_detected
);
559 const ZSTD_seqSymbol
* table
;
563 BIT_DStream_t DStream
;
564 ZSTD_fseState stateLL
;
565 ZSTD_fseState stateOffb
;
566 ZSTD_fseState stateML
;
567 size_t prevOffset
[ZSTD_REP_NUM
];
568 const BYTE
* prefixStart
;
574 /* ZSTD_execSequenceLast7():
575 * exceptional case : decompress a match starting within last 7 bytes of output buffer.
576 * requires more careful checks, to ensure there is no overflow.
577 * performance does not matter though.
578 * note : this case is supposed to be never generated "naturally" by reference encoder,
579 * since in most cases it needs at least 8 bytes to look for a match.
580 * but it's allowed by the specification. */
582 size_t ZSTD_execSequenceLast7(BYTE
* op
,
583 BYTE
* const oend
, seq_t sequence
,
584 const BYTE
** litPtr
, const BYTE
* const litLimit
,
585 const BYTE
* const base
, const BYTE
* const vBase
, const BYTE
* const dictEnd
)
587 BYTE
* const oLitEnd
= op
+ sequence
.litLength
;
588 size_t const sequenceLength
= sequence
.litLength
+ sequence
.matchLength
;
589 BYTE
* const oMatchEnd
= op
+ sequenceLength
; /* risk : address space overflow (32-bits) */
590 const BYTE
* const iLitEnd
= *litPtr
+ sequence
.litLength
;
591 const BYTE
* match
= oLitEnd
- sequence
.offset
;
594 RETURN_ERROR_IF(oMatchEnd
>oend
, dstSize_tooSmall
, "last match must fit within dstBuffer");
595 RETURN_ERROR_IF(iLitEnd
> litLimit
, corruption_detected
, "try to read beyond literal buffer");
598 while (op
< oLitEnd
) *op
++ = *(*litPtr
)++;
601 if (sequence
.offset
> (size_t)(oLitEnd
- base
)) {
602 /* offset beyond prefix */
603 RETURN_ERROR_IF(sequence
.offset
> (size_t)(oLitEnd
- vBase
),corruption_detected
);
604 match
= dictEnd
- (base
-match
);
605 if (match
+ sequence
.matchLength
<= dictEnd
) {
606 memmove(oLitEnd
, match
, sequence
.matchLength
);
607 return sequenceLength
;
609 /* span extDict & currentPrefixSegment */
610 { size_t const length1
= dictEnd
- match
;
611 memmove(oLitEnd
, match
, length1
);
612 op
= oLitEnd
+ length1
;
613 sequence
.matchLength
-= length1
;
616 while (op
< oMatchEnd
) *op
++ = *match
++;
617 return sequenceLength
;
622 size_t ZSTD_execSequence(BYTE
* op
,
623 BYTE
* const oend
, seq_t sequence
,
624 const BYTE
** litPtr
, const BYTE
* const litLimit
,
625 const BYTE
* const prefixStart
, const BYTE
* const virtualStart
, const BYTE
* const dictEnd
)
627 BYTE
* const oLitEnd
= op
+ sequence
.litLength
;
628 size_t const sequenceLength
= sequence
.litLength
+ sequence
.matchLength
;
629 BYTE
* const oMatchEnd
= op
+ sequenceLength
; /* risk : address space overflow (32-bits) */
630 BYTE
* const oend_w
= oend
- WILDCOPY_OVERLENGTH
;
631 const BYTE
* const iLitEnd
= *litPtr
+ sequence
.litLength
;
632 const BYTE
* match
= oLitEnd
- sequence
.offset
;
635 RETURN_ERROR_IF(oMatchEnd
>oend
, dstSize_tooSmall
, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend");
636 RETURN_ERROR_IF(iLitEnd
> litLimit
, corruption_detected
, "over-read beyond lit buffer");
637 if (oLitEnd
>oend_w
) return ZSTD_execSequenceLast7(op
, oend
, sequence
, litPtr
, litLimit
, prefixStart
, virtualStart
, dictEnd
);
640 ZSTD_copy8(op
, *litPtr
);
641 if (sequence
.litLength
> 8)
642 ZSTD_wildcopy(op
+8, (*litPtr
)+8, sequence
.litLength
- 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
644 *litPtr
= iLitEnd
; /* update for next sequence */
647 if (sequence
.offset
> (size_t)(oLitEnd
- prefixStart
)) {
648 /* offset beyond prefix -> go into extDict */
649 RETURN_ERROR_IF(sequence
.offset
> (size_t)(oLitEnd
- virtualStart
), corruption_detected
);
650 match
= dictEnd
+ (match
- prefixStart
);
651 if (match
+ sequence
.matchLength
<= dictEnd
) {
652 memmove(oLitEnd
, match
, sequence
.matchLength
);
653 return sequenceLength
;
655 /* span extDict & currentPrefixSegment */
656 { size_t const length1
= dictEnd
- match
;
657 memmove(oLitEnd
, match
, length1
);
658 op
= oLitEnd
+ length1
;
659 sequence
.matchLength
-= length1
;
661 if (op
> oend_w
|| sequence
.matchLength
< MINMATCH
) {
663 for (i
= 0; i
< sequence
.matchLength
; ++i
) op
[i
] = match
[i
];
664 return sequenceLength
;
667 /* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */
669 /* match within prefix */
670 if (sequence
.offset
< 8) {
671 /* close range match, overlap */
672 static const U32 dec32table
[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
673 static const int dec64table
[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
674 int const sub2
= dec64table
[sequence
.offset
];
679 match
+= dec32table
[sequence
.offset
];
680 ZSTD_copy4(op
+4, match
);
683 ZSTD_copy8(op
, match
);
687 if (oMatchEnd
> oend
-(16-MINMATCH
)) {
689 ZSTD_wildcopy(op
, match
, oend_w
- op
);
690 match
+= oend_w
- op
;
693 while (op
< oMatchEnd
) *op
++ = *match
++;
695 ZSTD_wildcopy(op
, match
, (ptrdiff_t)sequence
.matchLength
-8); /* works even if matchLength < 8 */
697 return sequenceLength
;
702 size_t ZSTD_execSequenceLong(BYTE
* op
,
703 BYTE
* const oend
, seq_t sequence
,
704 const BYTE
** litPtr
, const BYTE
* const litLimit
,
705 const BYTE
* const prefixStart
, const BYTE
* const dictStart
, const BYTE
* const dictEnd
)
707 BYTE
* const oLitEnd
= op
+ sequence
.litLength
;
708 size_t const sequenceLength
= sequence
.litLength
+ sequence
.matchLength
;
709 BYTE
* const oMatchEnd
= op
+ sequenceLength
; /* risk : address space overflow (32-bits) */
710 BYTE
* const oend_w
= oend
- WILDCOPY_OVERLENGTH
;
711 const BYTE
* const iLitEnd
= *litPtr
+ sequence
.litLength
;
712 const BYTE
* match
= sequence
.match
;
715 RETURN_ERROR_IF(oMatchEnd
> oend
, dstSize_tooSmall
, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend");
716 RETURN_ERROR_IF(iLitEnd
> litLimit
, corruption_detected
, "over-read beyond lit buffer");
717 if (oLitEnd
> oend_w
) return ZSTD_execSequenceLast7(op
, oend
, sequence
, litPtr
, litLimit
, prefixStart
, dictStart
, dictEnd
);
720 ZSTD_copy8(op
, *litPtr
); /* note : op <= oLitEnd <= oend_w == oend - 8 */
721 if (sequence
.litLength
> 8)
722 ZSTD_wildcopy(op
+8, (*litPtr
)+8, sequence
.litLength
- 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
724 *litPtr
= iLitEnd
; /* update for next sequence */
727 if (sequence
.offset
> (size_t)(oLitEnd
- prefixStart
)) {
728 /* offset beyond prefix */
729 RETURN_ERROR_IF(sequence
.offset
> (size_t)(oLitEnd
- dictStart
), corruption_detected
);
730 if (match
+ sequence
.matchLength
<= dictEnd
) {
731 memmove(oLitEnd
, match
, sequence
.matchLength
);
732 return sequenceLength
;
734 /* span extDict & currentPrefixSegment */
735 { size_t const length1
= dictEnd
- match
;
736 memmove(oLitEnd
, match
, length1
);
737 op
= oLitEnd
+ length1
;
738 sequence
.matchLength
-= length1
;
740 if (op
> oend_w
|| sequence
.matchLength
< MINMATCH
) {
742 for (i
= 0; i
< sequence
.matchLength
; ++i
) op
[i
] = match
[i
];
743 return sequenceLength
;
746 assert(op
<= oend_w
);
747 assert(sequence
.matchLength
>= MINMATCH
);
749 /* match within prefix */
750 if (sequence
.offset
< 8) {
751 /* close range match, overlap */
752 static const U32 dec32table
[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
753 static const int dec64table
[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
754 int const sub2
= dec64table
[sequence
.offset
];
759 match
+= dec32table
[sequence
.offset
];
760 ZSTD_copy4(op
+4, match
);
763 ZSTD_copy8(op
, match
);
767 if (oMatchEnd
> oend
-(16-MINMATCH
)) {
769 ZSTD_wildcopy(op
, match
, oend_w
- op
);
770 match
+= oend_w
- op
;
773 while (op
< oMatchEnd
) *op
++ = *match
++;
775 ZSTD_wildcopy(op
, match
, (ptrdiff_t)sequence
.matchLength
-8); /* works even if matchLength < 8 */
777 return sequenceLength
;
781 ZSTD_initFseState(ZSTD_fseState
* DStatePtr
, BIT_DStream_t
* bitD
, const ZSTD_seqSymbol
* dt
)
783 const void* ptr
= dt
;
784 const ZSTD_seqSymbol_header
* const DTableH
= (const ZSTD_seqSymbol_header
*)ptr
;
785 DStatePtr
->state
= BIT_readBits(bitD
, DTableH
->tableLog
);
786 DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits",
787 (U32
)DStatePtr
->state
, DTableH
->tableLog
);
788 BIT_reloadDStream(bitD
);
789 DStatePtr
->table
= dt
+ 1;
792 FORCE_INLINE_TEMPLATE
void
793 ZSTD_updateFseState(ZSTD_fseState
* DStatePtr
, BIT_DStream_t
* bitD
)
795 ZSTD_seqSymbol
const DInfo
= DStatePtr
->table
[DStatePtr
->state
];
796 U32
const nbBits
= DInfo
.nbBits
;
797 size_t const lowBits
= BIT_readBits(bitD
, nbBits
);
798 DStatePtr
->state
= DInfo
.nextState
+ lowBits
;
801 /* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
802 * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
803 * bits before reloading. This value is the maximum number of bytes we read
804 * after reloading when we are decoding long offsets.
806 #define LONG_OFFSETS_MAX_EXTRA_BITS_32 \
807 (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \
808 ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \
811 typedef enum { ZSTD_lo_isRegularOffset
, ZSTD_lo_isLongOffset
=1 } ZSTD_longOffset_e
;
813 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
814 FORCE_INLINE_TEMPLATE seq_t
815 ZSTD_decodeSequence(seqState_t
* seqState
, const ZSTD_longOffset_e longOffsets
)
818 U32
const llBits
= seqState
->stateLL
.table
[seqState
->stateLL
.state
].nbAdditionalBits
;
819 U32
const mlBits
= seqState
->stateML
.table
[seqState
->stateML
.state
].nbAdditionalBits
;
820 U32
const ofBits
= seqState
->stateOffb
.table
[seqState
->stateOffb
.state
].nbAdditionalBits
;
821 U32
const totalBits
= llBits
+mlBits
+ofBits
;
822 U32
const llBase
= seqState
->stateLL
.table
[seqState
->stateLL
.state
].baseValue
;
823 U32
const mlBase
= seqState
->stateML
.table
[seqState
->stateML
.state
].baseValue
;
824 U32
const ofBase
= seqState
->stateOffb
.table
[seqState
->stateOffb
.state
].baseValue
;
831 ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset
== 1);
832 ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32
== 5);
833 assert(ofBits
<= MaxOff
);
834 if (MEM_32bits() && longOffsets
&& (ofBits
>= STREAM_ACCUMULATOR_MIN_32
)) {
835 U32
const extraBits
= ofBits
- MIN(ofBits
, 32 - seqState
->DStream
.bitsConsumed
);
836 offset
= ofBase
+ (BIT_readBitsFast(&seqState
->DStream
, ofBits
- extraBits
) << extraBits
);
837 BIT_reloadDStream(&seqState
->DStream
);
838 if (extraBits
) offset
+= BIT_readBitsFast(&seqState
->DStream
, extraBits
);
839 assert(extraBits
<= LONG_OFFSETS_MAX_EXTRA_BITS_32
); /* to avoid another reload */
841 offset
= ofBase
+ BIT_readBitsFast(&seqState
->DStream
, ofBits
/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
842 if (MEM_32bits()) BIT_reloadDStream(&seqState
->DStream
);
847 offset
+= (llBase
==0);
849 size_t temp
= (offset
==3) ? seqState
->prevOffset
[0] - 1 : seqState
->prevOffset
[offset
];
850 temp
+= !temp
; /* 0 is not valid; input is corrupted; force offset to 1 */
851 if (offset
!= 1) seqState
->prevOffset
[2] = seqState
->prevOffset
[1];
852 seqState
->prevOffset
[1] = seqState
->prevOffset
[0];
853 seqState
->prevOffset
[0] = offset
= temp
;
854 } else { /* offset == 0 */
855 offset
= seqState
->prevOffset
[0];
858 seqState
->prevOffset
[2] = seqState
->prevOffset
[1];
859 seqState
->prevOffset
[1] = seqState
->prevOffset
[0];
860 seqState
->prevOffset
[0] = offset
;
865 seq
.matchLength
= mlBase
866 + ((mlBits
>0) ? BIT_readBitsFast(&seqState
->DStream
, mlBits
/*>0*/) : 0); /* <= 16 bits */
867 if (MEM_32bits() && (mlBits
+llBits
>= STREAM_ACCUMULATOR_MIN_32
-LONG_OFFSETS_MAX_EXTRA_BITS_32
))
868 BIT_reloadDStream(&seqState
->DStream
);
869 if (MEM_64bits() && (totalBits
>= STREAM_ACCUMULATOR_MIN_64
-(LLFSELog
+MLFSELog
+OffFSELog
)))
870 BIT_reloadDStream(&seqState
->DStream
);
871 /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
872 ZSTD_STATIC_ASSERT(16+LLFSELog
+MLFSELog
+OffFSELog
< STREAM_ACCUMULATOR_MIN_64
);
874 seq
.litLength
= llBase
875 + ((llBits
>0) ? BIT_readBitsFast(&seqState
->DStream
, llBits
/*>0*/) : 0); /* <= 16 bits */
877 BIT_reloadDStream(&seqState
->DStream
);
879 DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
880 (U32
)seq
.litLength
, (U32
)seq
.matchLength
, (U32
)seq
.offset
);
882 /* ANS state update */
883 ZSTD_updateFseState(&seqState
->stateLL
, &seqState
->DStream
); /* <= 9 bits */
884 ZSTD_updateFseState(&seqState
->stateML
, &seqState
->DStream
); /* <= 9 bits */
885 if (MEM_32bits()) BIT_reloadDStream(&seqState
->DStream
); /* <= 18 bits */
886 ZSTD_updateFseState(&seqState
->stateOffb
, &seqState
->DStream
); /* <= 8 bits */
891 FORCE_INLINE_TEMPLATE
size_t
892 ZSTD_decompressSequences_body( ZSTD_DCtx
* dctx
,
893 void* dst
, size_t maxDstSize
,
894 const void* seqStart
, size_t seqSize
, int nbSeq
,
895 const ZSTD_longOffset_e isLongOffset
)
897 const BYTE
* ip
= (const BYTE
*)seqStart
;
898 const BYTE
* const iend
= ip
+ seqSize
;
899 BYTE
* const ostart
= (BYTE
* const)dst
;
900 BYTE
* const oend
= ostart
+ maxDstSize
;
902 const BYTE
* litPtr
= dctx
->litPtr
;
903 const BYTE
* const litEnd
= litPtr
+ dctx
->litSize
;
904 const BYTE
* const prefixStart
= (const BYTE
*) (dctx
->prefixStart
);
905 const BYTE
* const vBase
= (const BYTE
*) (dctx
->virtualStart
);
906 const BYTE
* const dictEnd
= (const BYTE
*) (dctx
->dictEnd
);
907 DEBUGLOG(5, "ZSTD_decompressSequences_body");
909 /* Regen sequences */
912 dctx
->fseEntropy
= 1;
913 { U32 i
; for (i
=0; i
<ZSTD_REP_NUM
; i
++) seqState
.prevOffset
[i
] = dctx
->entropy
.rep
[i
]; }
915 ERR_isError(BIT_initDStream(&seqState
.DStream
, ip
, iend
-ip
)),
916 corruption_detected
);
917 ZSTD_initFseState(&seqState
.stateLL
, &seqState
.DStream
, dctx
->LLTptr
);
918 ZSTD_initFseState(&seqState
.stateOffb
, &seqState
.DStream
, dctx
->OFTptr
);
919 ZSTD_initFseState(&seqState
.stateML
, &seqState
.DStream
, dctx
->MLTptr
);
921 for ( ; (BIT_reloadDStream(&(seqState
.DStream
)) <= BIT_DStream_completed
) && nbSeq
; ) {
923 { seq_t
const sequence
= ZSTD_decodeSequence(&seqState
, isLongOffset
);
924 size_t const oneSeqSize
= ZSTD_execSequence(op
, oend
, sequence
, &litPtr
, litEnd
, prefixStart
, vBase
, dictEnd
);
925 DEBUGLOG(6, "regenerated sequence size : %u", (U32
)oneSeqSize
);
926 if (ZSTD_isError(oneSeqSize
)) return oneSeqSize
;
930 /* check if reached exact end */
931 DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq
);
932 RETURN_ERROR_IF(nbSeq
, corruption_detected
);
933 /* save reps for next block */
934 { U32 i
; for (i
=0; i
<ZSTD_REP_NUM
; i
++) dctx
->entropy
.rep
[i
] = (U32
)(seqState
.prevOffset
[i
]); }
937 /* last literal segment */
938 { size_t const lastLLSize
= litEnd
- litPtr
;
939 RETURN_ERROR_IF(lastLLSize
> (size_t)(oend
-op
), dstSize_tooSmall
);
940 memcpy(op
, litPtr
, lastLLSize
);
948 ZSTD_decompressSequences_default(ZSTD_DCtx
* dctx
,
949 void* dst
, size_t maxDstSize
,
950 const void* seqStart
, size_t seqSize
, int nbSeq
,
951 const ZSTD_longOffset_e isLongOffset
)
953 return ZSTD_decompressSequences_body(dctx
, dst
, maxDstSize
, seqStart
, seqSize
, nbSeq
, isLongOffset
);
955 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
959 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
960 FORCE_INLINE_TEMPLATE seq_t
961 ZSTD_decodeSequenceLong(seqState_t
* seqState
, ZSTD_longOffset_e
const longOffsets
)
964 U32
const llBits
= seqState
->stateLL
.table
[seqState
->stateLL
.state
].nbAdditionalBits
;
965 U32
const mlBits
= seqState
->stateML
.table
[seqState
->stateML
.state
].nbAdditionalBits
;
966 U32
const ofBits
= seqState
->stateOffb
.table
[seqState
->stateOffb
.state
].nbAdditionalBits
;
967 U32
const totalBits
= llBits
+mlBits
+ofBits
;
968 U32
const llBase
= seqState
->stateLL
.table
[seqState
->stateLL
.state
].baseValue
;
969 U32
const mlBase
= seqState
->stateML
.table
[seqState
->stateML
.state
].baseValue
;
970 U32
const ofBase
= seqState
->stateOffb
.table
[seqState
->stateOffb
.state
].baseValue
;
977 ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset
== 1);
978 ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32
== 5);
979 assert(ofBits
<= MaxOff
);
980 if (MEM_32bits() && longOffsets
) {
981 U32
const extraBits
= ofBits
- MIN(ofBits
, STREAM_ACCUMULATOR_MIN_32
-1);
982 offset
= ofBase
+ (BIT_readBitsFast(&seqState
->DStream
, ofBits
- extraBits
) << extraBits
);
983 if (MEM_32bits() || extraBits
) BIT_reloadDStream(&seqState
->DStream
);
984 if (extraBits
) offset
+= BIT_readBitsFast(&seqState
->DStream
, extraBits
);
986 offset
= ofBase
+ BIT_readBitsFast(&seqState
->DStream
, ofBits
); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
987 if (MEM_32bits()) BIT_reloadDStream(&seqState
->DStream
);
992 offset
+= (llBase
==0);
994 size_t temp
= (offset
==3) ? seqState
->prevOffset
[0] - 1 : seqState
->prevOffset
[offset
];
995 temp
+= !temp
; /* 0 is not valid; input is corrupted; force offset to 1 */
996 if (offset
!= 1) seqState
->prevOffset
[2] = seqState
->prevOffset
[1];
997 seqState
->prevOffset
[1] = seqState
->prevOffset
[0];
998 seqState
->prevOffset
[0] = offset
= temp
;
1000 offset
= seqState
->prevOffset
[0];
1003 seqState
->prevOffset
[2] = seqState
->prevOffset
[1];
1004 seqState
->prevOffset
[1] = seqState
->prevOffset
[0];
1005 seqState
->prevOffset
[0] = offset
;
1007 seq
.offset
= offset
;
1010 seq
.matchLength
= mlBase
+ ((mlBits
>0) ? BIT_readBitsFast(&seqState
->DStream
, mlBits
) : 0); /* <= 16 bits */
1011 if (MEM_32bits() && (mlBits
+llBits
>= STREAM_ACCUMULATOR_MIN_32
-LONG_OFFSETS_MAX_EXTRA_BITS_32
))
1012 BIT_reloadDStream(&seqState
->DStream
);
1013 if (MEM_64bits() && (totalBits
>= STREAM_ACCUMULATOR_MIN_64
-(LLFSELog
+MLFSELog
+OffFSELog
)))
1014 BIT_reloadDStream(&seqState
->DStream
);
1015 /* Verify that there is enough bits to read the rest of the data in 64-bit mode. */
1016 ZSTD_STATIC_ASSERT(16+LLFSELog
+MLFSELog
+OffFSELog
< STREAM_ACCUMULATOR_MIN_64
);
1018 seq
.litLength
= llBase
+ ((llBits
>0) ? BIT_readBitsFast(&seqState
->DStream
, llBits
) : 0); /* <= 16 bits */
1020 BIT_reloadDStream(&seqState
->DStream
);
1022 { size_t const pos
= seqState
->pos
+ seq
.litLength
;
1023 const BYTE
* const matchBase
= (seq
.offset
> pos
) ? seqState
->dictEnd
: seqState
->prefixStart
;
1024 seq
.match
= matchBase
+ pos
- seq
.offset
; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
1025 * No consequence though : no memory access will occur, overly large offset will be detected in ZSTD_execSequenceLong() */
1026 seqState
->pos
= pos
+ seq
.matchLength
;
1029 /* ANS state update */
1030 ZSTD_updateFseState(&seqState
->stateLL
, &seqState
->DStream
); /* <= 9 bits */
1031 ZSTD_updateFseState(&seqState
->stateML
, &seqState
->DStream
); /* <= 9 bits */
1032 if (MEM_32bits()) BIT_reloadDStream(&seqState
->DStream
); /* <= 18 bits */
1033 ZSTD_updateFseState(&seqState
->stateOffb
, &seqState
->DStream
); /* <= 8 bits */
1038 FORCE_INLINE_TEMPLATE
size_t
1039 ZSTD_decompressSequencesLong_body(
1041 void* dst
, size_t maxDstSize
,
1042 const void* seqStart
, size_t seqSize
, int nbSeq
,
1043 const ZSTD_longOffset_e isLongOffset
)
1045 const BYTE
* ip
= (const BYTE
*)seqStart
;
1046 const BYTE
* const iend
= ip
+ seqSize
;
1047 BYTE
* const ostart
= (BYTE
* const)dst
;
1048 BYTE
* const oend
= ostart
+ maxDstSize
;
1050 const BYTE
* litPtr
= dctx
->litPtr
;
1051 const BYTE
* const litEnd
= litPtr
+ dctx
->litSize
;
1052 const BYTE
* const prefixStart
= (const BYTE
*) (dctx
->prefixStart
);
1053 const BYTE
* const dictStart
= (const BYTE
*) (dctx
->virtualStart
);
1054 const BYTE
* const dictEnd
= (const BYTE
*) (dctx
->dictEnd
);
1056 /* Regen sequences */
1058 #define STORED_SEQS 4
1059 #define STORED_SEQS_MASK (STORED_SEQS-1)
1060 #define ADVANCED_SEQS 4
1061 seq_t sequences
[STORED_SEQS
];
1062 int const seqAdvance
= MIN(nbSeq
, ADVANCED_SEQS
);
1063 seqState_t seqState
;
1065 dctx
->fseEntropy
= 1;
1066 { int i
; for (i
=0; i
<ZSTD_REP_NUM
; i
++) seqState
.prevOffset
[i
] = dctx
->entropy
.rep
[i
]; }
1067 seqState
.prefixStart
= prefixStart
;
1068 seqState
.pos
= (size_t)(op
-prefixStart
);
1069 seqState
.dictEnd
= dictEnd
;
1072 ERR_isError(BIT_initDStream(&seqState
.DStream
, ip
, iend
-ip
)),
1073 corruption_detected
);
1074 ZSTD_initFseState(&seqState
.stateLL
, &seqState
.DStream
, dctx
->LLTptr
);
1075 ZSTD_initFseState(&seqState
.stateOffb
, &seqState
.DStream
, dctx
->OFTptr
);
1076 ZSTD_initFseState(&seqState
.stateML
, &seqState
.DStream
, dctx
->MLTptr
);
1078 /* prepare in advance */
1079 for (seqNb
=0; (BIT_reloadDStream(&seqState
.DStream
) <= BIT_DStream_completed
) && (seqNb
<seqAdvance
); seqNb
++) {
1080 sequences
[seqNb
] = ZSTD_decodeSequenceLong(&seqState
, isLongOffset
);
1081 PREFETCH_L1(sequences
[seqNb
].match
); PREFETCH_L1(sequences
[seqNb
].match
+ sequences
[seqNb
].matchLength
- 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
1083 RETURN_ERROR_IF(seqNb
<seqAdvance
, corruption_detected
);
1085 /* decode and decompress */
1086 for ( ; (BIT_reloadDStream(&(seqState
.DStream
)) <= BIT_DStream_completed
) && (seqNb
<nbSeq
) ; seqNb
++) {
1087 seq_t
const sequence
= ZSTD_decodeSequenceLong(&seqState
, isLongOffset
);
1088 size_t const oneSeqSize
= ZSTD_execSequenceLong(op
, oend
, sequences
[(seqNb
-ADVANCED_SEQS
) & STORED_SEQS_MASK
], &litPtr
, litEnd
, prefixStart
, dictStart
, dictEnd
);
1089 if (ZSTD_isError(oneSeqSize
)) return oneSeqSize
;
1090 PREFETCH_L1(sequence
.match
); PREFETCH_L1(sequence
.match
+ sequence
.matchLength
- 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
1091 sequences
[seqNb
& STORED_SEQS_MASK
] = sequence
;
1094 RETURN_ERROR_IF(seqNb
<nbSeq
, corruption_detected
);
1097 seqNb
-= seqAdvance
;
1098 for ( ; seqNb
<nbSeq
; seqNb
++) {
1099 size_t const oneSeqSize
= ZSTD_execSequenceLong(op
, oend
, sequences
[seqNb
&STORED_SEQS_MASK
], &litPtr
, litEnd
, prefixStart
, dictStart
, dictEnd
);
1100 if (ZSTD_isError(oneSeqSize
)) return oneSeqSize
;
1104 /* save reps for next block */
1105 { U32 i
; for (i
=0; i
<ZSTD_REP_NUM
; i
++) dctx
->entropy
.rep
[i
] = (U32
)(seqState
.prevOffset
[i
]); }
1108 /* last literal segment */
1109 { size_t const lastLLSize
= litEnd
- litPtr
;
1110 RETURN_ERROR_IF(lastLLSize
> (size_t)(oend
-op
), dstSize_tooSmall
);
1111 memcpy(op
, litPtr
, lastLLSize
);
1119 ZSTD_decompressSequencesLong_default(ZSTD_DCtx
* dctx
,
1120 void* dst
, size_t maxDstSize
,
1121 const void* seqStart
, size_t seqSize
, int nbSeq
,
1122 const ZSTD_longOffset_e isLongOffset
)
1124 return ZSTD_decompressSequencesLong_body(dctx
, dst
, maxDstSize
, seqStart
, seqSize
, nbSeq
, isLongOffset
);
1126 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1132 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1133 static TARGET_ATTRIBUTE("bmi2") size_t
1134 ZSTD_decompressSequences_bmi2(ZSTD_DCtx
* dctx
,
1135 void* dst
, size_t maxDstSize
,
1136 const void* seqStart
, size_t seqSize
, int nbSeq
,
1137 const ZSTD_longOffset_e isLongOffset
)
1139 return ZSTD_decompressSequences_body(dctx
, dst
, maxDstSize
, seqStart
, seqSize
, nbSeq
, isLongOffset
);
1141 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
1143 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1144 static TARGET_ATTRIBUTE("bmi2") size_t
1145 ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx
* dctx
,
1146 void* dst
, size_t maxDstSize
,
1147 const void* seqStart
, size_t seqSize
, int nbSeq
,
1148 const ZSTD_longOffset_e isLongOffset
)
1150 return ZSTD_decompressSequencesLong_body(dctx
, dst
, maxDstSize
, seqStart
, seqSize
, nbSeq
, isLongOffset
);
1152 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1154 #endif /* DYNAMIC_BMI2 */
1156 typedef size_t (*ZSTD_decompressSequences_t
)(
1158 void* dst
, size_t maxDstSize
,
1159 const void* seqStart
, size_t seqSize
, int nbSeq
,
1160 const ZSTD_longOffset_e isLongOffset
);
1162 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1164 ZSTD_decompressSequences(ZSTD_DCtx
* dctx
, void* dst
, size_t maxDstSize
,
1165 const void* seqStart
, size_t seqSize
, int nbSeq
,
1166 const ZSTD_longOffset_e isLongOffset
)
1168 DEBUGLOG(5, "ZSTD_decompressSequences");
1171 return ZSTD_decompressSequences_bmi2(dctx
, dst
, maxDstSize
, seqStart
, seqSize
, nbSeq
, isLongOffset
);
1174 return ZSTD_decompressSequences_default(dctx
, dst
, maxDstSize
, seqStart
, seqSize
, nbSeq
, isLongOffset
);
1176 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
1179 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1180 /* ZSTD_decompressSequencesLong() :
1181 * decompression function triggered when a minimum share of offsets is considered "long",
1183 * note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance".
1184 * This function will try to mitigate main memory latency through the use of prefetching */
1186 ZSTD_decompressSequencesLong(ZSTD_DCtx
* dctx
,
1187 void* dst
, size_t maxDstSize
,
1188 const void* seqStart
, size_t seqSize
, int nbSeq
,
1189 const ZSTD_longOffset_e isLongOffset
)
1191 DEBUGLOG(5, "ZSTD_decompressSequencesLong");
1194 return ZSTD_decompressSequencesLong_bmi2(dctx
, dst
, maxDstSize
, seqStart
, seqSize
, nbSeq
, isLongOffset
);
1197 return ZSTD_decompressSequencesLong_default(dctx
, dst
, maxDstSize
, seqStart
, seqSize
, nbSeq
, isLongOffset
);
1199 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1203 #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
1204 !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
1205 /* ZSTD_getLongOffsetsShare() :
1206 * condition : offTable must be valid
1207 * @return : "share" of long offsets (arbitrarily defined as > (1<<23))
1208 * compared to maximum possible of (1<<OffFSELog) */
1210 ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol
* offTable
)
1212 const void* ptr
= offTable
;
1213 U32
const tableLog
= ((const ZSTD_seqSymbol_header
*)ptr
)[0].tableLog
;
1214 const ZSTD_seqSymbol
* table
= offTable
+ 1;
1215 U32
const max
= 1 << tableLog
;
1217 DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog
);
1219 assert(max
<= (1 << OffFSELog
)); /* max not too large */
1220 for (u
=0; u
<max
; u
++) {
1221 if (table
[u
].nbAdditionalBits
> 22) total
+= 1;
1224 assert(tableLog
<= OffFSELog
);
1225 total
<<= (OffFSELog
- tableLog
); /* scale to OffFSELog */
1233 ZSTD_decompressBlock_internal(ZSTD_DCtx
* dctx
,
1234 void* dst
, size_t dstCapacity
,
1235 const void* src
, size_t srcSize
, const int frame
)
1236 { /* blockType == blockCompressed */
1237 const BYTE
* ip
= (const BYTE
*)src
;
1238 /* isLongOffset must be true if there are long offsets.
1239 * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN.
1240 * We don't expect that to be the case in 64-bit mode.
1241 * In block mode, window size is not known, so we have to be conservative.
1242 * (note: but it could be evaluated from current-lowLimit)
1244 ZSTD_longOffset_e
const isLongOffset
= (ZSTD_longOffset_e
)(MEM_32bits() && (!frame
|| (dctx
->fParams
.windowSize
> (1ULL << STREAM_ACCUMULATOR_MIN
))));
1245 DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32
)srcSize
);
1247 RETURN_ERROR_IF(srcSize
>= ZSTD_BLOCKSIZE_MAX
, srcSize_wrong
);
1249 /* Decode literals section */
1250 { size_t const litCSize
= ZSTD_decodeLiteralsBlock(dctx
, src
, srcSize
);
1251 DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32
)litCSize
);
1252 if (ZSTD_isError(litCSize
)) return litCSize
;
1254 srcSize
-= litCSize
;
1257 /* Build Decoding Tables */
1259 /* These macros control at build-time which decompressor implementation
1260 * we use. If neither is defined, we do some inspection and dispatch at
1263 #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
1264 !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
1265 int usePrefetchDecoder
= dctx
->ddictIsCold
;
1268 size_t const seqHSize
= ZSTD_decodeSeqHeaders(dctx
, &nbSeq
, ip
, srcSize
);
1269 if (ZSTD_isError(seqHSize
)) return seqHSize
;
1271 srcSize
-= seqHSize
;
1273 #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
1274 !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
1275 if ( !usePrefetchDecoder
1276 && (!frame
|| (dctx
->fParams
.windowSize
> (1<<24)))
1277 && (nbSeq
>ADVANCED_SEQS
) ) { /* could probably use a larger nbSeq limit */
1278 U32
const shareLongOffsets
= ZSTD_getLongOffsetsShare(dctx
->OFTptr
);
1279 U32
const minShare
= MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
1280 usePrefetchDecoder
= (shareLongOffsets
>= minShare
);
1284 dctx
->ddictIsCold
= 0;
1286 #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
1287 !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
1288 if (usePrefetchDecoder
)
1290 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1291 return ZSTD_decompressSequencesLong(dctx
, dst
, dstCapacity
, ip
, srcSize
, nbSeq
, isLongOffset
);
1294 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1296 return ZSTD_decompressSequences(dctx
, dst
, dstCapacity
, ip
, srcSize
, nbSeq
, isLongOffset
);
1302 size_t ZSTD_decompressBlock(ZSTD_DCtx
* dctx
,
1303 void* dst
, size_t dstCapacity
,
1304 const void* src
, size_t srcSize
)
1307 ZSTD_checkContinuity(dctx
, dst
);
1308 dSize
= ZSTD_decompressBlock_internal(dctx
, dst
, dstCapacity
, src
, srcSize
, /* frame */ 0);
1309 dctx
->previousDstEnd
= (char*)dst
+ dSize
;