/*
 * Copyright (c) 2017-present, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */
/* *********************************************************
*  Turn on Large Files support (>4GB) for 32-bit Linux/Unix
***********************************************************/
#if !defined(__64BIT__) || defined(__MINGW32__)  /* No point defining Large file for 64 bit but MinGW-w64 requires it */
#  if !defined(_FILE_OFFSET_BITS)
#    define _FILE_OFFSET_BITS 64   /* turn off_t into a 64-bit type for ftello, fseeko */
#  endif
#  if !defined(_LARGEFILE_SOURCE)  /* obsolete macro, replaced with _FILE_OFFSET_BITS */
#    define _LARGEFILE_SOURCE 1    /* Large File Support extension (LFS) - fseeko, ftello */
#  endif
#  if defined(_AIX) || defined(__hpux)
#    define _LARGE_FILES           /* Large file support on 32-bits AIX and HP-UX */
#  endif
#endif
/* ************************************************************
* Avoid fseek()'s 2GiB barrier with MSVC, macOS, *BSD, MinGW
***************************************************************/
#if defined(_MSC_VER) && _MSC_VER >= 1400
#   define LONG_SEEK _fseeki64
#elif !defined(__64BIT__) && (PLATFORM_POSIX_VERSION >= 200112L) /* No point defining Large file for 64 bit */
#   define LONG_SEEK fseeko
#elif defined(__MINGW32__) && !defined(__STRICT_ANSI__) && !defined(__NO_MINGW_LFS) && defined(__MSVCRT__)
#   define LONG_SEEK fseeko64
#elif defined(_WIN32) && !defined(__DJGPP__)
#   include <windows.h>
    /* fseek() replacement for old MSVCRT / raw Win32: route the seek through
     * SetFilePointerEx on the underlying OS handle so offsets beyond 2GiB work.
     * Mirrors fseek's interface: returns 0 on success, non-zero on failure. */
    static int LONG_SEEK(FILE* file, __int64 offset, int origin) {
        LARGE_INTEGER off;
        DWORD method;
        off.QuadPart = offset;
        if (origin == SEEK_END)
            method = FILE_END;
        else if (origin == SEEK_CUR)
            method = FILE_CURRENT;
        else
            method = FILE_BEGIN;

        if (SetFilePointerEx((HANDLE) _get_osfhandle(_fileno(file)), off, NULL, method))
            return 0;
        else
            return -1;
    }
#else
#   define LONG_SEEK fseek
#endif
#include <stdlib.h> /* malloc, free */
#include <stdio.h>  /* FILE* */
#include <string.h> /* memcpy, memmove, memset */
#include <limits.h> /* UINT_MAX */
#include <assert.h> /* assert */

#define XXH_STATIC_LINKING_ONLY
#define XXH_NAMESPACE ZSTD_
#include "xxhash.h"

#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"
#include "zstd_errors.h"
#include "mem.h"
#include "zstd_seekable.h"
/* Build a zstd-style error code (size_t in the error range) from an error name. */
#define ERROR(name) ((size_t)-ZSTD_error_##name)

/* Propagate a negative return from a user-supplied I/O callback as a
 * seekableIO error.  Wrapped in do/while(0) so the macro behaves as a
 * single statement inside unbraced if/else bodies. */
#define CHECK_IO(f) do { int const errcod = (f); if (errcod < 0) return ERROR(seekableIO); } while (0)

/* Classic min/max; note both arguments may be evaluated twice, so only pass
 * side-effect-free expressions. */
#undef MIN
#undef MAX
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))
82 /* Special-case callbacks for FILE* and in-memory modes, so that we can treat
83 * them the same way as the advanced API */
84 static int ZSTD_seekable_read_FILE(void* opaque
, void* buffer
, size_t n
)
86 size_t const result
= fread(buffer
, 1, n
, (FILE*)opaque
);
/* Seek callback for FILE* mode: reposition the stream via LONG_SEEK (64-bit
 * capable fseek variant), then flush so the C stream and OS file position
 * stay in sync.
 * @return 0 on success, a non-zero error code otherwise. */
static int ZSTD_seekable_seek_FILE(void* opaque, long long offset, int origin)
{
    int const ret = LONG_SEEK((FILE*)opaque, offset, origin);
    if (ret) return ret; /* don't flush (and mask the error) if the seek failed */
    return fflush((FILE*)opaque);
}
/* Wrapper around a caller-provided flat buffer, so that in-memory mode can be
 * driven through the same read/seek callback interface as FILE* mode. */
typedef struct {
    const void* ptr; /* start of the source buffer (not owned) */
    size_t size;     /* total number of bytes at ptr */
    size_t pos;      /* current read cursor; invariant: pos <= size */
} buffWrapper_t;

/* Read callback for in-memory mode: copy exactly n bytes at the cursor into
 * buffer and advance the cursor.
 * @return 0 on success, -1 if fewer than n bytes remain. */
static int ZSTD_seekable_read_buff(void* opaque, void* buffer, size_t n)
{
    buffWrapper_t* const buff = (buffWrapper_t*)opaque;
    /* overflow-safe bounds check: since pos <= size, size - pos cannot
     * underflow (the form `pos + n > size` could wrap for huge n) */
    if (n > buff->size - buff->pos) return -1;
    memcpy(buffer, (const unsigned char*)buff->ptr + buff->pos, n);
    buff->pos += n;
    return 0;
}
115 static int ZSTD_seekable_seek_buff(void* opaque
, long long offset
, int origin
)
117 buffWrapper_t
* const buff
= (buffWrapper_t
*) opaque
;
118 unsigned long long newOffset
;
124 newOffset
= (unsigned long long)buff
->pos
+ offset
;
127 newOffset
= (unsigned long long)buff
->size
+ offset
;
130 assert(0); /* not possible */
132 if (newOffset
> buff
->size
) {
135 buff
->pos
= newOffset
;
146 seekEntry_t
* entries
;
152 #define SEEKABLE_BUFF_SIZE ZSTD_BLOCKSIZE_MAX
154 struct ZSTD_seekable_s
{
155 ZSTD_DStream
* dstream
;
156 seekTable_t seekTable
;
157 ZSTD_seekable_customFile src
;
159 U64 decompressedOffset
;
162 BYTE inBuff
[SEEKABLE_BUFF_SIZE
]; /* need to do our own input buffering */
163 BYTE outBuff
[SEEKABLE_BUFF_SIZE
]; /* so we can efficiently decompress the
164 starts of chunks before we get to the
166 ZSTD_inBuffer in
; /* maintain continuity across ZSTD_seekable_decompress operations */
167 buffWrapper_t buffWrapper
; /* for `src.opaque` in in-memory mode */
169 XXH64_state_t xxhState
;
172 ZSTD_seekable
* ZSTD_seekable_create(void)
174 ZSTD_seekable
* zs
= malloc(sizeof(ZSTD_seekable
));
176 if (zs
== NULL
) return NULL
;
178 /* also initializes stage to zsds_init */
179 memset(zs
, 0, sizeof(*zs
));
181 zs
->dstream
= ZSTD_createDStream();
182 if (zs
->dstream
== NULL
) {
190 size_t ZSTD_seekable_free(ZSTD_seekable
* zs
)
192 if (zs
== NULL
) return 0; /* support free on null */
193 ZSTD_freeDStream(zs
->dstream
);
194 free(zs
->seekTable
.entries
);
200 /** ZSTD_seekable_offsetToFrameIndex() :
201 * Performs a binary search to find the last frame with a decompressed offset
203 * @return : the frame's index */
204 unsigned ZSTD_seekable_offsetToFrameIndex(ZSTD_seekable
* const zs
, unsigned long long pos
)
207 U32 hi
= (U32
)zs
->seekTable
.tableLen
;
208 assert(zs
->seekTable
.tableLen
<= UINT_MAX
);
210 if (pos
>= zs
->seekTable
.entries
[zs
->seekTable
.tableLen
].dOffset
) {
211 return (U32
)zs
->seekTable
.tableLen
;
214 while (lo
+ 1 < hi
) {
215 U32
const mid
= lo
+ ((hi
- lo
) >> 1);
216 if (zs
->seekTable
.entries
[mid
].dOffset
<= pos
) {
225 unsigned ZSTD_seekable_getNumFrames(ZSTD_seekable
* const zs
)
227 assert(zs
->seekTable
.tableLen
<= UINT_MAX
);
228 return (unsigned)zs
->seekTable
.tableLen
;
231 unsigned long long ZSTD_seekable_getFrameCompressedOffset(ZSTD_seekable
* const zs
, unsigned frameIndex
)
233 if (frameIndex
>= zs
->seekTable
.tableLen
) return ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE
;
234 return zs
->seekTable
.entries
[frameIndex
].cOffset
;
237 unsigned long long ZSTD_seekable_getFrameDecompressedOffset(ZSTD_seekable
* const zs
, unsigned frameIndex
)
239 if (frameIndex
>= zs
->seekTable
.tableLen
) return ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE
;
240 return zs
->seekTable
.entries
[frameIndex
].dOffset
;
243 size_t ZSTD_seekable_getFrameCompressedSize(ZSTD_seekable
* const zs
, unsigned frameIndex
)
245 if (frameIndex
>= zs
->seekTable
.tableLen
) return ERROR(frameIndex_tooLarge
);
246 return zs
->seekTable
.entries
[frameIndex
+ 1].cOffset
-
247 zs
->seekTable
.entries
[frameIndex
].cOffset
;
250 size_t ZSTD_seekable_getFrameDecompressedSize(ZSTD_seekable
* const zs
, unsigned frameIndex
)
252 if (frameIndex
> zs
->seekTable
.tableLen
) return ERROR(frameIndex_tooLarge
);
253 return zs
->seekTable
.entries
[frameIndex
+ 1].dOffset
-
254 zs
->seekTable
.entries
[frameIndex
].dOffset
;
257 static size_t ZSTD_seekable_loadSeekTable(ZSTD_seekable
* zs
)
260 ZSTD_seekable_customFile src
= zs
->src
;
261 /* read the footer, fixed size */
262 CHECK_IO(src
.seek(src
.opaque
, -(int)ZSTD_seekTableFooterSize
, SEEK_END
));
263 CHECK_IO(src
.read(src
.opaque
, zs
->inBuff
, ZSTD_seekTableFooterSize
));
265 if (MEM_readLE32(zs
->inBuff
+ 5) != ZSTD_SEEKABLE_MAGICNUMBER
) {
266 return ERROR(prefix_unknown
);
269 { BYTE
const sfd
= zs
->inBuff
[4];
270 checksumFlag
= sfd
>> 7;
272 /* check reserved bits */
273 if ((checksumFlag
>> 2) & 0x1f) {
274 return ERROR(corruption_detected
);
278 { U32
const numFrames
= MEM_readLE32(zs
->inBuff
);
279 U32
const sizePerEntry
= 8 + (checksumFlag
?4:0);
280 U32
const tableSize
= sizePerEntry
* numFrames
;
281 U32
const frameSize
= tableSize
+ ZSTD_seekTableFooterSize
+ ZSTD_SKIPPABLEHEADERSIZE
;
283 U32 remaining
= frameSize
- ZSTD_seekTableFooterSize
; /* don't need to re-read footer */
285 U32
const toRead
= MIN(remaining
, SEEKABLE_BUFF_SIZE
);
287 CHECK_IO(src
.seek(src
.opaque
, -(S64
)frameSize
, SEEK_END
));
288 CHECK_IO(src
.read(src
.opaque
, zs
->inBuff
, toRead
));
293 if (MEM_readLE32(zs
->inBuff
) != (ZSTD_MAGIC_SKIPPABLE_START
| 0xE)) {
294 return ERROR(prefix_unknown
);
296 if (MEM_readLE32(zs
->inBuff
+4) + ZSTD_SKIPPABLEHEADERSIZE
!= frameSize
) {
297 return ERROR(prefix_unknown
);
300 { /* Allocate an extra entry at the end so that we can do size
301 * computations on the last element without special case */
302 seekEntry_t
* entries
= (seekEntry_t
*)malloc(sizeof(seekEntry_t
) * (numFrames
+ 1));
313 return ERROR(memory_allocation
);
316 /* compute cumulative positions */
317 for (; idx
< numFrames
; idx
++) {
318 if (pos
+ sizePerEntry
> SEEKABLE_BUFF_SIZE
) {
319 U32
const offset
= SEEKABLE_BUFF_SIZE
- pos
;
320 U32
const toRead
= MIN(remaining
, SEEKABLE_BUFF_SIZE
- offset
);
321 memmove(zs
->inBuff
, zs
->inBuff
+ pos
, offset
); /* move any data we haven't read yet */
322 CHECK_IO(src
.read(src
.opaque
, zs
->inBuff
+offset
, toRead
));
326 entries
[idx
].cOffset
= cOffset
;
327 entries
[idx
].dOffset
= dOffset
;
329 cOffset
+= MEM_readLE32(zs
->inBuff
+ pos
);
331 dOffset
+= MEM_readLE32(zs
->inBuff
+ pos
);
334 entries
[idx
].checksum
= MEM_readLE32(zs
->inBuff
+ pos
);
338 entries
[numFrames
].cOffset
= cOffset
;
339 entries
[numFrames
].dOffset
= dOffset
;
341 zs
->seekTable
.entries
= entries
;
342 zs
->seekTable
.tableLen
= numFrames
;
343 zs
->seekTable
.checksumFlag
= checksumFlag
;
349 size_t ZSTD_seekable_initBuff(ZSTD_seekable
* zs
, const void* src
, size_t srcSize
)
351 zs
->buffWrapper
= (buffWrapper_t
){src
, srcSize
, 0};
352 { ZSTD_seekable_customFile srcFile
= {&zs
->buffWrapper
,
353 &ZSTD_seekable_read_buff
,
354 &ZSTD_seekable_seek_buff
};
355 return ZSTD_seekable_initAdvanced(zs
, srcFile
); }
358 size_t ZSTD_seekable_initFile(ZSTD_seekable
* zs
, FILE* src
)
360 ZSTD_seekable_customFile srcFile
= {src
, &ZSTD_seekable_read_FILE
,
361 &ZSTD_seekable_seek_FILE
};
362 return ZSTD_seekable_initAdvanced(zs
, srcFile
);
365 size_t ZSTD_seekable_initAdvanced(ZSTD_seekable
* zs
, ZSTD_seekable_customFile src
)
369 { const size_t seekTableInit
= ZSTD_seekable_loadSeekTable(zs
);
370 if (ZSTD_isError(seekTableInit
)) return seekTableInit
; }
372 zs
->decompressedOffset
= (U64
)-1;
373 zs
->curFrame
= (U32
)-1;
375 { const size_t dstreamInit
= ZSTD_initDStream(zs
->dstream
);
376 if (ZSTD_isError(dstreamInit
)) return dstreamInit
; }
380 size_t ZSTD_seekable_decompress(ZSTD_seekable
* zs
, void* dst
, size_t len
, unsigned long long offset
)
382 U32 targetFrame
= ZSTD_seekable_offsetToFrameIndex(zs
, offset
);
384 /* check if we can continue from a previous decompress job */
385 if (targetFrame
!= zs
->curFrame
|| offset
!= zs
->decompressedOffset
) {
386 zs
->decompressedOffset
= zs
->seekTable
.entries
[targetFrame
].dOffset
;
387 zs
->curFrame
= targetFrame
;
389 CHECK_IO(zs
->src
.seek(zs
->src
.opaque
,
390 zs
->seekTable
.entries
[targetFrame
].cOffset
,
392 zs
->in
= (ZSTD_inBuffer
){zs
->inBuff
, 0, 0};
393 XXH64_reset(&zs
->xxhState
, 0);
394 ZSTD_resetDStream(zs
->dstream
);
397 while (zs
->decompressedOffset
< offset
+ len
) {
399 ZSTD_outBuffer outTmp
;
401 if (zs
->decompressedOffset
< offset
) {
402 /* dummy decompressions until we get to the target offset */
403 outTmp
= (ZSTD_outBuffer
){zs
->outBuff
, MIN(SEEKABLE_BUFF_SIZE
, offset
- zs
->decompressedOffset
), 0};
405 outTmp
= (ZSTD_outBuffer
){dst
, len
, zs
->decompressedOffset
- offset
};
408 prevOutPos
= outTmp
.pos
;
409 toRead
= ZSTD_decompressStream(zs
->dstream
, &outTmp
, &zs
->in
);
410 if (ZSTD_isError(toRead
)) {
414 if (zs
->seekTable
.checksumFlag
) {
415 XXH64_update(&zs
->xxhState
, (BYTE
*)outTmp
.dst
+ prevOutPos
,
416 outTmp
.pos
- prevOutPos
);
418 zs
->decompressedOffset
+= outTmp
.pos
- prevOutPos
;
423 /* verify checksum */
424 if (zs
->seekTable
.checksumFlag
&&
425 (XXH64_digest(&zs
->xxhState
) & 0xFFFFFFFFU
) !=
426 zs
->seekTable
.entries
[targetFrame
].checksum
) {
427 return ERROR(corruption_detected
);
430 if (zs
->decompressedOffset
< offset
+ len
) {
431 /* go back to the start and force a reset of the stream */
432 targetFrame
= ZSTD_seekable_offsetToFrameIndex(zs
, zs
->decompressedOffset
);
437 /* read in more data if we're done with this buffer */
438 if (zs
->in
.pos
== zs
->in
.size
) {
439 toRead
= MIN(toRead
, SEEKABLE_BUFF_SIZE
);
440 CHECK_IO(zs
->src
.read(zs
->src
.opaque
, zs
->inBuff
, toRead
));
441 zs
->in
.size
= toRead
;
445 } while (zs
->decompressedOffset
!= offset
+ len
);
450 size_t ZSTD_seekable_decompressFrame(ZSTD_seekable
* zs
, void* dst
, size_t dstSize
, unsigned frameIndex
)
452 if (frameIndex
>= zs
->seekTable
.tableLen
) {
453 return ERROR(frameIndex_tooLarge
);
457 size_t const decompressedSize
=
458 zs
->seekTable
.entries
[frameIndex
+ 1].dOffset
-
459 zs
->seekTable
.entries
[frameIndex
].dOffset
;
460 if (dstSize
< decompressedSize
) {
461 return ERROR(dstSize_tooSmall
);
463 return ZSTD_seekable_decompress(
464 zs
, dst
, decompressedSize
,
465 zs
->seekTable
.entries
[frameIndex
].dOffset
);