/*
 * LZ4 - Fast LZ compression algorithm
 * Copyright (C) 2011-present, Yann Collet.
 *
 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above
 *    copyright notice, this list of conditions and the following disclaimer
 *    in the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * You can contact the author at :
 *  - LZ4 homepage : http://www.lz4.org
 *  - LZ4 source repository : https://github.com/lz4/lz4
 */

/*
 * This file contains unmodified code from lz4 1.9.3's decompressor, plus
 * associated macros and constants.
 *
 * It also contains a couple of defines from the old lz4.c to make things
 * fit together smoothly.
 */

#include <sys/zfs_context.h>

int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
    int isize, int maxOutputSize);

/*
 * Tuning parameters
 */

/*
 * COMPRESSIONLEVEL: Increasing this value improves compression ratio.
 * Lowering this value reduces memory usage. Reduced memory usage
 * typically improves speed, due to cache effects (ex: L1 32KB for Intel,
 * L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
 * (examples : 12 -> 16KB ; 17 -> 512KB)
 */
#define COMPRESSIONLEVEL 12

/*
 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
 * algorithm skip faster data segments considered "incompressible".
 * This may decrease compression ratio dramatically, but will be
 * faster on incompressible data. Increasing this value will make
 * the algorithm search more before declaring a segment "incompressible".
 * This could improve compression a bit, but will be slower on
 * incompressible data. The default value (6) is recommended.
 */
#define NOTCOMPRESSIBLE_CONFIRMATION 6

/*
 * Little Endian or Big Endian?
 * Note: overwrite the below #define if you know your architecture endianness.
 */
#if defined(_ZFS_BIG_ENDIAN)
#define LZ4_BIG_ENDIAN 1
#else
/*
 * Little Endian assumed. PDP Endian and other very rare endian formats
 * are unsupported.
 */
#undef LZ4_BIG_ENDIAN
#endif

/*-************************************
 * CPU Feature Detection
 **************************************/
/* LZ4_FORCE_MEMORY_ACCESS
 * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
 * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
 * The switch below allows selecting a different access method for improved performance.
 * Method 0 (default) : use `memcpy()`. Safe and portable.
 * Method 1 : `__packed` statement. It depends on a compiler extension (i.e., not portable).
 *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
 * Method 2 : direct access. This method is portable but violates the C standard.
 *            It can generate buggy code on targets whose assembly generation depends on alignment.
 *            But in some circumstances, it's the only known way to get the most performance (i.e., GCC + ARMv6).
 * See https://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
 * Prefer these methods in priority order (0 > 1 > 2).
 */
#ifndef LZ4_FORCE_MEMORY_ACCESS   /* can be defined externally */
#  if defined(__GNUC__) && \
     ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) \
    || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
#    define LZ4_FORCE_MEMORY_ACCESS 2
#  elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || defined(__GNUC__)
#    define LZ4_FORCE_MEMORY_ACCESS 1
#  endif
#endif
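
/*
 * Note: given the chain above, GCC-compatible compilers (clang also defines
 * __GNUC__) end up with Method 1, except GCC targeting ARMv6, which gets
 * Method 2; everything else falls back to Method 0 (memcpy-based access).
 */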

/*
 * LZ4_FORCE_SW_BITCOUNT
 * Define this parameter if your target system or compiler does not support hardware bit count.
 */
/*
 * Illumos : we can't use GCC's __builtin_ctz family of builtins in the
 *           kernel
 * Linux : we can use GCC's __builtin_ctz family of builtins in the
 *         kernel
 */
#undef LZ4_FORCE_SW_BITCOUNT
#if defined(__sunos__)
#define LZ4_FORCE_SW_BITCOUNT
#endif

/*
 * Compiler Options
 */
/* Disable restrict */
#define restrict

/*
 * Linux : GCC_VERSION is defined as of 3.9-rc1, so undefine it.
 * torvalds/linux@3f3f8d2f48acfd8ed3b8e6b7377935da57b27b16
 */
#ifdef GCC_VERSION
#undef GCC_VERSION
#endif

#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)

#ifndef LZ4_FORCE_INLINE
#  ifdef _MSC_VER   /* Visual Studio */
#    define LZ4_FORCE_INLINE static __forceinline
#  else
#    if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
#      ifdef __GNUC__
#        define LZ4_FORCE_INLINE static inline __attribute__((always_inline))
#      else
#        define LZ4_FORCE_INLINE static inline
#      endif
#    else
#      define LZ4_FORCE_INLINE static
#    endif /* __STDC_VERSION__ */
#  endif /* _MSC_VER */
#endif /* LZ4_FORCE_INLINE */

/* LZ4_FORCE_O2 and LZ4_FORCE_INLINE
 * gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy8,
 * together with a simple 8-byte copy loop as a fall-back path.
 * However, this optimization hurts the decompression speed by >30%,
 * because the execution does not go to the optimized loop
 * for typical compressible data, and all of the preamble checks
 * before going to the fall-back path become useless overhead.
 * This optimization happens only with the -O3 flag, and -O2 generates
 * a simple 8-byte copy loop.
 * With gcc on ppc64le, all of the LZ4_decompress_* and LZ4_wildCopy8
 * functions are annotated with __attribute__((optimize("O2"))),
 * and also LZ4_wildCopy8 is forcibly inlined, so that the O2 attribute
 * of LZ4_wildCopy8 does not affect the compression speed.
 */
#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && !defined(__clang__)
#  define LZ4_FORCE_O2 __attribute__((optimize("O2")))
#  undef LZ4_FORCE_INLINE
#  define LZ4_FORCE_INLINE static __inline __attribute__((optimize("O2"),always_inline))
#else
#  define LZ4_FORCE_O2
#endif

#ifndef expect
#if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__)
#  define expect(expr,value)    (__builtin_expect ((expr),(value)) )
#else
#  define expect(expr,value)    (expr)
#endif
#endif

#ifndef likely
#define likely(expr)     expect((expr) != 0, 1)
#endif

#ifndef unlikely
#define unlikely(expr)   expect((expr) != 0, 0)
#endif

#ifndef _KERNEL
#include <stdlib.h>   /* malloc, calloc, free */
#include <string.h>   /* memset, memcpy */
#endif
#define ALLOC(s)          malloc(s)
#define ALLOC_AND_ZERO(s) calloc(1,s)
#define FREEMEM(p)        free(p)

#define MEM_INIT(p,v,s)   memset((p),(v),(s))


/*-************************************
 * Common Constants
 **************************************/
#define MINMATCH 4

#define WILDCOPYLENGTH 8
#define LASTLITERALS   5   /* see ../doc/lz4_Block_format.md#parsing-restrictions */
#define MFLIMIT       12   /* see ../doc/lz4_Block_format.md#parsing-restrictions */
#define MATCH_SAFEGUARD_DISTANCE  ((2*WILDCOPYLENGTH) - MINMATCH)   /* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */
#define FASTLOOP_SAFE_DISTANCE 64

#define KB *(1 <<10)
#define MB *(1 <<20)
#define GB *(1U<<30)
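/*
 * Note: KB/MB/GB are postfix macros, so an expression such as `64 KB`
 * expands to `64 *(1 <<10)` (65536); see the `dictSize < (int)(64 KB)`
 * check in LZ4_decompress_generic() below.
 */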

#ifndef LZ4_DISTANCE_MAX   /* history window size; can be user-defined at compile time */
#  define LZ4_DISTANCE_MAX 65535   /* set to maximum value by default */
#endif

#define LZ4_DISTANCE_ABSOLUTE_MAX 65535
#if (LZ4_DISTANCE_MAX > LZ4_DISTANCE_ABSOLUTE_MAX)   /* max supported by LZ4 format */
#  error "LZ4_DISTANCE_MAX is too big : must be <= 65535"
#endif

#define ML_BITS  4
#define ML_MASK  ((1U<<ML_BITS)-1)
#define RUN_BITS (8-ML_BITS)
#define RUN_MASK ((1U<<RUN_BITS)-1)
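
/*
 * Illustrative note (not part of upstream lz4): each sequence begins with a
 * one-byte token whose high nibble is the literal length and whose low
 * nibble is the match length. For example, token 0x52 encodes 5 literal
 * bytes followed by a match of 2 + MINMATCH = 6 bytes. A nibble equal to
 * RUN_MASK / ML_MASK (15) means the length continues in extension bytes;
 * see read_variable_length() below.
 */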

#define DEBUGLOG(l, ...) {}   /* disabled */

#ifndef assert
#define assert ASSERT
#endif

/*-************************************
 * Types
 **************************************/
#ifndef _KERNEL
#include <limits.h>
#endif
#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
#ifndef _KERNEL
#include <stdint.h>
#endif
  typedef uint8_t   BYTE;
  typedef uint16_t  U16;
  typedef uint32_t  U32;
  typedef int32_t   S32;
  typedef uint64_t  U64;
  typedef uintptr_t uptrval;
#else
# if UINT_MAX != 4294967295UL
#   error "LZ4 code (when not C++ or C99) assumes that sizeof(int) == 4"
# endif
  typedef unsigned char       BYTE;
  typedef unsigned short      U16;
  typedef unsigned int        U32;
  typedef signed int          S32;
  typedef unsigned long long  U64;
  typedef size_t              uptrval;   /* generally true, except OpenVMS-64 */
#endif

#if defined(__x86_64__)
  typedef U64    reg_t;   /* 64-bits in x32 mode */
#else
  typedef size_t reg_t;   /* 32-bits in x32 mode */
#endif

typedef enum {
    notLimited = 0,
    limitedOutput = 1,
    fillOutput = 2
} limitedOutput_directive;


/*-************************************
 * Reading and writing into memory
 **************************************/

/**
 * LZ4 relies on memcpy with a constant size being inlined. In freestanding
 * environments, the compiler can't assume the implementation of memcpy() is
 * standard compliant, so it can't apply its specialized memcpy() inlining
 * logic. When possible, use __builtin_memcpy() to tell the compiler to analyze
 * memcpy() as if it were standard compliant, so it can inline it in freestanding
 * environments. This is needed when decompressing the Linux Kernel, for example.
 */
#if defined(__GNUC__) && (__GNUC__ >= 4)
#define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size)
#else
#define LZ4_memcpy(dst, src, size) memcpy(dst, src, size)
#endif

static unsigned LZ4_isLittleEndian(void)
{
    const union { U32 u; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental */
    return one.c[0];
}


#if defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==2)
/* lie to the compiler about data alignment; use with caution */

static U16 LZ4_read16(const void* memPtr) { return *(const U16*) memPtr; }

static void LZ4_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
static void LZ4_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }

#elif defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==1)

/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
/* currently only defined for gcc and icc */
typedef union { U16 u16; U32 u32; reg_t uArch; } __attribute__((packed)) unalign;

static U16 LZ4_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }

static void LZ4_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }

#else  /* safe and portable access using memcpy() */

static U16 LZ4_read16(const void* memPtr)
{
    U16 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
}

static void LZ4_write32(void* memPtr, U32 value)
{
    LZ4_memcpy(memPtr, &value, sizeof(value));
}

#endif /* LZ4_FORCE_MEMORY_ACCESS */

static U16 LZ4_readLE16(const void* memPtr)
{
    if (LZ4_isLittleEndian()) {
        return LZ4_read16(memPtr);
    } else {
        const BYTE* p = (const BYTE*)memPtr;
        return (U16)((U16)p[0] + (p[1]<<8));
    }
}

/* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */
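/* Note: this is safe only because every call site first ensures at least
 * WILDCOPYLENGTH (8) bytes of writable slack remain past dstEnd; see the
 * oend-MFLIMIT and FASTLOOP_SAFE_DISTANCE checks in LZ4_decompress_generic()
 * below, which route near-end sequences to the byte-wise safe paths. */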
LZ4_FORCE_INLINE
void LZ4_wildCopy8(void* dstPtr, const void* srcPtr, void* dstEnd)
{
    BYTE* d = (BYTE*)dstPtr;
    const BYTE* s = (const BYTE*)srcPtr;
    BYTE* const e = (BYTE*)dstEnd;

    do { LZ4_memcpy(d,s,8); d+=8; s+=8; } while (d<e);
}

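/*
 * inc32table/dec64table support match copies whose offset is smaller than
 * the copy stride: after the first 4 bytes are copied one at a time,
 * `match += inc32table[offset]` realigns the source so the next 4-byte copy
 * continues the repeating pattern, and `match -= dec64table[offset]`
 * re-adjusts it before the 8-byte strides that follow (see the offset<8
 * path near the end of LZ4_decompress_generic()).
 */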
static const unsigned inc32table[8] = {0, 1, 2, 1, 0, 4, 4, 4};
static const int      dec64table[8] = {0, 0, 0, -1, -4, 1, 2, 3};


#ifndef LZ4_FAST_DEC_LOOP
#  if defined __i386__ || defined _M_IX86 || defined __x86_64__ || defined _M_X64
#    define LZ4_FAST_DEC_LOOP 1
#  elif defined(__aarch64__) && !defined(__clang__)
     /* On aarch64, we disable this optimization for clang because on certain
      * mobile chipsets, performance is reduced with clang. For information
      * refer to https://github.com/lz4/lz4/pull/707 */
#    define LZ4_FAST_DEC_LOOP 1
#  else
#    define LZ4_FAST_DEC_LOOP 0
#  endif
#endif

#if LZ4_FAST_DEC_LOOP

LZ4_FORCE_INLINE void
LZ4_memcpy_using_offset_base(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
{
    assert(srcPtr + offset == dstPtr);
    if (offset < 8) {
        LZ4_write32(dstPtr, 0);   /* silence an msan warning when offset==0 */
        dstPtr[0] = srcPtr[0];
        dstPtr[1] = srcPtr[1];
        dstPtr[2] = srcPtr[2];
        dstPtr[3] = srcPtr[3];
        srcPtr += inc32table[offset];
        LZ4_memcpy(dstPtr+4, srcPtr, 4);
        srcPtr -= dec64table[offset];
        dstPtr += 8;
    } else {
        LZ4_memcpy(dstPtr, srcPtr, 8);
        dstPtr += 8;
        srcPtr += 8;
    }

    LZ4_wildCopy8(dstPtr, srcPtr, dstEnd);
}

/* customized variant of memcpy, which can overwrite up to 32 bytes beyond dstEnd
 * this version copies two times 16 bytes (instead of one time 32 bytes)
 * because it must be compatible with offsets >= 16. */
LZ4_FORCE_INLINE void
LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd)
{
    BYTE* d = (BYTE*)dstPtr;
    const BYTE* s = (const BYTE*)srcPtr;
    BYTE* const e = (BYTE*)dstEnd;

    do { LZ4_memcpy(d,s,16); LZ4_memcpy(d+16,s+16,16); d+=32; s+=32; } while (d<e);
}

/* LZ4_memcpy_using_offset() presumes :
 * - dstEnd >= dstPtr + MINMATCH
 * - there is at least 8 bytes available to write after dstEnd */
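/*
 * Descriptive note: for the common small offsets 1, 2 and 4, an 8-byte
 * pattern v[] is pre-built by replicating the source bytes, then stamped
 * over the destination 8 bytes at a time; any other offset falls back to
 * LZ4_memcpy_using_offset_base() above.
 */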
LZ4_FORCE_INLINE void
LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
{
    BYTE v[8];

    assert(dstEnd >= dstPtr + MINMATCH);

    switch(offset) {
    case 1:
        MEM_INIT(v, *srcPtr, 8);
        break;
    case 2:
        LZ4_memcpy(v, srcPtr, 2);
        LZ4_memcpy(&v[2], srcPtr, 2);
        LZ4_memcpy(&v[4], v, 4);
        break;
    case 4:
        LZ4_memcpy(v, srcPtr, 4);
        LZ4_memcpy(&v[4], srcPtr, 4);
        break;
    default:
        LZ4_memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset);
        return;
    }

    LZ4_memcpy(dstPtr, v, 8);
    dstPtr += 8;
    while (dstPtr < dstEnd) {
        LZ4_memcpy(dstPtr, v, 8);
        dstPtr += 8;
    }
}
#endif


/*-************************************
 * Local Structures and types
 **************************************/
typedef enum { clearedTable = 0, byPtr, byU32, byU16 } tableType_t;

/**
 * This enum distinguishes several different modes of accessing previous
 * content in the stream.
 *
 * - noDict        : There is no preceding content.
 * - withPrefix64k : Table entries up to ctx->dictSize before the current blob
 *                   being compressed are valid and refer to the preceding
 *                   content (of length ctx->dictSize), which is available
 *                   contiguously preceding in memory the content currently
 *                   being compressed.
 * - usingExtDict  : Like withPrefix64k, but the preceding content is somewhere
 *                   else in memory, starting at ctx->dictionary with length
 *                   ctx->dictSize.
 * - usingDictCtx  : Like usingExtDict, but everything concerning the preceding
 *                   content is in a separate context, pointed to by
 *                   ctx->dictCtx. ctx->dictionary, ctx->dictSize, and table
 *                   entries in the current context that refer to positions
 *                   preceding the beginning of the current compression are
 *                   ignored. Instead, ctx->dictCtx->dictionary and
 *                   ctx->dictCtx->dictSize describe the location and size of
 *                   the preceding content, and matches are found by looking
 *                   in the ctx->dictCtx->hashTable.
 */
typedef enum { noDict = 0, withPrefix64k, usingExtDict, usingDictCtx } dict_directive;
typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;

/*-*******************************
 * Decompression functions
 ********************************/

typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive;
typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive;

typedef enum { loop_error = -2, initial_error = -1, ok = 0 } variable_length_error;

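/*
 * Reads the extension bytes of a variable-length field: when a token nibble
 * equals RUN_MASK or ML_MASK (15), extension bytes are summed until a byte
 * != 255 is read. For example (illustrative), a literal length of 312 is
 * encoded as nibble 15, then 0xFF (255), then 0x2A (42): 15 + 255 + 42 = 312.
 */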
LZ4_FORCE_INLINE unsigned
read_variable_length(const BYTE**ip, const BYTE* lencheck,
    int loop_check, int initial_check,
    variable_length_error* error)
{
    U32 length = 0;
    U32 s;
    if (initial_check && unlikely((*ip) >= lencheck)) {   /* overflow detection */
        *error = initial_error;
        return length;
    }
    do {
        s = **ip;
        (*ip)++;
        length += s;
        if (loop_check && unlikely((*ip) >= lencheck)) {   /* overflow detection */
            *error = loop_error;
            return length;
        }
    } while (s==255);

    return length;
}

#define LZ4_STATIC_ASSERT(c) ASSERT(c)


/*! LZ4_decompress_generic() :
 *  This generic decompression function covers all use cases.
 *  It shall be instantiated several times, using different sets of directives.
 *  Note that it is important for performance that this function really get inlined,
 *  in order to remove useless branches during compilation optimization.
 */
LZ4_FORCE_INLINE int
LZ4_decompress_generic(
    const char* const src,
    char* const dst,
    int srcSize,
    int outputSize,   /* If endOnInput==endOnInputSize, this value is `dstCapacity` */

    endCondition_directive endOnInput,   /* endOnOutputSize, endOnInputSize */
    earlyEnd_directive partialDecoding,  /* full, partial */
    dict_directive dict,                 /* noDict, withPrefix64k, usingExtDict */
    const BYTE* const lowPrefix,         /* always <= dst, == dst when no prefix */
    const BYTE* const dictStart,         /* only if dict==usingExtDict */
    const size_t dictSize                /* note : = 0 if noDict */
    )
{
    if ((src == NULL) || (outputSize < 0)) { return -1; }

    {   const BYTE* ip = (const BYTE*) src;
        const BYTE* const iend = ip + srcSize;

        BYTE* op = (BYTE*) dst;
        BYTE* const oend = op + outputSize;
        BYTE* cpy;

        const BYTE* const dictEnd = (dictStart == NULL) ? NULL : dictStart + dictSize;

        const int safeDecode = (endOnInput==endOnInputSize);
        const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB)));


        /* Set up the "end" pointers for the shortcut. */
        const BYTE* const shortiend = iend - (endOnInput ? 14 : 8) /*maxLL*/ - 2 /*offset*/;
        const BYTE* const shortoend = oend - (endOnInput ? 14 : 8) /*maxLL*/ - 18 /*maxML*/;

        const BYTE* match;
        size_t offset;
        unsigned token;
        size_t length;


        DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i, dstSize:%i)", srcSize, outputSize);

        /* Special cases */
        assert(lowPrefix <= op);
        if ((endOnInput) && (unlikely(outputSize==0))) {
            /* Empty output buffer */
            if (partialDecoding) return 0;
            return ((srcSize==1) && (*ip==0)) ? 0 : -1;
        }
        if ((!endOnInput) && (unlikely(outputSize==0))) { return (*ip==0 ? 1 : -1); }
        if ((endOnInput) && unlikely(srcSize==0)) { return -1; }

        /* Currently the fast loop shows a regression on qualcomm arm chips. */
#if LZ4_FAST_DEC_LOOP
        if ((oend - op) < FASTLOOP_SAFE_DISTANCE) {
            DEBUGLOG(6, "skip fast decode loop");
            goto safe_decode;
        }

        /* Fast loop : decode sequences as long as output < iend-FASTLOOP_SAFE_DISTANCE */
        while (1) {
            /* Main fastloop assertion: We can always wildcopy FASTLOOP_SAFE_DISTANCE */
            assert(oend - op >= FASTLOOP_SAFE_DISTANCE);
            if (endOnInput) { assert(ip < iend); }
            token = *ip++;
            length = token >> ML_BITS;   /* literal length */

            assert(!endOnInput || ip <= iend);   /* ip < iend before the increment */

            /* decode literal length */
            if (length == RUN_MASK) {
                variable_length_error error = ok;
                length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error);
                if (error == initial_error) { goto _output_error; }
                if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; }   /* overflow detection */
                if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; }   /* overflow detection */

                /* copy literals */
                cpy = op+length;
                LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
                if (endOnInput) {   /* LZ4_decompress_safe() */
                    if ((cpy>oend-32) || (ip+length>iend-32)) { goto safe_literal_copy; }
                    LZ4_wildCopy32(op, ip, cpy);
                } else {   /* LZ4_decompress_fast() */
                    if (cpy>oend-8) { goto safe_literal_copy; }
                    LZ4_wildCopy8(op, ip, cpy);   /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time :
                                                   * it doesn't know input length, and only relies on end-of-block properties */
                }
                ip += length; op = cpy;
            } else {
                cpy = op+length;
                if (endOnInput) {   /* LZ4_decompress_safe() */
                    DEBUGLOG(7, "copy %u bytes in a 16-bytes stripe", (unsigned)length);
                    /* We don't need to check oend, since we check it once for each loop below */
                    if (ip > iend-(16 + 1/*max lit + offset + nextToken*/)) { goto safe_literal_copy; }
                    /* Literals can only be 14, but hope compilers optimize if we copy by a register size */
                    LZ4_memcpy(op, ip, 16);
                } else {   /* LZ4_decompress_fast() */
                    /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time :
                     * it doesn't know input length, and relies on end-of-block properties */
                    LZ4_memcpy(op, ip, 8);
                    if (length > 8) { LZ4_memcpy(op+8, ip+8, 8); }
                }
                ip += length; op = cpy;
            }

            /* get offset */
            offset = LZ4_readLE16(ip); ip+=2;
            match = op - offset;
            assert(match <= op);

            /* get matchlength */
            length = token & ML_MASK;

            if (length == ML_MASK) {
                variable_length_error error = ok;
                if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; }   /* Error : offset outside buffers */
                length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error);
                if (error != ok) { goto _output_error; }
                if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) { goto _output_error; }   /* overflow detection */
                length += MINMATCH;
                if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
                    goto safe_match_copy;
                }
            } else {
                length += MINMATCH;
                if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
                    goto safe_match_copy;
                }

                /* Fastpath check: Avoids a branch in LZ4_wildCopy32 if true */
                if ((dict == withPrefix64k) || (match >= lowPrefix)) {
                    if (offset >= 8) {
                        assert(match >= lowPrefix);
                        assert(match <= op);
                        assert(op + 18 <= oend);

                        LZ4_memcpy(op, match, 8);
                        LZ4_memcpy(op+8, match+8, 8);
                        LZ4_memcpy(op+16, match+16, 2);
                        op += length;
                        continue;
            }   }   }

            if (checkOffset && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; }   /* Error : offset outside buffers */
            /* match starting within external dictionary */
            if ((dict==usingExtDict) && (match < lowPrefix)) {
                if (unlikely(op+length > oend-LASTLITERALS)) {
                    if (partialDecoding) {
                        DEBUGLOG(7, "partialDecoding: dictionary match, close to dstEnd");
                        length = MIN(length, (size_t)(oend-op));
                    } else {
                        goto _output_error;   /* end-of-block condition violated */
                }   }

                if (length <= (size_t)(lowPrefix-match)) {
                    /* match fits entirely within external dictionary : just copy */
                    memmove(op, dictEnd - (lowPrefix-match), length);
                    op += length;
                } else {
                    /* match stretches into both external dictionary and current block */
                    size_t const copySize = (size_t)(lowPrefix - match);
                    size_t const restSize = length - copySize;
                    LZ4_memcpy(op, dictEnd - copySize, copySize);
                    op += copySize;
                    if (restSize > (size_t)(op - lowPrefix)) {   /* overlap copy */
                        BYTE* const endOfMatch = op + restSize;
                        const BYTE* copyFrom = lowPrefix;
                        while (op < endOfMatch) { *op++ = *copyFrom++; }
                    } else {
                        LZ4_memcpy(op, lowPrefix, restSize);
                        op += restSize;
                }   }
                continue;
            }

            /* copy match within block */
            cpy = op + length;

            assert((op <= oend) && (oend-op >= 32));
            if (unlikely(offset<16)) {
                LZ4_memcpy_using_offset(op, match, cpy, offset);
            } else {
                LZ4_wildCopy32(op, match, cpy);
            }

            op = cpy;   /* wildcopy correction */
        }
    safe_decode:
#endif

        /* Main Loop : decode remaining sequences where output < FASTLOOP_SAFE_DISTANCE */
        while (1) {
            token = *ip++;
            length = token >> ML_BITS;   /* literal length */

            assert(!endOnInput || ip <= iend);   /* ip < iend before the increment */

            /* A two-stage shortcut for the most common case:
             * 1) If the literal length is 0..14, and there is enough space,
             * enter the shortcut and copy 16 bytes on behalf of the literals
             * (in the fast mode, only 8 bytes can be safely copied this way).
             * 2) Further if the match length is 4..18, copy 18 bytes in a similar
             * manner; but we ensure that there's enough space in the output for
             * those 18 bytes earlier, upon entering the shortcut (in other words,
             * there is a combined check for both stages).
             */
            if ( (endOnInput ? length != RUN_MASK : length <= 8)
                /* strictly "less than" on input, to re-enter the loop with at least one byte */
              && likely((endOnInput ? ip < shortiend : 1) & (op <= shortoend)) ) {
                /* Copy the literals */
                LZ4_memcpy(op, ip, endOnInput ? 16 : 8);
                op += length; ip += length;

                /* The second stage: prepare for match copying, decode full info.
                 * If it doesn't work out, the info won't be wasted. */
                length = token & ML_MASK;   /* match length */
                offset = LZ4_readLE16(ip); ip += 2;
                match = op - offset;
                assert(match <= op);   /* check overflow */

                /* Do not deal with overlapping matches. */
                if ( (length != ML_MASK)
                  && (offset >= 8)
                  && (dict==withPrefix64k || match >= lowPrefix) ) {
                    /* Copy the match. */
                    LZ4_memcpy(op + 0, match + 0, 8);
                    LZ4_memcpy(op + 8, match + 8, 8);
                    LZ4_memcpy(op +16, match +16, 2);
                    op += length + MINMATCH;
                    /* Both stages worked, load the next token. */
                    continue;
                }

                /* The second stage didn't work out, but the info is ready.
                 * Propel it right to the point of match copying. */
                goto _copy_match;
            }

            /* decode literal length */
            if (length == RUN_MASK) {
                variable_length_error error = ok;
                length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error);
                if (error == initial_error) { goto _output_error; }
                if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; }   /* overflow detection */
                if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; }   /* overflow detection */
            }

            /* copy literals */
            cpy = op+length;
#if LZ4_FAST_DEC_LOOP
        safe_literal_copy:
#endif
            LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
            if ( ((endOnInput) && ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) )
              || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) )
            {
                /* We've either hit the input parsing restriction or the output parsing restriction.
                 * In the normal scenario, decoding a full block, it must be the last sequence,
                 * otherwise it's an error (invalid input or dimensions).
                 * In partialDecoding scenario, it's necessary to ensure there is no buffer overflow.
                 */
                if (partialDecoding) {
                    /* Since we are partial decoding we may be in this block because of the output parsing
                     * restriction, which is not valid since the output buffer is allowed to be undersized.
                     */
                    assert(endOnInput);
                    DEBUGLOG(7, "partialDecoding: copying literals, close to input or output end")
                    DEBUGLOG(7, "partialDecoding: literal length = %u", (unsigned)length);
                    DEBUGLOG(7, "partialDecoding: remaining space in dstBuffer : %i", (int)(oend - op));
                    DEBUGLOG(7, "partialDecoding: remaining space in srcBuffer : %i", (int)(iend - ip));
                    /* Finishing in the middle of a literals segment,
                     * due to lack of input.
                     */
                    if (ip+length > iend) {
                        length = (size_t)(iend-ip);
                        cpy = op + length;
                    }
                    /* Finishing in the middle of a literals segment,
                     * due to lack of output space.
                     */
                    if (cpy > oend) {
                        cpy = oend;
                        assert(op<=oend);
                        length = (size_t)(oend-op);
                    }
                } else {
                    /* We must be on the last sequence because of the parsing limitations so check
                     * that we exactly regenerate the original size (must be exact when !endOnInput).
                     */
                    if ((!endOnInput) && (cpy != oend)) { goto _output_error; }
                    /* We must be on the last sequence (or invalid) because of the parsing limitations
                     * so check that we exactly consume the input and don't overrun the output buffer.
                     */
                    if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) {
                        DEBUGLOG(6, "should have been last run of literals")
                        DEBUGLOG(6, "ip(%p) + length(%i) = %p != iend (%p)", ip, (int)length, ip+length, iend);
                        DEBUGLOG(6, "or cpy(%p) > oend(%p)", cpy, oend);
                        goto _output_error;
                    }
                }
                memmove(op, ip, length);   /* supports overlapping memory regions; only matters for in-place decompression scenarios */
                ip += length;
                op += length;
                /* Necessarily EOF when !partialDecoding.
                 * When partialDecoding, it is EOF if we've either
                 * filled the output buffer or
                 * can't proceed with reading an offset for following match.
                 */
                if (!partialDecoding || (cpy == oend) || (ip >= (iend-2))) {
                    break;
                }
            } else {
                LZ4_wildCopy8(op, ip, cpy);   /* may overwrite up to WILDCOPYLENGTH beyond cpy */
                ip += length; op = cpy;
            }

            /* get offset */
            offset = LZ4_readLE16(ip); ip+=2;
            match = op - offset;

            /* get matchlength */
            length = token & ML_MASK;

        _copy_match:
            if (length == ML_MASK) {
                variable_length_error error = ok;
                length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error);
                if (error != ok) goto _output_error;
                if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error;   /* overflow detection */
            }
            length += MINMATCH;

#if LZ4_FAST_DEC_LOOP
        safe_match_copy:
#endif
            if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error;   /* Error : offset outside buffers */
            /* match starting within external dictionary */
            if ((dict==usingExtDict) && (match < lowPrefix)) {
                if (unlikely(op+length > oend-LASTLITERALS)) {
                    if (partialDecoding) length = MIN(length, (size_t)(oend-op));
                    else goto _output_error;   /* doesn't respect parsing restriction */
                }

                if (length <= (size_t)(lowPrefix-match)) {
                    /* match fits entirely within external dictionary : just copy */
                    memmove(op, dictEnd - (lowPrefix-match), length);
                    op += length;
                } else {
                    /* match stretches into both external dictionary and current block */
                    size_t const copySize = (size_t)(lowPrefix - match);
                    size_t const restSize = length - copySize;
                    LZ4_memcpy(op, dictEnd - copySize, copySize);
                    op += copySize;
                    if (restSize > (size_t)(op - lowPrefix)) {   /* overlap copy */
                        BYTE* const endOfMatch = op + restSize;
                        const BYTE* copyFrom = lowPrefix;
                        while (op < endOfMatch) *op++ = *copyFrom++;
                    } else {
                        LZ4_memcpy(op, lowPrefix, restSize);
                        op += restSize;
                }   }
                continue;
            }
            assert(match >= lowPrefix);

            /* copy match within block */
            cpy = op + length;

            /* partialDecoding : may end anywhere within the block */
            assert(op<=oend);
            if (partialDecoding && (cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
                size_t const mlen = MIN(length, (size_t)(oend-op));
                const BYTE* const matchEnd = match + mlen;
                BYTE* const copyEnd = op + mlen;
                if (matchEnd > op) {   /* overlap copy */
                    while (op < copyEnd) { *op++ = *match++; }
                } else {
                    LZ4_memcpy(op, match, mlen);
                }
                op = copyEnd;
                if (op == oend) { break; }
                continue;
            }

            if (unlikely(offset<8)) {
                LZ4_write32(op, 0);   /* silence msan warning when offset==0 */
                op[0] = match[0];
                op[1] = match[1];
                op[2] = match[2];
                op[3] = match[3];
                match += inc32table[offset];
                LZ4_memcpy(op+4, match, 4);
                match -= dec64table[offset];
            } else {
                LZ4_memcpy(op, match, 8);
                match += 8;
            }
            op += 8;

            if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
                BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1);
                if (cpy > oend-LASTLITERALS) { goto _output_error; }   /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
                if (op < oCopyLimit) {
                    LZ4_wildCopy8(op, match, oCopyLimit);
                    match += oCopyLimit - op;
                    op = oCopyLimit;
                }
                while (op < cpy) { *op++ = *match++; }
            } else {
                LZ4_memcpy(op, match, 8);
                if (length > 16) { LZ4_wildCopy8(op+8, match+8, cpy); }
            }
            op = cpy;   /* wildcopy correction */
        }

        /* end of decoding */
        if (endOnInput) {
            DEBUGLOG(5, "decoded %i bytes", (int) (((char*)op)-dst));
            return (int) (((char*)op)-dst);   /* Nb of output bytes decoded */
        } else {
            return (int) (((const char*)ip)-src);   /* Nb of input bytes read */
        }

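        /*
         * Note: the negative error return encodes how far decoding got;
         * -(ip - src) - 1 lets callers recover the offset of the offending
         * input byte (see the wrapper documentation below).
         */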
        /* Overflow error detected */
    _output_error:
        return (int) (-(((const char*)ip)-src))-1;
    }
}

/*
 * LZ4_uncompress_unknownOutputSize() :
 *	isize : is the input size, therefore the compressed size
 *	maxOutputSize : is the size of the destination buffer (which must be
 *		already allocated)
 *	return : the number of bytes decoded in the destination buffer
 *		(necessarily <= maxOutputSize). If the source stream is
 *		malformed, the function will stop decoding and return a
 *		negative result, indicating the byte position of the faulty
 *		instruction. This function never writes beyond dest +
 *		maxOutputSize, and is therefore protected against malicious
 *		data packets.
 *	note : Destination buffer must be already allocated.
 *		This version is slightly slower than real_LZ4_uncompress().
 */

/*
 * Note: In upstream code, LZ4_uncompress_unknownOutputSize is now a legacy
 * wrapper for LZ4_decompress_safe, which is in turn a wrapper for
 * LZ4_decompress_generic; this wrapper flattens that, rather than
 * rewriting the callers.
 */
int LZ4_uncompress_unknownOutputSize(const char* source, char* dest,
    int compressedSize, int maxDecompressedSize)
{
    return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize,
        endOnInputSize, decode_full_block, noDict,
        (BYTE*)dest, NULL, 0);
}