]>
Commit | Line | Data |
---|---|---|
9759c60f ED |
1 | /* |
2 | * LZ4 - Fast LZ compression algorithm | |
3 | * Header File | |
4 | * Copyright (C) 2011-2013, Yann Collet. | |
5 | * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) | |
6 | * | |
7 | * Redistribution and use in source and binary forms, with or without | |
8 | * modification, are permitted provided that the following conditions are | |
9 | * met: | |
10 | * | |
11 | * * Redistributions of source code must retain the above copyright | |
12 | * notice, this list of conditions and the following disclaimer. | |
13 | * * Redistributions in binary form must reproduce the above | |
14 | * copyright notice, this list of conditions and the following disclaimer | |
15 | * in the documentation and/or other materials provided with the | |
16 | * distribution. | |
17 | * | |
18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
29 | * | |
30 | * You can contact the author at : | |
31 | * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html | |
32 | * - LZ4 source repository : http://code.google.com/p/lz4/ | |
33 | */ | |
34 | ||
35 | #include <sys/zfs_context.h> | |
36 | ||
/*
 * Forward declarations of the file-local compression / decompression
 * entry points implemented below.
 */
static int real_LZ4_compress(const char *source, char *dest, int isize,
    int osize);
static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
    int isize, int maxOutputSize);
static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);
static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);

/* kmem cache of struct refTables scratch buffers used by real_LZ4_compress() */
static kmem_cache_t *lz4_cache;
47 | ||
48 | /*ARGSUSED*/ | |
49 | size_t | |
d1d7e268 MK |
50 | lz4_compress_zfs(void *s_start, void *d_start, size_t s_len, |
51 | size_t d_len, int n) | |
9759c60f ED |
52 | { |
53 | uint32_t bufsiz; | |
54 | char *dest = d_start; | |
55 | ||
56 | ASSERT(d_len >= sizeof (bufsiz)); | |
57 | ||
58 | bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len, | |
59 | d_len - sizeof (bufsiz)); | |
60 | ||
61 | /* Signal an error if the compression routine returned zero. */ | |
62 | if (bufsiz == 0) | |
63 | return (s_len); | |
64 | ||
65 | /* | |
4e33ba4c | 66 | * Encode the compressed buffer size at the start. We'll need this in |
9759c60f ED |
67 | * decompression to counter the effects of padding which might be |
68 | * added to the compressed buffer and which, if unhandled, would | |
69 | * confuse the hell out of our decompression function. | |
70 | */ | |
71 | *(uint32_t *)dest = BE_32(bufsiz); | |
72 | ||
73 | return (bufsiz + sizeof (bufsiz)); | |
74 | } | |
75 | ||
76 | /*ARGSUSED*/ | |
77 | int | |
d1d7e268 MK |
78 | lz4_decompress_zfs(void *s_start, void *d_start, size_t s_len, |
79 | size_t d_len, int n) | |
9759c60f ED |
80 | { |
81 | const char *src = s_start; | |
82 | uint32_t bufsiz = BE_IN32(src); | |
83 | ||
84 | /* invalid compressed buffer size encoded at start */ | |
85 | if (bufsiz + sizeof (bufsiz) > s_len) | |
86 | return (1); | |
87 | ||
88 | /* | |
89 | * Returns 0 on success (decompression function returned non-negative) | |
a7004725 | 90 | * and non-zero on failure (decompression function returned negative). |
9759c60f ED |
91 | */ |
92 | return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)], | |
93 | d_start, bufsiz, d_len) < 0); | |
94 | } | |
95 | ||
96 | /* | |
97 | * LZ4 API Description: | |
98 | * | |
99 | * Simple Functions: | |
100 | * real_LZ4_compress() : | |
101 | * isize : is the input size. Max supported value is ~1.9GB | |
102 | * return : the number of bytes written in buffer dest | |
103 | * or 0 if the compression fails (if LZ4_COMPRESSMIN is set). | |
104 | * note : destination buffer must be already allocated. | |
105 | * destination buffer must be sized to handle worst cases | |
106 | * situations (input data not compressible) worst case size | |
107 | * evaluation is provided by function LZ4_compressBound(). | |
108 | * | |
109 | * real_LZ4_uncompress() : | |
110 | * osize : is the output size, therefore the original size | |
111 | * return : the number of bytes read in the source buffer. | |
112 | * If the source stream is malformed, the function will stop | |
113 | * decoding and return a negative result, indicating the byte | |
114 | * position of the faulty instruction. This function never | |
115 | * writes beyond dest + osize, and is therefore protected | |
116 | * against malicious data packets. | |
117 | * note : destination buffer must be already allocated | |
4240dc33 TC |
118 | * note : real_LZ4_uncompress() is not used in ZFS so its code |
119 | * is not present here. | |
9759c60f ED |
120 | * |
121 | * Advanced Functions | |
122 | * | |
123 | * LZ4_compressBound() : | |
124 | * Provides the maximum size that LZ4 may output in a "worst case" | |
125 | * scenario (input data not compressible) primarily useful for memory | |
126 | * allocation of output buffer. | |
127 | * | |
128 | * isize : is the input size. Max supported value is ~1.9GB | |
129 | * return : maximum output size in a "worst case" scenario | |
130 | * note : this function is limited by "int" range (2^31-1) | |
131 | * | |
132 | * LZ4_uncompress_unknownOutputSize() : | |
133 | * isize : is the input size, therefore the compressed size | |
134 | * maxOutputSize : is the size of the destination buffer (which must be | |
135 | * already allocated) | |
136 | * return : the number of bytes decoded in the destination buffer | |
137 | * (necessarily <= maxOutputSize). If the source stream is | |
138 | * malformed, the function will stop decoding and return a | |
139 | * negative result, indicating the byte position of the faulty | |
140 | * instruction. This function never writes beyond dest + | |
141 | * maxOutputSize, and is therefore protected against malicious | |
142 | * data packets. | |
143 | * note : Destination buffer must be already allocated. | |
144 | * This version is slightly slower than real_LZ4_uncompress() | |
145 | * | |
146 | * LZ4_compressCtx() : | |
147 | * This function explicitly handles the CTX memory structure. | |
148 | * | |
149 | * ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated | |
d1d7e268 MK |
150 | * by the caller (either on the stack or using kmem_cache_alloc). Passing |
151 | * NULL isn't valid. | |
9759c60f ED |
152 | * |
153 | * LZ4_compress64kCtx() : | |
154 | * Same as LZ4_compressCtx(), but specific to small inputs (<64KB). | |
155 | * isize *Must* be <64KB, otherwise the output will be corrupted. | |
156 | * | |
157 | * ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated | |
d1d7e268 MK |
158 | * by the caller (either on the stack or using kmem_cache_alloc). Passing |
159 | * NULL isn't valid. | |
9759c60f ED |
160 | */ |
161 | ||
162 | /* | |
163 | * Tuning parameters | |
164 | */ | |
165 | ||
166 | /* | |
167 | * COMPRESSIONLEVEL: Increasing this value improves compression ratio | |
168 | * Lowering this value reduces memory usage. Reduced memory usage | |
169 | * typically improves speed, due to cache effect (ex: L1 32KB for Intel, | |
170 | * L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes | |
171 | * (examples : 12 -> 16KB ; 17 -> 512KB) | |
172 | */ | |
173 | #define COMPRESSIONLEVEL 12 | |
174 | ||
175 | /* | |
176 | * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the | |
177 | * algorithm skip faster data segments considered "incompressible". | |
178 | * This may decrease compression ratio dramatically, but will be | |
179 | * faster on incompressible data. Increasing this value will make | |
180 | * the algorithm search more before declaring a segment "incompressible". | |
181 | * This could improve compression a bit, but will be slower on | |
182 | * incompressible data. The default value (6) is recommended. | |
183 | */ | |
184 | #define NOTCOMPRESSIBLE_CONFIRMATION 6 | |
185 | ||
186 | /* | |
187 | * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to | |
188 | * performance for big endian cpu, but the resulting compressed stream | |
189 | * will be incompatible with little-endian CPU. You can set this option | |
190 | * to 1 in situations where data will stay within closed environment. | |
191 | * This option is useless on Little_Endian CPU (such as x86). | |
192 | */ | |
193 | /* #define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */ | |
194 | ||
195 | /* | |
196 | * CPU Feature Detection | |
197 | */ | |
198 | ||
199 | /* 32 or 64 bits ? */ | |
de39ec11 | 200 | #if defined(_LP64) |
9759c60f ED |
201 | #define LZ4_ARCH64 1 |
202 | #else | |
203 | #define LZ4_ARCH64 0 | |
204 | #endif | |
205 | ||
206 | /* | |
207 | * Little Endian or Big Endian? | |
4e33ba4c | 208 | * Note: overwrite the below #define if you know your architecture endianness. |
9759c60f | 209 | */ |
de39ec11 | 210 | #if defined(_BIG_ENDIAN) |
9759c60f ED |
211 | #define LZ4_BIG_ENDIAN 1 |
212 | #else | |
213 | /* | |
214 | * Little Endian assumed. PDP Endian and other very rare endian format | |
215 | * are unsupported. | |
216 | */ | |
de39ec11 | 217 | #undef LZ4_BIG_ENDIAN |
9759c60f ED |
218 | #endif |
219 | ||
220 | /* | |
221 | * Unaligned memory access is automatically enabled for "common" CPU, | |
222 | * such as x86. For others CPU, the compiler will be more cautious, and | |
223 | * insert extra code to ensure aligned access is respected. If you know | |
224 | * your target CPU supports unaligned memory access, you may want to | |
225 | * force this option manually to improve performance | |
226 | */ | |
227 | #if defined(__ARM_FEATURE_UNALIGNED) | |
228 | #define LZ4_FORCE_UNALIGNED_ACCESS 1 | |
229 | #endif | |
230 | ||
231 | /* | |
232 | * Illumos : we can't use GCC's __builtin_ctz family of builtins in the | |
233 | * kernel | |
234 | * Linux : we can use GCC's __builtin_ctz family of builtins in the | |
235 | * kernel | |
236 | */ | |
237 | #undef LZ4_FORCE_SW_BITCOUNT | |
4fcc4379 BB |
238 | #if defined(__sparc) |
239 | #define LZ4_FORCE_SW_BITCOUNT | |
240 | #endif | |
9759c60f ED |
241 | |
242 | /* | |
243 | * Compiler Options | |
244 | */ | |
245 | /* Disable restrict */ | |
246 | #define restrict | |
247 | ||
1c24b699 RY |
248 | /* |
249 | * Linux : GCC_VERSION is defined as of 3.9-rc1, so undefine it. | |
250 | * torvalds/linux@3f3f8d2f48acfd8ed3b8e6b7377935da57b27b16 | |
251 | */ | |
252 | #ifdef GCC_VERSION | |
253 | #undef GCC_VERSION | |
254 | #endif | |
255 | ||
9759c60f ED |
256 | #define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) |
257 | ||
258 | #if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__) | |
259 | #define expect(expr, value) (__builtin_expect((expr), (value))) | |
260 | #else | |
261 | #define expect(expr, value) (expr) | |
262 | #endif | |
263 | ||
264 | #ifndef likely | |
265 | #define likely(expr) expect((expr) != 0, 1) | |
266 | #endif | |
267 | ||
268 | #ifndef unlikely | |
269 | #define unlikely(expr) expect((expr) != 0, 0) | |
270 | #endif | |
271 | ||
d1d7e268 | 272 | #define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \ |
9759c60f ED |
273 | (((x) & 0xffu) << 8))) |
274 | ||
275 | /* Basic types */ | |
276 | #define BYTE uint8_t | |
277 | #define U16 uint16_t | |
278 | #define U32 uint32_t | |
279 | #define S32 int32_t | |
280 | #define U64 uint64_t | |
281 | ||
282 | #ifndef LZ4_FORCE_UNALIGNED_ACCESS | |
283 | #pragma pack(1) | |
284 | #endif | |
285 | ||
286 | typedef struct _U16_S { | |
287 | U16 v; | |
288 | } U16_S; | |
289 | typedef struct _U32_S { | |
290 | U32 v; | |
291 | } U32_S; | |
292 | typedef struct _U64_S { | |
293 | U64 v; | |
294 | } U64_S; | |
295 | ||
296 | #ifndef LZ4_FORCE_UNALIGNED_ACCESS | |
297 | #pragma pack() | |
298 | #endif | |
299 | ||
300 | #define A64(x) (((U64_S *)(x))->v) | |
301 | #define A32(x) (((U32_S *)(x))->v) | |
302 | #define A16(x) (((U16_S *)(x))->v) | |
303 | ||
304 | /* | |
305 | * Constants | |
306 | */ | |
307 | #define MINMATCH 4 | |
308 | ||
309 | #define HASH_LOG COMPRESSIONLEVEL | |
310 | #define HASHTABLESIZE (1 << HASH_LOG) | |
311 | #define HASH_MASK (HASHTABLESIZE - 1) | |
312 | ||
313 | #define SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \ | |
314 | NOTCOMPRESSIBLE_CONFIRMATION : 2) | |
315 | ||
316 | #define COPYLENGTH 8 | |
317 | #define LASTLITERALS 5 | |
318 | #define MFLIMIT (COPYLENGTH + MINMATCH) | |
319 | #define MINLENGTH (MFLIMIT + 1) | |
320 | ||
321 | #define MAXD_LOG 16 | |
322 | #define MAX_DISTANCE ((1 << MAXD_LOG) - 1) | |
323 | ||
324 | #define ML_BITS 4 | |
325 | #define ML_MASK ((1U<<ML_BITS)-1) | |
326 | #define RUN_BITS (8-ML_BITS) | |
327 | #define RUN_MASK ((1U<<RUN_BITS)-1) | |
328 | ||
329 | ||
330 | /* | |
331 | * Architecture-specific macros | |
332 | */ | |
333 | #if LZ4_ARCH64 | |
334 | #define STEPSIZE 8 | |
335 | #define UARCH U64 | |
336 | #define AARCH A64 | |
337 | #define LZ4_COPYSTEP(s, d) A64(d) = A64(s); d += 8; s += 8; | |
338 | #define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d) | |
339 | #define LZ4_SECURECOPY(s, d, e) if (d < e) LZ4_WILDCOPY(s, d, e) | |
340 | #define HTYPE U32 | |
341 | #define INITBASE(base) const BYTE* const base = ip | |
342 | #else /* !LZ4_ARCH64 */ | |
343 | #define STEPSIZE 4 | |
344 | #define UARCH U32 | |
345 | #define AARCH A32 | |
346 | #define LZ4_COPYSTEP(s, d) A32(d) = A32(s); d += 4; s += 4; | |
347 | #define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d); | |
348 | #define LZ4_SECURECOPY LZ4_WILDCOPY | |
349 | #define HTYPE const BYTE * | |
350 | #define INITBASE(base) const int base = 0 | |
351 | #endif /* !LZ4_ARCH64 */ | |
352 | ||
353 | #if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE)) | |
354 | #define LZ4_READ_LITTLEENDIAN_16(d, s, p) \ | |
355 | { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; } | |
356 | #define LZ4_WRITE_LITTLEENDIAN_16(p, i) \ | |
357 | { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; } | |
358 | #else | |
359 | #define LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); } | |
360 | #define LZ4_WRITE_LITTLEENDIAN_16(p, v) { A16(p) = v; p += 2; } | |
361 | #endif | |
362 | ||
363 | ||
/* Local structures */
/*
 * Hash-table scratch space used by the compressors; one instance is
 * drawn from lz4_cache per call to real_LZ4_compress().
 */
struct refTables {
	HTYPE hashTable[HASHTABLESIZE];
};
368 | ||
369 | ||
370 | /* Macros */ | |
371 | #define LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \ | |
372 | HASH_LOG)) | |
373 | #define LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p)) | |
374 | #define LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e); | |
375 | #define LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \ | |
376 | d = e; } | |
377 | ||
378 | ||
379 | /* Private functions */ | |
380 | #if LZ4_ARCH64 | |
381 | ||
/*
 * Count the number of common (equal) leading bytes implied by a non-zero
 * 64-bit XOR difference of two words: leading-zero bytes on big-endian,
 * trailing-zero bytes on little-endian.  Used to extend match runs.
 */
static inline int
LZ4_NbCommonBytes(register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clzll(val) >> 3);
#else
	/* Software fallback: binary search for the first non-zero byte. */
	int r;
	if (!(val >> 32)) {
		r = 4;
	} else {
		r = 0;
		val >>= 32;
	}
	if (!(val >> 16)) {
		r += 2;
		val >>= 8;
	} else {
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctzll(val) >> 3);
#else
	/* De Bruijn multiply-and-lookup to locate the lowest set bit's byte. */
	static const int DeBruijnBytePos[64] =
	    { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
		3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
		5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
		4, 5, 7, 2, 6, 5, 7, 6, 7, 7
	};
	return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
	    58];
#endif
#endif
}
422 | ||
423 | #else | |
424 | ||
/*
 * 32-bit variant: count common leading bytes implied by a non-zero
 * 32-bit XOR difference (leading-zero bytes on big-endian, trailing-zero
 * bytes on little-endian).
 */
static inline int
LZ4_NbCommonBytes(register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clz(val) >> 3);
#else
	/* Software fallback: binary search for the first non-zero byte. */
	int r;
	if (!(val >> 16)) {
		r = 2;
		val >>= 8;
	} else {
		r = 0;
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctz(val) >> 3);
#else
	/* De Bruijn multiply-and-lookup to locate the lowest set bit's byte. */
	static const int DeBruijnBytePos[32] = {
		0, 0, 3, 0, 3, 1, 3, 0,
		3, 2, 2, 1, 3, 2, 0, 1,
		3, 3, 1, 2, 2, 2, 2, 0,
		3, 1, 2, 0, 1, 0, 1, 1
	};
	return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
	    27];
#endif
#endif
}
460 | ||
461 | #endif | |
462 | ||
463 | /* Compression functions */ | |
464 | ||
/*
 * General-purpose LZ4 compressor.  Compresses isize bytes from source
 * into dest (capacity osize) using the hash table in ctx (a zeroed
 * struct refTables).  Returns the compressed size, or 0 when the output
 * would not fit in osize (callers treat 0 as "store uncompressed").
 */
/*ARGSUSED*/
static int
LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
	struct refTables *srt = (struct refTables *)ctx;
	HTYPE *HashTable = (HTYPE *) (srt->hashTable);

	const BYTE *ip = (BYTE *) source;
	INITBASE(base);
	const BYTE *anchor = ip;	/* start of the pending literal run */
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;


	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
	ip++;
	forwardH = LZ4_HASH_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/*
		 * Find a match; the step grows as attempts fail, so
		 * incompressible data is skipped over quickly.
		 */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if (unlikely(forwardIp > mflimit)) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));

		/* Catch up: extend the match backwards into the literals */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    unlikely(ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend))
			return (0);

		if (length >= (int)RUN_MASK) {
			/* Long run: RUN_MASK in the token plus 255-byte extensions */
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while (likely(ip < matchlimit - (STEPSIZE - 1))) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255) {
				*op++ = 255;
			}
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}
645 | ||
646 | ||
647 | ||
648 | /* Note : this function is valid only if isize < LZ4_64KLIMIT */ | |
649 | #define LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1)) | |
650 | #define HASHLOG64K (HASH_LOG + 1) | |
651 | #define HASH64KTABLESIZE (1U << HASHLOG64K) | |
652 | #define LZ4_HASH64K_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8) - \ | |
653 | HASHLOG64K)) | |
654 | #define LZ4_HASH64K_VALUE(p) LZ4_HASH64K_FUNCTION(A32(p)) | |
655 | ||
/*
 * Small-input compressor: same algorithm as LZ4_compressCtx(), but the
 * hash table stores 16-bit offsets (U16), so isize *must* be below
 * LZ4_64KLIMIT or the output will be corrupted.  Returns the compressed
 * size, or 0 when the output would not fit in osize.
 */
/*ARGSUSED*/
static int
LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
	struct refTables *srt = (struct refTables *)ctx;
	U16 *HashTable = (U16 *) (srt->hashTable);

	const BYTE *ip = (BYTE *) source;
	const BYTE *anchor = ip;	/* start of the pending literal run */
	const BYTE *const base = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;

	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	ip++;
	forwardH = LZ4_HASH64K_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/*
		 * Find a match.  No MAX_DISTANCE test is needed here:
		 * with <64KB inputs every 16-bit offset is in range.
		 */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if (forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH64K_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while (A32(ref) != A32(ip));

		/* Catch up: extend the match backwards into the literals */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    (ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend))
			return (0);

		if (length >= (int)RUN_MASK) {
			/* Long run: RUN_MASK in the token plus 255-byte extensions */
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while (ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
		if (A32(ref) == A32(ip)) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH64K_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255)
				*op++ = 255;
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}
833 | ||
834 | static int | |
835 | real_LZ4_compress(const char *source, char *dest, int isize, int osize) | |
836 | { | |
837 | void *ctx; | |
838 | int result; | |
839 | ||
840 | ASSERT(lz4_cache != NULL); | |
79c76d5b | 841 | ctx = kmem_cache_alloc(lz4_cache, KM_SLEEP); |
9759c60f ED |
842 | |
843 | /* | |
844 | * out of kernel memory, gently fall through - this will disable | |
845 | * compression in zio_compress_data | |
846 | */ | |
847 | if (ctx == NULL) | |
848 | return (0); | |
849 | ||
850 | memset(ctx, 0, sizeof (struct refTables)); | |
851 | ||
852 | if (isize < LZ4_64KLIMIT) | |
853 | result = LZ4_compress64kCtx(ctx, source, dest, isize, osize); | |
854 | else | |
855 | result = LZ4_compressCtx(ctx, source, dest, isize, osize); | |
856 | ||
857 | kmem_cache_free(lz4_cache, ctx); | |
858 | return (result); | |
859 | } | |
860 | ||
861 | /* Decompression functions */ | |
862 | ||
863 | /* | |
864 | * Note: The decoding functions real_LZ4_uncompress() and | |
865 | * LZ4_uncompress_unknownOutputSize() are safe against "buffer overflow" | |
866 | * attack type. They will never write nor read outside of the provided | |
867 | * output buffers. LZ4_uncompress_unknownOutputSize() also insures that | |
868 | * it will never read outside of the input buffer. A corrupted input | |
869 | * will produce an error result, a negative int, indicating the position | |
870 | * of the error within input stream. | |
4240dc33 TC |
871 | * |
872 | * Note[2]: real_LZ4_uncompress(), referred to above, is not used in ZFS so | |
873 | * its code is not present here. | |
9759c60f ED |
874 | */ |
875 | ||
f871ab6e FS |
/*
 * Correction tables for copying overlapping matches (offset < STEPSIZE),
 * indexed by (op - ref); used in the "copy repeated sequence" path of
 * the decoder below.
 */
static const int dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
#if LZ4_ARCH64
static const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
#endif
880 | ||
9759c60f ED |
/*
 * Bounds-checked LZ4 decoder.  Decodes isize bytes from source into
 * dest without ever reading past source + isize or writing past
 * dest + maxOutputSize.  Returns the number of bytes produced, or a
 * negative value if the stream is malformed.
 */
static int
LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
    int maxOutputSize)
{
	/* Local Variables */
	const BYTE *restrict ip = (const BYTE *) source;
	const BYTE *const iend = ip + isize;
	const BYTE *ref;

	BYTE *op = (BYTE *) dest;
	BYTE *const oend = op + maxOutputSize;
	BYTE *cpy;

	/* Main Loop */
	while (ip < iend) {
		unsigned token;
		size_t length;

		/* get runlength */
		token = *ip++;
		if ((length = (token >> ML_BITS)) == RUN_MASK) {
			int s = 255;
			while ((ip < iend) && (s == 255)) {
				s = *ip++;
				/* reject additions that would wrap length */
				if (unlikely(length > (size_t)(length + s)))
					goto _output_error;
				length += s;
			}
		}
		/* copy literals */
		cpy = op + length;
		/* CORNER-CASE: cpy might overflow. */
		if (cpy < op)
			goto _output_error;	/* cpy was overflowed, bail! */
		if ((cpy > oend - COPYLENGTH) ||
		    (ip + length > iend - COPYLENGTH)) {
			if (cpy > oend)
				/* Error: writes beyond output buffer */
				goto _output_error;
			if (ip + length != iend)
				/*
				 * Error: LZ4 format requires to consume all
				 * input at this stage
				 */
				goto _output_error;
			(void) memcpy(op, ip, length);
			op += length;
			/* Necessarily EOF, due to parsing restrictions */
			break;
		}
		LZ4_WILDCOPY(ip, op, cpy);
		ip -= (op - cpy);
		op = cpy;

		/* get offset */
		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
		ip += 2;
		if (ref < (BYTE * const) dest)
			/*
			 * Error: offset creates reference outside of
			 * destination buffer
			 */
			goto _output_error;

		/* get matchlength */
		if ((length = (token & ML_MASK)) == ML_MASK) {
			while (ip < iend) {
				int s = *ip++;
				/* reject additions that would wrap length */
				if (unlikely(length > (size_t)(length + s)))
					goto _output_error;
				length += s;
				if (s == 255)
					continue;
				break;
			}
		}
		/* copy repeated sequence */
		if (unlikely(op - ref < STEPSIZE)) {
#if LZ4_ARCH64
			int dec64 = dec64table[op - ref];
#else
			const int dec64 = 0;
#endif
			/*
			 * Overlapping match: copy byte-by-byte for the
			 * first four, then step with table corrections.
			 */
			op[0] = ref[0];
			op[1] = ref[1];
			op[2] = ref[2];
			op[3] = ref[3];
			op += 4;
			ref += 4;
			ref -= dec32table[op - ref];
			A32(op) = A32(ref);
			op += STEPSIZE - 4;
			ref -= dec64;
		} else {
			LZ4_COPYSTEP(ref, op);
		}
		cpy = op + length - (STEPSIZE - 4);
		if (cpy > oend - COPYLENGTH) {
			if (cpy > oend)
				/*
				 * Error: request to write outside of
				 * destination buffer
				 */
				goto _output_error;
#if LZ4_ARCH64
			if ((ref + COPYLENGTH) > oend)
#else
			if ((ref + COPYLENGTH) > oend ||
			    (op + COPYLENGTH) > oend)
#endif
				goto _output_error;
			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
			while (op < cpy)
				*op++ = *ref++;
			op = cpy;
			if (op == oend)
				/*
				 * Check EOF (should never happen, since
				 * last 5 bytes are supposed to be literals)
				 */
				goto _output_error;
			continue;
		}
		LZ4_SECURECOPY(ref, op, cpy);
		op = cpy;	/* correction */
	}

	/* end of decoding */
	return (int)(((char *)op) - dest);

	/* write overflow error detected */
	_output_error:
	return (-1);
}
1015 | ||
/*
 * Create the kmem cache that supplies struct refTables scratch buffers
 * to real_LZ4_compress().  Called once during module initialization.
 */
void
lz4_init(void)
{
	lz4_cache = kmem_cache_create("lz4_cache",
	    sizeof (struct refTables), 0, NULL, NULL, NULL, NULL, NULL, 0);
}
1022 | ||
1023 | void | |
1024 | lz4_fini(void) | |
1025 | { | |
1026 | if (lz4_cache) { | |
1027 | kmem_cache_destroy(lz4_cache); | |
1028 | lz4_cache = NULL; | |
1029 | } | |
1030 | } |