]>
Commit | Line | Data |
---|---|---|
9759c60f ED |
1 | /* |
2 | * LZ4 - Fast LZ compression algorithm | |
3 | * Header File | |
4 | * Copyright (C) 2011-2013, Yann Collet. | |
5 | * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) | |
6 | * | |
7 | * Redistribution and use in source and binary forms, with or without | |
8 | * modification, are permitted provided that the following conditions are | |
9 | * met: | |
10 | * | |
11 | * * Redistributions of source code must retain the above copyright | |
12 | * notice, this list of conditions and the following disclaimer. | |
13 | * * Redistributions in binary form must reproduce the above | |
14 | * copyright notice, this list of conditions and the following disclaimer | |
15 | * in the documentation and/or other materials provided with the | |
16 | * distribution. | |
17 | * | |
18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
19 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
20 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
21 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
22 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
23 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
24 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
25 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
26 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
27 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
29 | * | |
30 | * You can contact the author at : | |
31 | * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html | |
32 | * - LZ4 source repository : http://code.google.com/p/lz4/ | |
33 | */ | |
34 | ||
35 | #include <sys/zfs_context.h> | |
36 | ||
37 | static int real_LZ4_compress(const char *source, char *dest, int isize, | |
38 | int osize); | |
39 | static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest, | |
40 | int isize, int maxOutputSize); | |
41 | static int LZ4_compressCtx(void *ctx, const char *source, char *dest, | |
42 | int isize, int osize); | |
43 | static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest, | |
44 | int isize, int osize); | |
45 | ||
46 | static kmem_cache_t *lz4_cache; | |
47 | ||
48 | /*ARGSUSED*/ | |
49 | size_t | |
50 | lz4_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n) | |
51 | { | |
52 | uint32_t bufsiz; | |
53 | char *dest = d_start; | |
54 | ||
55 | ASSERT(d_len >= sizeof (bufsiz)); | |
56 | ||
57 | bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len, | |
58 | d_len - sizeof (bufsiz)); | |
59 | ||
60 | /* Signal an error if the compression routine returned zero. */ | |
61 | if (bufsiz == 0) | |
62 | return (s_len); | |
63 | ||
64 | /* | |
65 | * Encode the compresed buffer size at the start. We'll need this in | |
66 | * decompression to counter the effects of padding which might be | |
67 | * added to the compressed buffer and which, if unhandled, would | |
68 | * confuse the hell out of our decompression function. | |
69 | */ | |
70 | *(uint32_t *)dest = BE_32(bufsiz); | |
71 | ||
72 | return (bufsiz + sizeof (bufsiz)); | |
73 | } | |
74 | ||
75 | /*ARGSUSED*/ | |
76 | int | |
77 | lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n) | |
78 | { | |
79 | const char *src = s_start; | |
80 | uint32_t bufsiz = BE_IN32(src); | |
81 | ||
82 | /* invalid compressed buffer size encoded at start */ | |
83 | if (bufsiz + sizeof (bufsiz) > s_len) | |
84 | return (1); | |
85 | ||
86 | /* | |
87 | * Returns 0 on success (decompression function returned non-negative) | |
88 | * and non-zero on failure (decompression function returned negative. | |
89 | */ | |
90 | return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)], | |
91 | d_start, bufsiz, d_len) < 0); | |
92 | } | |
93 | ||
94 | /* | |
95 | * LZ4 API Description: | |
96 | * | |
97 | * Simple Functions: | |
98 | * real_LZ4_compress() : | |
99 | * isize : is the input size. Max supported value is ~1.9GB | |
100 | * return : the number of bytes written in buffer dest | |
101 | * or 0 if the compression fails (if LZ4_COMPRESSMIN is set). | |
102 | * note : destination buffer must be already allocated. | |
103 | * destination buffer must be sized to handle worst cases | |
104 | * situations (input data not compressible) worst case size | |
105 | * evaluation is provided by function LZ4_compressBound(). | |
106 | * | |
107 | * real_LZ4_uncompress() : | |
108 | * osize : is the output size, therefore the original size | |
109 | * return : the number of bytes read in the source buffer. | |
110 | * If the source stream is malformed, the function will stop | |
111 | * decoding and return a negative result, indicating the byte | |
112 | * position of the faulty instruction. This function never | |
113 | * writes beyond dest + osize, and is therefore protected | |
114 | * against malicious data packets. | |
115 | * note : destination buffer must be already allocated | |
116 | * | |
117 | * Advanced Functions | |
118 | * | |
119 | * LZ4_compressBound() : | |
120 | * Provides the maximum size that LZ4 may output in a "worst case" | |
121 | * scenario (input data not compressible) primarily useful for memory | |
122 | * allocation of output buffer. | |
123 | * | |
124 | * isize : is the input size. Max supported value is ~1.9GB | |
125 | * return : maximum output size in a "worst case" scenario | |
126 | * note : this function is limited by "int" range (2^31-1) | |
127 | * | |
128 | * LZ4_uncompress_unknownOutputSize() : | |
129 | * isize : is the input size, therefore the compressed size | |
130 | * maxOutputSize : is the size of the destination buffer (which must be | |
131 | * already allocated) | |
132 | * return : the number of bytes decoded in the destination buffer | |
133 | * (necessarily <= maxOutputSize). If the source stream is | |
134 | * malformed, the function will stop decoding and return a | |
135 | * negative result, indicating the byte position of the faulty | |
136 | * instruction. This function never writes beyond dest + | |
137 | * maxOutputSize, and is therefore protected against malicious | |
138 | * data packets. | |
139 | * note : Destination buffer must be already allocated. | |
140 | * This version is slightly slower than real_LZ4_uncompress() | |
141 | * | |
142 | * LZ4_compressCtx() : | |
143 | * This function explicitly handles the CTX memory structure. | |
144 | * | |
145 | * ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated | |
146 | * by the caller (either on the stack or using kmem_cache_alloc). Passing NULL | |
147 | * isn't valid. | |
148 | * | |
149 | * LZ4_compress64kCtx() : | |
150 | * Same as LZ4_compressCtx(), but specific to small inputs (<64KB). | |
151 | * isize *Must* be <64KB, otherwise the output will be corrupted. | |
152 | * | |
153 | * ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated | |
154 | * by the caller (either on the stack or using kmem_cache_alloc). Passing NULL | |
155 | * isn't valid. | |
156 | */ | |
157 | ||
158 | /* | |
159 | * Tuning parameters | |
160 | */ | |
161 | ||
162 | /* | |
163 | * COMPRESSIONLEVEL: Increasing this value improves compression ratio | |
164 | * Lowering this value reduces memory usage. Reduced memory usage | |
165 | * typically improves speed, due to cache effect (ex: L1 32KB for Intel, | |
166 | * L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes | |
167 | * (examples : 12 -> 16KB ; 17 -> 512KB) | |
168 | */ | |
169 | #define COMPRESSIONLEVEL 12 | |
170 | ||
171 | /* | |
172 | * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the | |
173 | * algorithm skip faster data segments considered "incompressible". | |
174 | * This may decrease compression ratio dramatically, but will be | |
175 | * faster on incompressible data. Increasing this value will make | |
176 | * the algorithm search more before declaring a segment "incompressible". | |
177 | * This could improve compression a bit, but will be slower on | |
178 | * incompressible data. The default value (6) is recommended. | |
179 | */ | |
180 | #define NOTCOMPRESSIBLE_CONFIRMATION 6 | |
181 | ||
182 | /* | |
183 | * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to | |
184 | * performance for big endian cpu, but the resulting compressed stream | |
185 | * will be incompatible with little-endian CPU. You can set this option | |
186 | * to 1 in situations where data will stay within closed environment. | |
187 | * This option is useless on Little_Endian CPU (such as x86). | |
188 | */ | |
189 | /* #define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */ | |
190 | ||
191 | /* | |
192 | * CPU Feature Detection | |
193 | */ | |
194 | ||
195 | /* 32 or 64 bits ? */ | |
196 | #if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || \ | |
197 | defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || \ | |
198 | defined(__LP64__) || defined(_LP64)) | |
199 | #define LZ4_ARCH64 1 | |
200 | #else | |
201 | #define LZ4_ARCH64 0 | |
202 | #endif | |
203 | ||
204 | /* | |
205 | * Little Endian or Big Endian? | |
206 | * Note: overwrite the below #define if you know your architecture endianess. | |
207 | */ | |
208 | #if (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || \ | |
209 | defined(_BIG_ENDIAN) || defined(_ARCH_PPC) || defined(__PPC__) || \ | |
210 | defined(__PPC) || defined(PPC) || defined(__powerpc__) || \ | |
211 | defined(__powerpc) || defined(powerpc) || \ | |
212 | ((defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)))) | |
213 | #define LZ4_BIG_ENDIAN 1 | |
214 | #else | |
215 | /* | |
216 | * Little Endian assumed. PDP Endian and other very rare endian format | |
217 | * are unsupported. | |
218 | */ | |
219 | #endif | |
220 | ||
221 | /* | |
222 | * Unaligned memory access is automatically enabled for "common" CPU, | |
223 | * such as x86. For others CPU, the compiler will be more cautious, and | |
224 | * insert extra code to ensure aligned access is respected. If you know | |
225 | * your target CPU supports unaligned memory access, you may want to | |
226 | * force this option manually to improve performance | |
227 | */ | |
228 | #if defined(__ARM_FEATURE_UNALIGNED) | |
229 | #define LZ4_FORCE_UNALIGNED_ACCESS 1 | |
230 | #endif | |
231 | ||
232 | /* | |
233 | * Illumos : we can't use GCC's __builtin_ctz family of builtins in the | |
234 | * kernel | |
235 | * Linux : we can use GCC's __builtin_ctz family of builtins in the | |
236 | * kernel | |
237 | */ | |
238 | #undef LZ4_FORCE_SW_BITCOUNT | |
239 | ||
240 | /* | |
241 | * Compiler Options | |
242 | */ | |
243 | /* Disable restrict */ | |
244 | #define restrict | |
245 | ||
246 | #define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) | |
247 | ||
248 | #if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__) | |
249 | #define expect(expr, value) (__builtin_expect((expr), (value))) | |
250 | #else | |
251 | #define expect(expr, value) (expr) | |
252 | #endif | |
253 | ||
254 | #ifndef likely | |
255 | #define likely(expr) expect((expr) != 0, 1) | |
256 | #endif | |
257 | ||
258 | #ifndef unlikely | |
259 | #define unlikely(expr) expect((expr) != 0, 0) | |
260 | #endif | |
261 | ||
262 | #define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \ | |
263 | (((x) & 0xffu) << 8))) | |
264 | ||
265 | /* Basic types */ | |
266 | #define BYTE uint8_t | |
267 | #define U16 uint16_t | |
268 | #define U32 uint32_t | |
269 | #define S32 int32_t | |
270 | #define U64 uint64_t | |
271 | ||
272 | #ifndef LZ4_FORCE_UNALIGNED_ACCESS | |
273 | #pragma pack(1) | |
274 | #endif | |
275 | ||
276 | typedef struct _U16_S { | |
277 | U16 v; | |
278 | } U16_S; | |
279 | typedef struct _U32_S { | |
280 | U32 v; | |
281 | } U32_S; | |
282 | typedef struct _U64_S { | |
283 | U64 v; | |
284 | } U64_S; | |
285 | ||
286 | #ifndef LZ4_FORCE_UNALIGNED_ACCESS | |
287 | #pragma pack() | |
288 | #endif | |
289 | ||
290 | #define A64(x) (((U64_S *)(x))->v) | |
291 | #define A32(x) (((U32_S *)(x))->v) | |
292 | #define A16(x) (((U16_S *)(x))->v) | |
293 | ||
294 | /* | |
295 | * Constants | |
296 | */ | |
297 | #define MINMATCH 4 | |
298 | ||
299 | #define HASH_LOG COMPRESSIONLEVEL | |
300 | #define HASHTABLESIZE (1 << HASH_LOG) | |
301 | #define HASH_MASK (HASHTABLESIZE - 1) | |
302 | ||
303 | #define SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \ | |
304 | NOTCOMPRESSIBLE_CONFIRMATION : 2) | |
305 | ||
306 | #define COPYLENGTH 8 | |
307 | #define LASTLITERALS 5 | |
308 | #define MFLIMIT (COPYLENGTH + MINMATCH) | |
309 | #define MINLENGTH (MFLIMIT + 1) | |
310 | ||
311 | #define MAXD_LOG 16 | |
312 | #define MAX_DISTANCE ((1 << MAXD_LOG) - 1) | |
313 | ||
314 | #define ML_BITS 4 | |
315 | #define ML_MASK ((1U<<ML_BITS)-1) | |
316 | #define RUN_BITS (8-ML_BITS) | |
317 | #define RUN_MASK ((1U<<RUN_BITS)-1) | |
318 | ||
319 | ||
320 | /* | |
321 | * Architecture-specific macros | |
322 | */ | |
323 | #if LZ4_ARCH64 | |
324 | #define STEPSIZE 8 | |
325 | #define UARCH U64 | |
326 | #define AARCH A64 | |
327 | #define LZ4_COPYSTEP(s, d) A64(d) = A64(s); d += 8; s += 8; | |
328 | #define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d) | |
329 | #define LZ4_SECURECOPY(s, d, e) if (d < e) LZ4_WILDCOPY(s, d, e) | |
330 | #define HTYPE U32 | |
331 | #define INITBASE(base) const BYTE* const base = ip | |
332 | #else /* !LZ4_ARCH64 */ | |
333 | #define STEPSIZE 4 | |
334 | #define UARCH U32 | |
335 | #define AARCH A32 | |
336 | #define LZ4_COPYSTEP(s, d) A32(d) = A32(s); d += 4; s += 4; | |
337 | #define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d); | |
338 | #define LZ4_SECURECOPY LZ4_WILDCOPY | |
339 | #define HTYPE const BYTE * | |
340 | #define INITBASE(base) const int base = 0 | |
341 | #endif /* !LZ4_ARCH64 */ | |
342 | ||
343 | #if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE)) | |
344 | #define LZ4_READ_LITTLEENDIAN_16(d, s, p) \ | |
345 | { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; } | |
346 | #define LZ4_WRITE_LITTLEENDIAN_16(p, i) \ | |
347 | { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; } | |
348 | #else | |
349 | #define LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); } | |
350 | #define LZ4_WRITE_LITTLEENDIAN_16(p, v) { A16(p) = v; p += 2; } | |
351 | #endif | |
352 | ||
353 | ||
354 | /* Local structures */ | |
355 | struct refTables { | |
356 | HTYPE hashTable[HASHTABLESIZE]; | |
357 | }; | |
358 | ||
359 | ||
360 | /* Macros */ | |
361 | #define LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \ | |
362 | HASH_LOG)) | |
363 | #define LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p)) | |
364 | #define LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e); | |
365 | #define LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \ | |
366 | d = e; } | |
367 | ||
368 | ||
369 | /* Private functions */ | |
370 | #if LZ4_ARCH64 | |
371 | ||
/*
 * Given the non-zero XOR of two STEPSIZE-byte words read at the current
 * match candidates (see the match-length loops below), return how many
 * bytes still matched: the count of zero bytes at the end of the word
 * that comes first in memory order (leading bytes on big-endian,
 * trailing bytes on little-endian).
 */
static inline int
LZ4_NbCommonBytes(register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	/* leading zero bits / 8 = leading zero bytes */
	return (__builtin_clzll(val) >> 3);
#else
	/* Software fallback: binary search down the high-order bytes. */
	int r;
	if (!(val >> 32)) {
		r = 4;
	} else {
		r = 0;
		val >>= 32;
	}
	if (!(val >> 16)) {
		r += 2;
		val >>= 8;
	} else {
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	/* trailing zero bits / 8 = trailing zero bytes */
	return (__builtin_ctzll(val) >> 3);
#else
	/*
	 * Software fallback: isolate the lowest set bit (val & -val) and
	 * map it to a byte position via a De Bruijn multiply + table.
	 */
	static const int DeBruijnBytePos[64] =
	    { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
		3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
		5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
		4, 5, 7, 2, 6, 5, 7, 6, 7, 7
	};
	return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
	    58];
#endif
#endif
}
412 | ||
413 | #else | |
414 | ||
/*
 * 32-bit variant of LZ4_NbCommonBytes(): given the non-zero XOR of two
 * 4-byte words, return the number of bytes that still matched (zero
 * bytes at the memory-order start of the word).
 */
static inline int
LZ4_NbCommonBytes(register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	/* leading zero bits / 8 = leading zero bytes */
	return (__builtin_clz(val) >> 3);
#else
	/* Software fallback: binary search down the high-order bytes. */
	int r;
	if (!(val >> 16)) {
		r = 2;
		val >>= 8;
	} else {
		r = 0;
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	/* trailing zero bits / 8 = trailing zero bytes */
	return (__builtin_ctz(val) >> 3);
#else
	/*
	 * Software fallback: isolate the lowest set bit (val & -val) and
	 * map it to a byte position via a De Bruijn multiply + table.
	 */
	static const int DeBruijnBytePos[32] = {
		0, 0, 3, 0, 3, 1, 3, 0,
		3, 2, 2, 1, 3, 2, 0, 1,
		3, 3, 1, 2, 2, 2, 2, 0,
		3, 1, 2, 0, 1, 0, 1, 1
	};
	return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
	    27];
#endif
#endif
}
450 | ||
451 | #endif | |
452 | ||
453 | /* Compression functions */ | |
454 | ||
/*
 * General-purpose LZ4 compressor for inputs of any supported size.
 *
 * ctx    : caller-allocated struct refTables used as the match-finding hash
 *          table (real_LZ4_compress() zeroes it before calling here).
 * source : input buffer.
 * dest   : output buffer, osize bytes long.
 * isize  : number of input bytes to compress.
 * osize  : output buffer capacity.
 * return : bytes written to dest, or 0 if the output would exceed osize.
 */
/*ARGSUSED*/
static int
LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
	struct refTables *srt = (struct refTables *)ctx;
	/* hash table maps a 4-byte prefix hash to a position (HTYPE) */
	HTYPE *HashTable = (HTYPE *) (srt->hashTable);

	const BYTE *ip = (BYTE *) source;
	INITBASE(base);
	/* anchor: start of the literal run not yet emitted */
	const BYTE *anchor = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	/* no match may start within the last MFLIMIT input bytes */
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;


	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
	ip++;
	forwardH = LZ4_HASH_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/*
		 * Find a match.  The step size grows as attempts fail, so
		 * incompressible regions are skipped over progressively
		 * faster (see NOTCOMPRESSIBLE_CONFIRMATION above).
		 */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if (unlikely(forwardIp > mflimit)) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

			/* candidate must be in range and actually match */
		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));

		/* Catch up: extend the match backwards over literals */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    unlikely(ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend))
			return (0);

		if (length >= (int)RUN_MASK) {
			/* long run: RUN_MASK in token, remainder in 255s */
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset (little-endian 16-bit match distance) */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		/* compare STEPSIZE bytes at a time while safely in bounds */
		while (likely(ip < matchlimit - (STEPSIZE - 1))) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
			return (0);
		if (len >= (int)ML_MASK) {
			/* long match: ML_MASK in token, remainder in 255s */
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position for an immediately-adjacent match */
		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
			/* zero-literal token, straight into the match */
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH_VALUE(ip);
	}

_last_literals:
	/* Encode Last Literals (the tail is always stored uncompressed) */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255) {
				*op++ = 255;
			}
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}
635 | ||
636 | ||
637 | ||
638 | /* Note : this function is valid only if isize < LZ4_64KLIMIT */ | |
639 | #define LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1)) | |
640 | #define HASHLOG64K (HASH_LOG + 1) | |
641 | #define HASH64KTABLESIZE (1U << HASHLOG64K) | |
642 | #define LZ4_HASH64K_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8) - \ | |
643 | HASHLOG64K)) | |
644 | #define LZ4_HASH64K_VALUE(p) LZ4_HASH64K_FUNCTION(A32(p)) | |
645 | ||
/*
 * Small-input LZ4 compressor.  Same algorithm as LZ4_compressCtx(), but
 * because isize must be < LZ4_64KLIMIT, positions relative to `base` fit
 * in a U16 hash table entry (twice as many slots in the same refTables
 * space, see HASHLOG64K) and the MAX_DISTANCE range check is unnecessary.
 *
 * ctx    : caller-allocated struct refTables used as the hash table
 *          (real_LZ4_compress() zeroes it before calling here).
 * isize  : input length; *must* be < LZ4_64KLIMIT or output is corrupted.
 * return : bytes written to dest, or 0 if the output would exceed osize.
 */
/*ARGSUSED*/
static int
LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
	struct refTables *srt = (struct refTables *)ctx;
	/* 16-bit offsets are sufficient because isize < 64KB */
	U16 *HashTable = (U16 *) (srt->hashTable);

	const BYTE *ip = (BYTE *) source;
	/* anchor: start of the literal run not yet emitted */
	const BYTE *anchor = ip;
	const BYTE *const base = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	/* no match may start within the last MFLIMIT input bytes */
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;

	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	ip++;
	forwardH = LZ4_HASH64K_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/*
		 * Find a match.  The step size grows as attempts fail, so
		 * incompressible regions are skipped progressively faster.
		 */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if (forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH64K_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

			/* any table hit is in range: input is < 64KB */
		} while (A32(ref) != A32(ip));

		/* Catch up: extend the match backwards over literals */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    (ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend))
			return (0);

		if (length >= (int)RUN_MASK) {
			/* long run: RUN_MASK in token, remainder in 255s */
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset (little-endian 16-bit match distance) */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		/* compare STEPSIZE bytes at a time while safely in bounds */
		while (ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
			return (0);
		if (len >= (int)ML_MASK) {
			/* long match: ML_MASK in token, remainder in 255s */
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position for an immediately-adjacent match */
		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
		if (A32(ref) == A32(ip)) {
			/* zero-literal token, straight into the match */
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH64K_VALUE(ip);
	}

_last_literals:
	/* Encode Last Literals (the tail is always stored uncompressed) */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255)
				*op++ = 255;
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}
823 | ||
824 | static int | |
825 | real_LZ4_compress(const char *source, char *dest, int isize, int osize) | |
826 | { | |
827 | void *ctx; | |
828 | int result; | |
829 | ||
830 | ASSERT(lz4_cache != NULL); | |
831 | ctx = kmem_cache_alloc(lz4_cache, KM_PUSHPAGE); | |
832 | ||
833 | /* | |
834 | * out of kernel memory, gently fall through - this will disable | |
835 | * compression in zio_compress_data | |
836 | */ | |
837 | if (ctx == NULL) | |
838 | return (0); | |
839 | ||
840 | memset(ctx, 0, sizeof (struct refTables)); | |
841 | ||
842 | if (isize < LZ4_64KLIMIT) | |
843 | result = LZ4_compress64kCtx(ctx, source, dest, isize, osize); | |
844 | else | |
845 | result = LZ4_compressCtx(ctx, source, dest, isize, osize); | |
846 | ||
847 | kmem_cache_free(lz4_cache, ctx); | |
848 | return (result); | |
849 | } | |
850 | ||
851 | /* Decompression functions */ | |
852 | ||
853 | /* | |
854 | * Note: The decoding functions real_LZ4_uncompress() and | |
855 | * LZ4_uncompress_unknownOutputSize() are safe against "buffer overflow" | |
856 | * attack type. They will never write nor read outside of the provided | |
857 | * output buffers. LZ4_uncompress_unknownOutputSize() also insures that | |
858 | * it will never read outside of the input buffer. A corrupted input | |
859 | * will produce an error result, a negative int, indicating the position | |
860 | * of the error within input stream. | |
861 | */ | |
862 | ||
/*
 * Safe LZ4 decoder (see the block comment above): never reads outside
 * [source, source + isize) nor writes outside [dest, dest + maxOutputSize).
 *
 * source       : compressed input buffer.
 * dest         : output buffer, maxOutputSize bytes long.
 * isize        : compressed input length.
 * maxOutputSize: destination capacity.
 * return       : decoded byte count, or a negative value on malformed
 *                input (the negated offset of the faulty input byte).
 */
static int
LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
    int maxOutputSize)
{
	/* Local Variables */
	const BYTE *restrict ip = (const BYTE *) source;
	const BYTE *const iend = ip + isize;
	const BYTE *ref;

	BYTE *op = (BYTE *) dest;
	BYTE *const oend = op + maxOutputSize;
	BYTE *cpy;

	/*
	 * Adjustment tables for copying matches whose offset is smaller
	 * than STEPSIZE (overlapping copies), indexed by op - ref.
	 */
	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
#if LZ4_ARCH64
	size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
#endif

	/* Main Loop: one literal-run + match sequence per iteration */
	while (ip < iend) {
		unsigned token;
		size_t length;

		/* get runlength (high nibble; 255-bytes extend RUN_MASK) */
		token = *ip++;
		if ((length = (token >> ML_BITS)) == RUN_MASK) {
			int s = 255;
			while ((ip < iend) && (s == 255)) {
				s = *ip++;
				length += s;
			}
		}
		/* copy literals */
		cpy = op + length;
		/* slow path near either buffer end: must be the final run */
		if ((cpy > oend - COPYLENGTH) ||
		    (ip + length > iend - COPYLENGTH)) {
			if (cpy > oend)
				/* Error: writes beyond output buffer */
				goto _output_error;
			if (ip + length != iend)
				/*
				 * Error: LZ4 format requires to consume all
				 * input at this stage
				 */
				goto _output_error;
			(void) memcpy(op, ip, length);
			op += length;
			/* Necessarily EOF, due to parsing restrictions */
			break;
		}
		/* fast path: wildcopy may overshoot, then correct op/ip */
		LZ4_WILDCOPY(ip, op, cpy);
		ip -= (op - cpy);
		op = cpy;

		/* get offset (little-endian 16-bit distance back from cpy) */
		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
		ip += 2;
		if (ref < (BYTE * const) dest)
			/*
			 * Error: offset creates reference outside of
			 * destination buffer
			 */
			goto _output_error;

		/* get matchlength (low nibble; 255-bytes extend ML_MASK) */
		if ((length = (token & ML_MASK)) == ML_MASK) {
			while (ip < iend) {
				int s = *ip++;
				length += s;
				if (s == 255)
					continue;
				break;
			}
		}
		/* copy repeated sequence */
		if (unlikely(op - ref < STEPSIZE)) {
			/* overlapping match: copy byte-wise-ish via tables */
#if LZ4_ARCH64
			size_t dec64 = dec64table[op-ref];
#else
			const int dec64 = 0;
#endif
			op[0] = ref[0];
			op[1] = ref[1];
			op[2] = ref[2];
			op[3] = ref[3];
			op += 4;
			ref += 4;
			ref -= dec32table[op-ref];
			A32(op) = A32(ref);
			op += STEPSIZE - 4;
			ref -= dec64;
		} else {
			LZ4_COPYSTEP(ref, op);
		}
		cpy = op + length - (STEPSIZE - 4);
		if (cpy > oend - COPYLENGTH) {
			if (cpy > oend)
				/*
				 * Error: request to write outside of
				 * destination buffer
				 */
				goto _output_error;
			/* finish with bounded copies near the buffer end */
			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
			while (op < cpy)
				*op++ = *ref++;
			op = cpy;
			if (op == oend)
				/*
				 * Check EOF (should never happen, since
				 * last 5 bytes are supposed to be literals)
				 */
				goto _output_error;
			continue;
		}
		LZ4_SECURECOPY(ref, op, cpy);
		op = cpy;	/* correction */
	}

	/* end of decoding */
	return (int)(((char *)op) - dest);

	/* write overflow error detected */
_output_error:
	return (int)(-(((char *)ip) - source));
}
988 | ||
/*
 * Module init: create the kmem cache that real_LZ4_compress() draws its
 * per-call hash-table scratch space (struct refTables) from.
 */
void
lz4_init(void)
{
	lz4_cache = kmem_cache_create("lz4_cache",
	    sizeof (struct refTables), 0, NULL, NULL, NULL, NULL, NULL, 0);
}
995 | ||
996 | void | |
997 | lz4_fini(void) | |
998 | { | |
999 | if (lz4_cache) { | |
1000 | kmem_cache_destroy(lz4_cache); | |
1001 | lz4_cache = NULL; | |
1002 | } | |
1003 | } | |
1004 |