/*
 * LZ4 - Fast LZ compression algorithm
 * Header File
 * Copyright (C) 2011-2013, Yann Collet.
 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * You can contact the author at :
 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
 * - LZ4 source repository : http://code.google.com/p/lz4/
 */

#include <sys/zfs_context.h>

static int real_LZ4_compress(const char *source, char *dest, int isize,
    int osize);
static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
    int isize, int maxOutputSize);
static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);
static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);

static kmem_cache_t *lz4_cache;

/*ARGSUSED*/
size_t
lz4_compress_zfs(void *s_start, void *d_start, size_t s_len, size_t d_len,
    int n)
{
    uint32_t bufsiz;
    char *dest = d_start;

    ASSERT(d_len >= sizeof (bufsiz));

    bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
        d_len - sizeof (bufsiz));

    /* Signal an error if the compression routine returned zero. */
    if (bufsiz == 0)
        return (s_len);

    /*
     * Encode the compressed buffer size at the start. We'll need this in
     * decompression to counter the effects of padding which might be
     * added to the compressed buffer and which, if unhandled, would
     * confuse the hell out of our decompression function.
     */
    *(uint32_t *)dest = BE_32(bufsiz);

    return (bufsiz + sizeof (bufsiz));
}

/*ARGSUSED*/
int
lz4_decompress_zfs(void *s_start, void *d_start, size_t s_len, size_t d_len,
    int n)
{
    const char *src = s_start;
    uint32_t bufsiz = BE_IN32(src);

    /* invalid compressed buffer size encoded at start */
    if (bufsiz + sizeof (bufsiz) > s_len)
        return (1);

    /*
     * Returns 0 on success (decompression function returned non-negative)
     * and non-zero on failure (decompression function returned negative).
     */
    return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
        d_start, bufsiz, d_len) < 0);
}
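
/*
 * Usage sketch (editorial addition, not part of the upstream source):
 * lz4_compress_zfs() stores a 4-byte big-endian length header in front of
 * the raw LZ4 stream, and lz4_decompress_zfs() reads that header back to
 * recover the true compressed length despite any padding after it. With
 * hypothetical buffers src/comp/out and sizes src_len/dst_len, a round
 * trip would look roughly like:
 *
 *     size_t clen = lz4_compress_zfs(src, comp, src_len, dst_len, 0);
 *     if (clen >= src_len) {
 *         ... compression failed or did not fit: keep the block as-is ...
 *     } else if (lz4_decompress_zfs(comp, out, clen, src_len, 0) != 0) {
 *         ... corrupt or truncated stream ...
 *     }
 */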

/*
 * LZ4 API Description:
 *
 * Simple Functions:
 * real_LZ4_compress() :
 *     isize : is the input size. Max supported value is ~1.9GB
 *     return : the number of bytes written in buffer dest
 *         or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
 *     note : destination buffer must be already allocated.
 *         destination buffer must be sized to handle worst-case
 *         situations (input data not compressible); worst-case size
 *         evaluation is provided by function LZ4_compressBound().
 *
 * real_LZ4_uncompress() :
 *     osize : is the output size, therefore the original size
 *     return : the number of bytes read in the source buffer.
 *         If the source stream is malformed, the function will stop
 *         decoding and return a negative result, indicating the byte
 *         position of the faulty instruction. This function never
 *         writes beyond dest + osize, and is therefore protected
 *         against malicious data packets.
 *     note : destination buffer must be already allocated
 *
 * Advanced Functions
 *
 * LZ4_compressBound() :
 *     Provides the maximum size that LZ4 may output in a "worst case"
 *     scenario (input data not compressible); primarily useful for memory
 *     allocation of the output buffer.
 *
 *     isize : is the input size. Max supported value is ~1.9GB
 *     return : maximum output size in a "worst case" scenario
 *     note : this function is limited by "int" range (2^31-1)
 *
 * LZ4_uncompress_unknownOutputSize() :
 *     isize : is the input size, therefore the compressed size
 *     maxOutputSize : is the size of the destination buffer (which must be
 *         already allocated)
 *     return : the number of bytes decoded in the destination buffer
 *         (necessarily <= maxOutputSize). If the source stream is
 *         malformed, the function will stop decoding and return a
 *         negative result, indicating the byte position of the faulty
 *         instruction. This function never writes beyond dest +
 *         maxOutputSize, and is therefore protected against malicious
 *         data packets.
 *     note : Destination buffer must be already allocated.
 *         This version is slightly slower than real_LZ4_uncompress()
 *
 * LZ4_compressCtx() :
 *     This function explicitly handles the CTX memory structure.
 *
 *     ILLUMOS CHANGES: the CTX memory structure must be explicitly
 *     allocated by the caller (either on the stack or using
 *     kmem_cache_alloc). Passing NULL isn't valid.
 *
 * LZ4_compress64kCtx() :
 *     Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
 *     isize *Must* be <64KB, otherwise the output will be corrupted.
 *
 *     ILLUMOS CHANGES: the CTX memory structure must be explicitly
 *     allocated by the caller (either on the stack or using
 *     kmem_cache_alloc). Passing NULL isn't valid.
 */
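
/*
 * Editorial note: LZ4_compressBound() itself is not implemented in this
 * file. In the upstream LZ4 sources the worst-case output for an input of
 * isize bytes is bounded roughly by
 *
 *     isize + (isize / 255) + 16
 *
 * (the literal-run escape bytes plus a small constant), which is why a
 * caller can size its destination buffer conservatively and treat a zero
 * return from the compressors below as "output did not fit".
 */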

/*
 * Tuning parameters
 */

/*
 * COMPRESSIONLEVEL: Increasing this value improves compression ratio.
 * Lowering this value reduces memory usage. Reduced memory usage
 * typically improves speed, due to cache effect (ex: L1 32KB for Intel,
 * L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
 * (examples : 12 -> 16KB ; 17 -> 512KB)
 */
#define COMPRESSIONLEVEL 12

/*
 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
 * algorithm skip faster over data segments considered "incompressible".
 * This may decrease compression ratio dramatically, but will be
 * faster on incompressible data. Increasing this value will make
 * the algorithm search more before declaring a segment "incompressible".
 * This could improve compression a bit, but will be slower on
 * incompressible data. The default value (6) is recommended.
 */
#define NOTCOMPRESSIBLE_CONFIRMATION 6

/*
 * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to
 * performance for big-endian CPUs, but the resulting compressed stream
 * will be incompatible with little-endian CPUs. You can set this option
 * to 1 in situations where data will stay within a closed environment.
 * This option is useless on little-endian CPUs (such as x86).
 */
/* #define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */

/*
 * CPU Feature Detection
 */

/* 32 or 64 bits ? */
#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || \
    defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || \
    defined(__LP64__) || defined(_LP64))
#define LZ4_ARCH64 1
#else
#define LZ4_ARCH64 0
#endif

/*
 * Little Endian or Big Endian?
 * Note: overwrite the below #define if you know your architecture
 * endianness.
 */
#if (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || \
    defined(_BIG_ENDIAN) || defined(_ARCH_PPC) || defined(__PPC__) || \
    defined(__PPC) || defined(PPC) || defined(__powerpc__) || \
    defined(__powerpc) || defined(powerpc) || \
    ((defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))))
#define LZ4_BIG_ENDIAN 1
#else
/*
 * Little Endian assumed. PDP Endian and other very rare endian formats
 * are unsupported.
 */
#endif

/*
 * Unaligned memory access is automatically enabled for "common" CPUs,
 * such as x86. For other CPUs, the compiler will be more cautious and
 * insert extra code to ensure aligned access is respected. If you know
 * your target CPU supports unaligned memory access, you may want to
 * force this option manually to improve performance.
 */
#if defined(__ARM_FEATURE_UNALIGNED)
#define LZ4_FORCE_UNALIGNED_ACCESS 1
#endif

/*
 * Illumos : we can't use GCC's __builtin_ctz family of builtins in the
 *     kernel
 * Linux : we can use GCC's __builtin_ctz family of builtins in the
 *     kernel
 */
#undef LZ4_FORCE_SW_BITCOUNT

/*
 * Compiler Options
 */
/* Disable restrict */
#define restrict

/*
 * Linux : GCC_VERSION is defined as of 3.9-rc1, so undefine it.
 * torvalds/linux@3f3f8d2f48acfd8ed3b8e6b7377935da57b27b16
 */
#ifdef GCC_VERSION
#undef GCC_VERSION
#endif

#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)

#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
#define expect(expr, value) (__builtin_expect((expr), (value)))
#else
#define expect(expr, value) (expr)
#endif

#ifndef likely
#define likely(expr) expect((expr) != 0, 1)
#endif

#ifndef unlikely
#define unlikely(expr) expect((expr) != 0, 0)
#endif

#define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
    (((x) & 0xffu) << 8)))

/* Basic types */
#define BYTE uint8_t
#define U16 uint16_t
#define U32 uint32_t
#define S32 int32_t
#define U64 uint64_t

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack(1)
#endif

typedef struct _U16_S {
    U16 v;
} U16_S;
typedef struct _U32_S {
    U32 v;
} U32_S;
typedef struct _U64_S {
    U64 v;
} U64_S;

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack()
#endif

#define A64(x) (((U64_S *)(x))->v)
#define A32(x) (((U32_S *)(x))->v)
#define A16(x) (((U16_S *)(x))->v)

/*
 * Constants
 */
#define MINMATCH 4

#define HASH_LOG COMPRESSIONLEVEL
#define HASHTABLESIZE (1 << HASH_LOG)
#define HASH_MASK (HASHTABLESIZE - 1)

#define SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
    NOTCOMPRESSIBLE_CONFIRMATION : 2)

#define COPYLENGTH 8
#define LASTLITERALS 5
#define MFLIMIT (COPYLENGTH + MINMATCH)
#define MINLENGTH (MFLIMIT + 1)

#define MAXD_LOG 16
#define MAX_DISTANCE ((1 << MAXD_LOG) - 1)

#define ML_BITS 4
#define ML_MASK ((1U<<ML_BITS)-1)
#define RUN_BITS (8-ML_BITS)
#define RUN_MASK ((1U<<RUN_BITS)-1)
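
/*
 * Editorial note: each LZ4 sequence begins with a one-byte token. Its upper
 * RUN_BITS hold the literal run length, with the value RUN_MASK escaping to
 * additional 255-valued length bytes; its lower ML_BITS hold the match
 * length minus MINMATCH, with ML_MASK escaping the same way. This is the
 * layout the compressors below build in *token and the decompressor parses.
 */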


/*
 * Architecture-specific macros
 */
#if LZ4_ARCH64
#define STEPSIZE 8
#define UARCH U64
#define AARCH A64
#define LZ4_COPYSTEP(s, d) A64(d) = A64(s); d += 8; s += 8;
#define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d)
#define LZ4_SECURECOPY(s, d, e) if (d < e) LZ4_WILDCOPY(s, d, e)
#define HTYPE U32
#define INITBASE(base) const BYTE* const base = ip
#else /* !LZ4_ARCH64 */
#define STEPSIZE 4
#define UARCH U32
#define AARCH A32
#define LZ4_COPYSTEP(s, d) A32(d) = A32(s); d += 4; s += 4;
#define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
#define LZ4_SECURECOPY LZ4_WILDCOPY
#define HTYPE const BYTE *
#define INITBASE(base) const int base = 0
#endif /* !LZ4_ARCH64 */

#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
#define LZ4_READ_LITTLEENDIAN_16(d, s, p) \
    { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
#define LZ4_WRITE_LITTLEENDIAN_16(p, i) \
    { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
#else
#define LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
#define LZ4_WRITE_LITTLEENDIAN_16(p, v) { A16(p) = v; p += 2; }
#endif


/* Local structures */
struct refTables {
    HTYPE hashTable[HASHTABLESIZE];
};


/* Macros */
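/*
 * Editorial note: LZ4_HASH_FUNCTION() is a multiplicative hash of the next
 * four input bytes. The constant 2654435761U is the 32-bit Knuth
 * golden-ratio multiplier, and the right shift keeps only HASH_LOG bits so
 * the result indexes directly into refTables.hashTable.
 */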
#define LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
    HASH_LOG))
#define LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
#define LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
#define LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
    d = e; }


/* Private functions */
#if LZ4_ARCH64

static inline int
LZ4_NbCommonBytes(register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if defined(__GNUC__) && (GCC_VERSION >= 304) && \
    !defined(LZ4_FORCE_SW_BITCOUNT)
    return (__builtin_clzll(val) >> 3);
#else
    int r;
    if (!(val >> 32)) {
        r = 4;
    } else {
        r = 0;
        val >>= 32;
    }
    if (!(val >> 16)) {
        r += 2;
        val >>= 8;
    } else {
        val >>= 24;
    }
    r += (!val);
    return (r);
#endif
#else
#if defined(__GNUC__) && (GCC_VERSION >= 304) && \
    !defined(LZ4_FORCE_SW_BITCOUNT)
    return (__builtin_ctzll(val) >> 3);
#else
    static const int DeBruijnBytePos[64] =
        { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
        3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
        5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
        4, 5, 7, 2, 6, 5, 7, 6, 7, 7
    };
    return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
        58];
#endif
#endif
}

#else

static inline int
LZ4_NbCommonBytes(register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if defined(__GNUC__) && (GCC_VERSION >= 304) && \
    !defined(LZ4_FORCE_SW_BITCOUNT)
    return (__builtin_clz(val) >> 3);
#else
    int r;
    if (!(val >> 16)) {
        r = 2;
        val >>= 8;
    } else {
        r = 0;
        val >>= 24;
    }
    r += (!val);
    return (r);
#endif
#else
#if defined(__GNUC__) && (GCC_VERSION >= 304) && \
    !defined(LZ4_FORCE_SW_BITCOUNT)
    return (__builtin_ctz(val) >> 3);
#else
    static const int DeBruijnBytePos[32] = {
        0, 0, 3, 0, 3, 1, 3, 0,
        3, 2, 2, 1, 3, 2, 0, 1,
        3, 3, 1, 2, 2, 2, 2, 0,
        3, 1, 2, 0, 1, 0, 1, 1
    };
    return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
        27];
#endif
#endif
}

#endif
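
/*
 * Editorial note: LZ4_NbCommonBytes() is given the XOR of two words (zero
 * bytes mark positions that matched) and returns how many of the
 * lowest-order bytes (little-endian) or highest-order bytes (big-endian)
 * are zero, i.e. by how many bytes the current match can be extended at
 * once. It prefers the compiler's ctz/clz builtins; the fallback isolates
 * the lowest set bit with (val & -val) and maps it to a byte index via a
 * De Bruijn multiply-and-shift table lookup.
 */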

/* Compression functions */

/*ARGSUSED*/
static int
LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
    struct refTables *srt = (struct refTables *)ctx;
    HTYPE *HashTable = (HTYPE *) (srt->hashTable);

    const BYTE *ip = (BYTE *) source;
    INITBASE(base);
    const BYTE *anchor = ip;
    const BYTE *const iend = ip + isize;
    const BYTE *const oend = (BYTE *) dest + osize;
    const BYTE *const mflimit = iend - MFLIMIT;
#define matchlimit (iend - LASTLITERALS)

    BYTE *op = (BYTE *) dest;

    int len, length;
    const int skipStrength = SKIPSTRENGTH;
    U32 forwardH;


    /* Init */
    if (isize < MINLENGTH)
        goto _last_literals;

    /* First Byte */
    HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
    ip++;
    forwardH = LZ4_HASH_VALUE(ip);

    /* Main Loop */
    for (;;) {
        int findMatchAttempts = (1U << skipStrength) + 3;
        const BYTE *forwardIp = ip;
        const BYTE *ref;
        BYTE *token;

        /* Find a match */
        do {
            U32 h = forwardH;
            int step = findMatchAttempts++ >> skipStrength;
            ip = forwardIp;
            forwardIp = ip + step;

            if (unlikely(forwardIp > mflimit)) {
                goto _last_literals;
            }

            forwardH = LZ4_HASH_VALUE(forwardIp);
            ref = base + HashTable[h];
            HashTable[h] = ip - base;

        } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));

        /* Catch up */
        while ((ip > anchor) && (ref > (BYTE *) source) &&
            unlikely(ip[-1] == ref[-1])) {
            ip--;
            ref--;
        }

        /* Encode Literal length */
        length = ip - anchor;
        token = op++;

        /* Check output limit */
        if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
            (length >> 8) > oend))
            return (0);

        if (length >= (int)RUN_MASK) {
            *token = (RUN_MASK << ML_BITS);
            len = length - RUN_MASK;
            for (; len > 254; len -= 255)
                *op++ = 255;
            *op++ = (BYTE)len;
        } else
            *token = (length << ML_BITS);

        /* Copy Literals */
        LZ4_BLINDCOPY(anchor, op, length);

        _next_match:
        /* Encode Offset */
        LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

        /* Start Counting */
        ip += MINMATCH;
        ref += MINMATCH;	/* MinMatch verified */
        anchor = ip;
        while (likely(ip < matchlimit - (STEPSIZE - 1))) {
            UARCH diff = AARCH(ref) ^ AARCH(ip);
            if (!diff) {
                ip += STEPSIZE;
                ref += STEPSIZE;
                continue;
            }
            ip += LZ4_NbCommonBytes(diff);
            goto _endCount;
        }
#if LZ4_ARCH64
        if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
            ip += 4;
            ref += 4;
        }
#endif
        if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
            ip += 2;
            ref += 2;
        }
        if ((ip < matchlimit) && (*ref == *ip))
            ip++;
        _endCount:

        /* Encode MatchLength */
        len = (ip - anchor);
        /* Check output limit */
        if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
            return (0);
        if (len >= (int)ML_MASK) {
            *token += ML_MASK;
            len -= ML_MASK;
            for (; len > 509; len -= 510) {
                *op++ = 255;
                *op++ = 255;
            }
            if (len > 254) {
                len -= 255;
                *op++ = 255;
            }
            *op++ = (BYTE)len;
        } else
            *token += len;

        /* Test end of chunk */
        if (ip > mflimit) {
            anchor = ip;
            break;
        }
        /* Fill table */
        HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;

        /* Test next position */
        ref = base + HashTable[LZ4_HASH_VALUE(ip)];
        HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
        if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
            token = op++;
            *token = 0;
            goto _next_match;
        }
        /* Prepare next loop */
        anchor = ip++;
        forwardH = LZ4_HASH_VALUE(ip);
    }

    _last_literals:
    /* Encode Last Literals */
    {
        int lastRun = iend - anchor;
        if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
            oend)
            return (0);
        if (lastRun >= (int)RUN_MASK) {
            *op++ = (RUN_MASK << ML_BITS);
            lastRun -= RUN_MASK;
            for (; lastRun > 254; lastRun -= 255) {
                *op++ = 255;
            }
            *op++ = (BYTE)lastRun;
        } else
            *op++ = (lastRun << ML_BITS);
        (void) memcpy(op, anchor, iend - anchor);
        op += iend - anchor;
    }

    /* End */
    return (int)(((char *)op) - dest);
}



/* Note : this function is valid only if isize < LZ4_64KLIMIT */
#define LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
#define HASHLOG64K (HASH_LOG + 1)
#define HASH64KTABLESIZE (1U << HASHLOG64K)
#define LZ4_HASH64K_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8) - \
    HASHLOG64K))
#define LZ4_HASH64K_VALUE(p) LZ4_HASH64K_FUNCTION(A32(p))
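
/*
 * Editorial note: for inputs below LZ4_64KLIMIT every offset (ip - base)
 * fits in 16 bits, so LZ4_compress64kCtx() stores U16 entries in the hash
 * table. The smaller entries leave room for one extra hash bit
 * (HASHLOG64K = HASH_LOG + 1) within the same refTables allocation, and the
 * MAX_DISTANCE window check of LZ4_compressCtx() becomes unnecessary.
 */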

/*ARGSUSED*/
static int
LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
    struct refTables *srt = (struct refTables *)ctx;
    U16 *HashTable = (U16 *) (srt->hashTable);

    const BYTE *ip = (BYTE *) source;
    const BYTE *anchor = ip;
    const BYTE *const base = ip;
    const BYTE *const iend = ip + isize;
    const BYTE *const oend = (BYTE *) dest + osize;
    const BYTE *const mflimit = iend - MFLIMIT;
#define matchlimit (iend - LASTLITERALS)

    BYTE *op = (BYTE *) dest;

    int len, length;
    const int skipStrength = SKIPSTRENGTH;
    U32 forwardH;

    /* Init */
    if (isize < MINLENGTH)
        goto _last_literals;

    /* First Byte */
    ip++;
    forwardH = LZ4_HASH64K_VALUE(ip);

    /* Main Loop */
    for (;;) {
        int findMatchAttempts = (1U << skipStrength) + 3;
        const BYTE *forwardIp = ip;
        const BYTE *ref;
        BYTE *token;

        /* Find a match */
        do {
            U32 h = forwardH;
            int step = findMatchAttempts++ >> skipStrength;
            ip = forwardIp;
            forwardIp = ip + step;

            if (forwardIp > mflimit) {
                goto _last_literals;
            }

            forwardH = LZ4_HASH64K_VALUE(forwardIp);
            ref = base + HashTable[h];
            HashTable[h] = ip - base;

        } while (A32(ref) != A32(ip));

        /* Catch up */
        while ((ip > anchor) && (ref > (BYTE *) source) &&
            (ip[-1] == ref[-1])) {
            ip--;
            ref--;
        }

        /* Encode Literal length */
        length = ip - anchor;
        token = op++;

        /* Check output limit */
        if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
            (length >> 8) > oend))
            return (0);

        if (length >= (int)RUN_MASK) {
            *token = (RUN_MASK << ML_BITS);
            len = length - RUN_MASK;
            for (; len > 254; len -= 255)
                *op++ = 255;
            *op++ = (BYTE)len;
        } else
            *token = (length << ML_BITS);

        /* Copy Literals */
        LZ4_BLINDCOPY(anchor, op, length);

        _next_match:
        /* Encode Offset */
        LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

        /* Start Counting */
        ip += MINMATCH;
        ref += MINMATCH;	/* MinMatch verified */
        anchor = ip;
        while (ip < matchlimit - (STEPSIZE - 1)) {
            UARCH diff = AARCH(ref) ^ AARCH(ip);
            if (!diff) {
                ip += STEPSIZE;
                ref += STEPSIZE;
                continue;
            }
            ip += LZ4_NbCommonBytes(diff);
            goto _endCount;
        }
#if LZ4_ARCH64
        if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
            ip += 4;
            ref += 4;
        }
#endif
        if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
            ip += 2;
            ref += 2;
        }
        if ((ip < matchlimit) && (*ref == *ip))
            ip++;
        _endCount:

        /* Encode MatchLength */
        len = (ip - anchor);
        /* Check output limit */
        if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
            return (0);
        if (len >= (int)ML_MASK) {
            *token += ML_MASK;
            len -= ML_MASK;
            for (; len > 509; len -= 510) {
                *op++ = 255;
                *op++ = 255;
            }
            if (len > 254) {
                len -= 255;
                *op++ = 255;
            }
            *op++ = (BYTE)len;
        } else
            *token += len;

        /* Test end of chunk */
        if (ip > mflimit) {
            anchor = ip;
            break;
        }
        /* Fill table */
        HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;

        /* Test next position */
        ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
        HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
        if (A32(ref) == A32(ip)) {
            token = op++;
            *token = 0;
            goto _next_match;
        }
        /* Prepare next loop */
        anchor = ip++;
        forwardH = LZ4_HASH64K_VALUE(ip);
    }

    _last_literals:
    /* Encode Last Literals */
    {
        int lastRun = iend - anchor;
        if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
            oend)
            return (0);
        if (lastRun >= (int)RUN_MASK) {
            *op++ = (RUN_MASK << ML_BITS);
            lastRun -= RUN_MASK;
            for (; lastRun > 254; lastRun -= 255)
                *op++ = 255;
            *op++ = (BYTE)lastRun;
        } else
            *op++ = (lastRun << ML_BITS);
        (void) memcpy(op, anchor, iend - anchor);
        op += iend - anchor;
    }

    /* End */
    return (int)(((char *)op) - dest);
}

static int
real_LZ4_compress(const char *source, char *dest, int isize, int osize)
{
    void *ctx;
    int result;

    ASSERT(lz4_cache != NULL);
    ctx = kmem_cache_alloc(lz4_cache, KM_PUSHPAGE);

    /*
     * out of kernel memory, gently fall through - this will disable
     * compression in zio_compress_data
     */
    if (ctx == NULL)
        return (0);

    memset(ctx, 0, sizeof (struct refTables));

    if (isize < LZ4_64KLIMIT)
        result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
    else
        result = LZ4_compressCtx(ctx, source, dest, isize, osize);

    kmem_cache_free(lz4_cache, ctx);
    return (result);
}

/* Decompression functions */

/*
 * Note: The decoding functions real_LZ4_uncompress() and
 * LZ4_uncompress_unknownOutputSize() are safe against "buffer overflow"
 * attacks. They will never write nor read outside of the provided
 * output buffers. LZ4_uncompress_unknownOutputSize() also ensures that
 * it will never read outside of the input buffer. A corrupted input
 * will produce an error result, a negative int, indicating the position
 * of the error within the input stream.
 */

static int
LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
    int maxOutputSize)
{
    /* Local Variables */
    const BYTE *restrict ip = (const BYTE *) source;
    const BYTE *const iend = ip + isize;
    const BYTE *ref;

    BYTE *op = (BYTE *) dest;
    BYTE *const oend = op + maxOutputSize;
    BYTE *cpy;

    size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
#if LZ4_ARCH64
    size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
#endif
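
    /*
     * Editorial note: dec32table/dec64table handle matches whose offset
     * (op - ref) is smaller than STEPSIZE. A straight word copy would read
     * bytes that have not been written yet, so the first four bytes are
     * copied one at a time and ref is then adjusted by these table values
     * so that the subsequent wider copies still reproduce the repeating
     * pattern correctly.
     */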

    /* Main Loop */
    while (ip < iend) {
        unsigned token;
        size_t length;

        /* get runlength */
        token = *ip++;
        if ((length = (token >> ML_BITS)) == RUN_MASK) {
            int s = 255;
            while ((ip < iend) && (s == 255)) {
                s = *ip++;
                length += s;
            }
        }
        /* copy literals */
        cpy = op + length;
        if ((cpy > oend - COPYLENGTH) ||
            (ip + length > iend - COPYLENGTH)) {
            if (cpy > oend)
                /* Error: writes beyond output buffer */
                goto _output_error;
            if (ip + length != iend)
                /*
                 * Error: LZ4 format requires to consume all
                 * input at this stage
                 */
                goto _output_error;
            (void) memcpy(op, ip, length);
            op += length;
            /* Necessarily EOF, due to parsing restrictions */
            break;
        }
        LZ4_WILDCOPY(ip, op, cpy);
        ip -= (op - cpy);
        op = cpy;

        /* get offset */
        LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
        ip += 2;
        if (ref < (BYTE * const) dest)
            /*
             * Error: offset creates reference outside of
             * destination buffer
             */
            goto _output_error;

        /* get matchlength */
        if ((length = (token & ML_MASK)) == ML_MASK) {
            while (ip < iend) {
                int s = *ip++;
                length += s;
                if (s == 255)
                    continue;
                break;
            }
        }
        /* copy repeated sequence */
        if (unlikely(op - ref < STEPSIZE)) {
#if LZ4_ARCH64
            size_t dec64 = dec64table[op-ref];
#else
            const int dec64 = 0;
#endif
            op[0] = ref[0];
            op[1] = ref[1];
            op[2] = ref[2];
            op[3] = ref[3];
            op += 4;
            ref += 4;
            ref -= dec32table[op-ref];
            A32(op) = A32(ref);
            op += STEPSIZE - 4;
            ref -= dec64;
        } else {
            LZ4_COPYSTEP(ref, op);
        }
        cpy = op + length - (STEPSIZE - 4);
        if (cpy > oend - COPYLENGTH) {
            if (cpy > oend)
                /*
                 * Error: request to write outside of
                 * destination buffer
                 */
                goto _output_error;
            LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
            while (op < cpy)
                *op++ = *ref++;
            op = cpy;
            if (op == oend)
                /*
                 * Check EOF (should never happen, since
                 * last 5 bytes are supposed to be literals)
                 */
                goto _output_error;
            continue;
        }
        LZ4_SECURECOPY(ref, op, cpy);
        op = cpy;	/* correction */
    }

    /* end of decoding */
    return (int)(((char *)op) - dest);

    /* write overflow error detected */
    _output_error:
    return (int)(-(((char *)ip) - source));
}

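/*
 * Editorial note: lz4_init()/lz4_fini() manage a small kmem cache of
 * struct refTables, the 16KB (with COMPRESSIONLEVEL 12) hash-table
 * workspace that real_LZ4_compress() allocates per call, so the table does
 * not have to live on the kernel stack.
 */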
void
lz4_init(void)
{
    lz4_cache = kmem_cache_create("lz4_cache",
        sizeof (struct refTables), 0, NULL, NULL, NULL, NULL, NULL, 0);
}

void
lz4_fini(void)
{
    if (lz4_cache) {
        kmem_cache_destroy(lz4_cache);
        lz4_cache = NULL;
    }
}