/*
 * LZ4 - Fast LZ compression algorithm
 * Header File
 * Copyright (C) 2011-2013, Yann Collet.
 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 *	notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 *	copyright notice, this list of conditions and the following disclaimer
 *	in the documentation and/or other materials provided with the
 *	distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * You can contact the author at :
 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
 * - LZ4 source repository : http://code.google.com/p/lz4/
 */

#include <sys/zfs_context.h>

static int real_LZ4_compress(const char *source, char *dest, int isize,
    int osize);
static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
    int isize, int maxOutputSize);
static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);
static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);

static kmem_cache_t *lz4_cache;

/*ARGSUSED*/
size_t
lz4_compress_zfs(void *s_start, void *d_start, size_t s_len,
    size_t d_len, int n)
{
	uint32_t bufsiz;
	char *dest = d_start;

	ASSERT(d_len >= sizeof (bufsiz));

	bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
	    d_len - sizeof (bufsiz));

	/* Signal an error if the compression routine returned zero. */
	if (bufsiz == 0)
		return (s_len);

	/*
	 * Encode the compressed buffer size at the start. We'll need this in
	 * decompression to counter the effects of padding which might be
	 * added to the compressed buffer and which, if unhandled, would
	 * confuse the hell out of our decompression function.
	 */
	*(uint32_t *)dest = BE_32(bufsiz);

	return (bufsiz + sizeof (bufsiz));
}

/*ARGSUSED*/
int
lz4_decompress_zfs(void *s_start, void *d_start, size_t s_len,
    size_t d_len, int n)
{
	const char *src = s_start;
	uint32_t bufsiz = BE_IN32(src);

	/* invalid compressed buffer size encoded at start */
	if (bufsiz + sizeof (bufsiz) > s_len)
		return (1);

	/*
	 * Returns 0 on success (decompression function returned non-negative)
	 * and non-zero on failure (decompression function returned negative).
	 */
	return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
	    d_start, bufsiz, d_len) < 0);
}

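/*
 * Usage sketch for the two wrappers above (illustrative only; the buffer
 * names and 4KB sizes are hypothetical, not part of ZFS). The compressed
 * image is the 4-byte big-endian length followed by the LZ4 stream, and
 * lz4_compress_zfs() returns s_len to signal that the data did not
 * compress, in which case the caller keeps the uncompressed copy:
 *
 *	char src[4096], packed[4096], unpacked[4096];
 *	size_t clen;
 *
 *	clen = lz4_compress_zfs(src, packed, sizeof (src),
 *	    sizeof (packed), 0);
 *	if (clen >= sizeof (src))
 *		... incompressible: store src as-is ...
 *	else if (lz4_decompress_zfs(packed, unpacked, clen,
 *	    sizeof (unpacked), 0) != 0)
 *		... corrupt or truncated stream ...
 */
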
/*
 * LZ4 API Description:
 *
 * Simple Functions:
 * real_LZ4_compress() :
 *	isize  : is the input size. Max supported value is ~1.9GB
 *	return : the number of bytes written in buffer dest
 *	    or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
 *	note : destination buffer must be already allocated.
 *	    destination buffer must be sized to handle worst case
 *	    situations (input data not compressible); worst case size
 *	    evaluation is provided by function LZ4_compressBound().
 *
 * real_LZ4_uncompress() :
 *	osize  : is the output size, therefore the original size
 *	return : the number of bytes read in the source buffer.
 *	    If the source stream is malformed, the function will stop
 *	    decoding and return a negative result, indicating the byte
 *	    position of the faulty instruction. This function never
 *	    writes beyond dest + osize, and is therefore protected
 *	    against malicious data packets.
 *	note : destination buffer must be already allocated
 *	note : real_LZ4_uncompress() is not used in ZFS so its code
 *	    is not present here.
 *
 * Advanced Functions
 *
 * LZ4_compressBound() :
 *	Provides the maximum size that LZ4 may output in a "worst case"
 *	scenario (input data not compressible), primarily useful for memory
 *	allocation of the output buffer.
 *
 *	isize  : is the input size. Max supported value is ~1.9GB
 *	return : maximum output size in a "worst case" scenario
 *	note : this function is limited by "int" range (2^31-1)
 *
 * LZ4_uncompress_unknownOutputSize() :
 *	isize  : is the input size, therefore the compressed size
 *	maxOutputSize : is the size of the destination buffer (which must be
 *	    already allocated)
 *	return : the number of bytes decoded in the destination buffer
 *	    (necessarily <= maxOutputSize). If the source stream is
 *	    malformed, the function will stop decoding and return a
 *	    negative result, indicating the byte position of the faulty
 *	    instruction. This function never writes beyond dest +
 *	    maxOutputSize, and is therefore protected against malicious
 *	    data packets.
 *	note : Destination buffer must be already allocated.
 *	    This version is slightly slower than real_LZ4_uncompress()
 *
 * LZ4_compressCtx() :
 *	This function explicitly handles the CTX memory structure.
 *
 *	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 *	by the caller (either on the stack or using kmem_cache_alloc). Passing
 *	NULL isn't valid.
 *
 * LZ4_compress64kCtx() :
 *	Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
 *	isize *Must* be <64KB, otherwise the output will be corrupted.
 *
 *	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 *	by the caller (either on the stack or using kmem_cache_alloc). Passing
 *	NULL isn't valid.
 */

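/*
 * Example of the calling convention the ILLUMOS CHANGES above describe
 * (an illustrative sketch, not upstream code; the 4KB buffers are
 * hypothetical, and struct refTables is defined further down this file):
 *
 *	char src[4096], dst[4096];
 *	struct refTables ctx;
 *	int clen;
 *
 *	memset(&ctx, 0, sizeof (ctx));
 *	clen = LZ4_compressCtx(&ctx, src, dst, sizeof (src), sizeof (dst));
 *	if (clen == 0)
 *		... compressed output did not fit in dst ...
 *
 * The CTX must be zeroed before each call, exactly as real_LZ4_compress()
 * below does with its kmem_cache-allocated contexts.
 */
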
/*
 * Tuning parameters
 */

/*
 * COMPRESSIONLEVEL: Increasing this value improves compression ratio;
 * lowering it reduces memory usage. Reduced memory usage typically
 * improves speed, due to cache effects (ex: L1 32KB for Intel,
 * L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
 * (examples : 12 -> 16KB ; 17 -> 512KB)
 */
#define	COMPRESSIONLEVEL 12
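
/*
 * Worked example of the formula above: COMPRESSIONLEVEL 12 means the
 * hash table declared below (struct refTables) has 2^12 entries, each a
 * 4-byte HTYPE (a U32 offset on 64-bit builds, a 32-bit pointer on
 * 32-bit builds), so the table occupies 2^12 * 2^2 = 2^14 bytes = 16KB,
 * i.e. N -> 2^(N+2).
 */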

/*
 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
 * algorithm skip more quickly over data segments considered
 * "incompressible". This may decrease compression ratio dramatically,
 * but will be faster on incompressible data. Increasing this value will
 * make the algorithm search more before declaring a segment
 * "incompressible". This could improve compression a bit, but will be
 * slower on incompressible data. The default value (6) is recommended.
 */
#define	NOTCOMPRESSIBLE_CONFIRMATION 6

/*
 * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to
 * performance for big-endian CPUs, but the resulting compressed stream
 * will be incompatible with little-endian CPUs. You can set this option
 * to 1 in situations where data will stay within a closed environment.
 * This option is useless on little-endian CPUs (such as x86).
 */
/* #define	BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */

/*
 * CPU Feature Detection
 */

/* 32 or 64 bits ? */
#if defined(_LP64)
#define	LZ4_ARCH64 1
#else
#define	LZ4_ARCH64 0
#endif

/*
 * Little Endian or Big Endian?
 * Note: overwrite the below #define if you know your architecture endianness.
 */
#if defined(_BIG_ENDIAN)
#define	LZ4_BIG_ENDIAN 1
#else
/*
 * Little Endian assumed. PDP Endian and other very rare endian formats
 * are unsupported.
 */
#undef	LZ4_BIG_ENDIAN
#endif

/*
 * Unaligned memory access is automatically enabled for "common" CPUs,
 * such as x86. For other CPUs, the compiler will be more cautious and
 * insert extra code to ensure aligned access is respected. If you know
 * your target CPU supports unaligned memory access, you may want to
 * force this option manually to improve performance.
 */
#if defined(__ARM_FEATURE_UNALIGNED)
#define	LZ4_FORCE_UNALIGNED_ACCESS 1
#endif

/*
 * Illumos : we can't use GCC's __builtin_ctz family of builtins in the
 * kernel
 * Linux : we can use GCC's __builtin_ctz family of builtins in the
 * kernel
 */
#undef	LZ4_FORCE_SW_BITCOUNT
#if defined(__sparc)
#define	LZ4_FORCE_SW_BITCOUNT
#endif

/*
 * Compiler Options
 */
/* Disable restrict */
#define	restrict

/*
 * Linux : GCC_VERSION is defined as of 3.9-rc1, so undefine it.
 * torvalds/linux@3f3f8d2f48acfd8ed3b8e6b7377935da57b27b16
 */
#ifdef GCC_VERSION
#undef GCC_VERSION
#endif

#define	GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)

#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
#define	expect(expr, value)	(__builtin_expect((expr), (value)))
#else
#define	expect(expr, value)	(expr)
#endif

#ifndef likely
#define	likely(expr)	expect((expr) != 0, 1)
#endif

#ifndef unlikely
#define	unlikely(expr)	expect((expr) != 0, 0)
#endif

#define	lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
	(((x) & 0xffu) << 8)))

/* Basic types */
#define	BYTE	uint8_t
#define	U16	uint16_t
#define	U32	uint32_t
#define	S32	int32_t
#define	U64	uint64_t

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack(1)
#endif

typedef struct _U16_S {
	U16 v;
} U16_S;
typedef struct _U32_S {
	U32 v;
} U32_S;
typedef struct _U64_S {
	U64 v;
} U64_S;

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack()
#endif

#define	A64(x) (((U64_S *)(x))->v)
#define	A32(x) (((U32_S *)(x))->v)
#define	A16(x) (((U16_S *)(x))->v)

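/*
 * Clarifying note on the accessors above (not upstream text): A16/A32/A64
 * read or write a whole word at an arbitrary byte address. Without
 * LZ4_FORCE_UNALIGNED_ACCESS, the pack(1) pragma gives the wrapper
 * structs an alignment of 1, so the compiler emits whatever code the
 * target needs for an unaligned access, e.g.:
 *
 *	const BYTE *p = &buf[3];	... odd, unaligned address ...
 *	U32 w = A32(p);			... safe 4-byte load at p ...
 */
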
/*
 * Constants
 */
#define	MINMATCH 4

#define	HASH_LOG COMPRESSIONLEVEL
#define	HASHTABLESIZE (1 << HASH_LOG)
#define	HASH_MASK (HASHTABLESIZE - 1)

#define	SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
	NOTCOMPRESSIBLE_CONFIRMATION : 2)

#define	COPYLENGTH 8
#define	LASTLITERALS 5
#define	MFLIMIT (COPYLENGTH + MINMATCH)
#define	MINLENGTH (MFLIMIT + 1)

#define	MAXD_LOG 16
#define	MAX_DISTANCE ((1 << MAXD_LOG) - 1)

#define	ML_BITS 4
#define	ML_MASK ((1U<<ML_BITS)-1)
#define	RUN_BITS (8-ML_BITS)
#define	RUN_MASK ((1U<<RUN_BITS)-1)
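
/*
 * Worked example of the encoding these constants describe (clarifying
 * note, not upstream text): each sequence starts with a token byte whose
 * high RUN_BITS hold the literal count and whose low ML_BITS hold the
 * match length minus MINMATCH. A field value of RUN_MASK/ML_MASK (15)
 * means extension bytes follow, each adding up to 255, until a byte
 * < 255 ends the count. So 20 literals plus a 9-byte match at offset 48
 * encode as:
 *
 *	token = (15 << ML_BITS) | (9 - MINMATCH) = 0xf5
 *	literal-length extension byte = 20 - 15 = 5
 *	the 20 literal bytes
 *	2-byte little-endian offset = 0x30, 0x00
 */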


/*
 * Architecture-specific macros
 */
#if LZ4_ARCH64
#define	STEPSIZE 8
#define	UARCH U64
#define	AARCH A64
#define	LZ4_COPYSTEP(s, d)	A64(d) = A64(s); d += 8; s += 8;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
#define	LZ4_SECURECOPY(s, d, e)	if (d < e) LZ4_WILDCOPY(s, d, e)
#define	HTYPE U32
#define	INITBASE(base)		const BYTE* const base = ip
#else /* !LZ4_ARCH64 */
#define	STEPSIZE 4
#define	UARCH U32
#define	AARCH A32
#define	LZ4_COPYSTEP(s, d)	A32(d) = A32(s); d += 4; s += 4;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
#define	LZ4_SECURECOPY		LZ4_WILDCOPY
#define	HTYPE const BYTE *
#define	INITBASE(base)		const int base = 0
#endif /* !LZ4_ARCH64 */

#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) \
	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, i) \
	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
#else
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, v)	{ A16(p) = v; p += 2; }
#endif


/* Local structures */
struct refTables {
	HTYPE hashTable[HASHTABLESIZE];
};


/* Macros */
#define	LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
	HASH_LOG))
#define	LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
#define	LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
#define	LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
	d = e; }

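/*
 * Clarifying notes on the macros above (not upstream text):
 *
 * LZ4_HASH_FUNCTION() is multiplicative (Fibonacci) hashing: 2654435761
 * is a prime close to 2^32 divided by the golden ratio, and the shift
 * keeps the top HASH_LOG bits of the product. With MINMATCH = 4 and
 * HASH_LOG = 12 it is ((A32(p) * 2654435761U) >> 20), an index in
 * [0, 4095].
 *
 * LZ4_WILDCOPY() copies in COPYLENGTH-byte packets until d reaches e, so
 * it may write up to COPYLENGTH - 1 bytes past e; the COPYLENGTH and
 * LASTLITERALS slack reserved by the bounds checks below exists to
 * absorb that overrun.
 */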

/* Private functions */
#if LZ4_ARCH64

static inline int
LZ4_NbCommonBytes(register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clzll(val) >> 3);
#else
	int r;
	if (!(val >> 32)) {
		r = 4;
	} else {
		r = 0;
		val >>= 32;
	}
	if (!(val >> 16)) {
		r += 2;
		val >>= 8;
	} else {
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctzll(val) >> 3);
#else
	static const int DeBruijnBytePos[64] =
	    { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
		3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
		5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
		4, 5, 7, 2, 6, 5, 7, 6, 7, 7
	};
	return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
	    58];
#endif
#endif
}

#else

static inline int
LZ4_NbCommonBytes(register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clz(val) >> 3);
#else
	int r;
	if (!(val >> 16)) {
		r = 2;
		val >>= 8;
	} else {
		r = 0;
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctz(val) >> 3);
#else
	static const int DeBruijnBytePos[32] = {
		0, 0, 3, 0, 3, 1, 3, 0,
		3, 2, 2, 1, 3, 2, 0, 1,
		3, 3, 1, 2, 2, 2, 2, 0,
		3, 1, 2, 0, 1, 0, 1, 1
	};
	return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
	    27];
#endif
#endif
}

#endif

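/*
 * How the De Bruijn fallback above works (clarifying note, not upstream
 * text): the argument is the XOR of two words, so on little-endian the
 * number of matching leading bytes is the index of the lowest set bit
 * divided by 8. (val & -val) isolates that bit; multiplying by the
 * De Bruijn constant (every 5-bit pattern occurs in 0x077CB531) shifts a
 * unique pattern into the top bits, which then index the byte-count
 * table. E.g. val = 0x00a00000: val & -val = 0x00200000 (bit 21), and
 * the lookup yields 21 / 8 = 2 matching bytes.
 */
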
/* Compression functions */

/*ARGSUSED*/
static int
LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
	struct refTables *srt = (struct refTables *)ctx;
	HTYPE *HashTable = (HTYPE *) (srt->hashTable);

	const BYTE *ip = (BYTE *) source;
	INITBASE(base);
	const BYTE *anchor = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;


	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
	ip++;
	forwardH = LZ4_HASH_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if (unlikely(forwardIp > mflimit)) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));

		/* Catch up */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    unlikely(ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend))
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while (likely(ip < matchlimit - (STEPSIZE - 1))) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255) {
				*op++ = 255;
			}
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}



/* Note : this function is valid only if isize < LZ4_64KLIMIT */
#define	LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
#define	HASHLOG64K (HASH_LOG + 1)
#define	HASH64KTABLESIZE (1U << HASHLOG64K)
#define	LZ4_HASH64K_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH*8) - \
	HASHLOG64K))
#define	LZ4_HASH64K_VALUE(p)	LZ4_HASH64K_FUNCTION(A32(p))

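/*
 * Why the 64K variant exists (clarifying note, not upstream text): when
 * isize < LZ4_64KLIMIT, every hashed position fits in 16 bits, so the
 * table can hold U16 offsets instead of 4-byte HTYPE entries. The same
 * 16KB refTables allocation therefore holds twice as many entries, which
 * is why HASHLOG64K is HASH_LOG + 1: 2^13 entries * sizeof (U16) = 16KB.
 * Denser indexing of small inputs finds more matches, and the bounded
 * input size also removes the need for a MAX_DISTANCE check in the
 * match-search loop below.
 */
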
/*ARGSUSED*/
static int
LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
	struct refTables *srt = (struct refTables *)ctx;
	U16 *HashTable = (U16 *) (srt->hashTable);

	const BYTE *ip = (BYTE *) source;
	const BYTE *anchor = ip;
	const BYTE *const base = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;

	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	ip++;
	forwardH = LZ4_HASH64K_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if (forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH64K_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while (A32(ref) != A32(ip));

		/* Catch up */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    (ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend))
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while (ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
		if (A32(ref) == A32(ip)) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH64K_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255)
				*op++ = 255;
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}

static int
real_LZ4_compress(const char *source, char *dest, int isize, int osize)
{
	void *ctx;
	int result;

	ASSERT(lz4_cache != NULL);
	ctx = kmem_cache_alloc(lz4_cache, KM_SLEEP);

	/*
	 * out of kernel memory, gently fall through - this will disable
	 * compression in zio_compress_data
	 */
	if (ctx == NULL)
		return (0);

	memset(ctx, 0, sizeof (struct refTables));

	if (isize < LZ4_64KLIMIT)
		result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
	else
		result = LZ4_compressCtx(ctx, source, dest, isize, osize);

	kmem_cache_free(lz4_cache, ctx);
	return (result);
}

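/*
 * Dispatch note (clarifying, not upstream text): LZ4_64KLIMIT is
 * (1 << 16) + (MFLIMIT - 1) = 65547, the largest input for which every
 * match offset still fits in the 64K variant's 16-bit hash entries;
 * anything bigger takes the general-purpose LZ4_compressCtx() path.
 */
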
/* Decompression functions */

/*
 * Note: The decoding functions real_LZ4_uncompress() and
 * LZ4_uncompress_unknownOutputSize() are safe against "buffer overflow"
 * attacks. They will never write nor read outside of the provided
 * output buffers. LZ4_uncompress_unknownOutputSize() also ensures that
 * it will never read outside of the input buffer. A corrupted input
 * will produce an error result, a negative int, indicating the position
 * of the error within the input stream.
 *
 * Note[2]: real_LZ4_uncompress(), referred to above, is not used in ZFS so
 * its code is not present here.
 */

static int
LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
    int maxOutputSize)
{
	/* Local Variables */
	const BYTE *restrict ip = (const BYTE *) source;
	const BYTE *const iend = ip + isize;
	const BYTE *ref;

	BYTE *op = (BYTE *) dest;
	BYTE *const oend = op + maxOutputSize;
	BYTE *cpy;

	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
#if LZ4_ARCH64
	size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
#endif

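	/*
	 * Clarifying note (not upstream text): a match whose offset
	 * (op - ref) is smaller than STEPSIZE overlaps the bytes still
	 * being written; offset 1, for instance, is one byte repeated.
	 * The copy code below therefore starts with four single-byte
	 * copies (safe under any overlap), then steps ref back by
	 * dec32table[offset] so the next 4-byte word copy continues the
	 * periodic pattern, and on 64-bit backs ref up by a further
	 * dec64table[offset] before resuming full-word copies.
	 */
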
	/* Main Loop */
	while (ip < iend) {
		unsigned token;
		size_t length;

		/* get runlength */
		token = *ip++;
		if ((length = (token >> ML_BITS)) == RUN_MASK) {
			int s = 255;
			while ((ip < iend) && (s == 255)) {
				s = *ip++;
				length += s;
			}
		}
		/* copy literals */
		cpy = op + length;
		/* CORNER-CASE: cpy might overflow. */
		if (cpy < op)
			goto _output_error;	/* cpy was overflowed, bail! */
		if ((cpy > oend - COPYLENGTH) ||
		    (ip + length > iend - COPYLENGTH)) {
			if (cpy > oend)
				/* Error: writes beyond output buffer */
				goto _output_error;
			if (ip + length != iend)
				/*
				 * Error: the LZ4 format requires consuming
				 * all input at this stage
				 */
				goto _output_error;
			(void) memcpy(op, ip, length);
			op += length;
			/* Necessarily EOF, due to parsing restrictions */
			break;
		}
		LZ4_WILDCOPY(ip, op, cpy);
		ip -= (op - cpy);
		op = cpy;

		/* get offset */
		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
		ip += 2;
		if (ref < (BYTE * const) dest)
			/*
			 * Error: offset creates reference outside of
			 * destination buffer
			 */
			goto _output_error;

		/* get matchlength */
		if ((length = (token & ML_MASK)) == ML_MASK) {
			while (ip < iend) {
				int s = *ip++;
				length += s;
				if (s == 255)
					continue;
				break;
			}
		}
		/* copy repeated sequence */
		if (unlikely(op - ref < STEPSIZE)) {
#if LZ4_ARCH64
			size_t dec64 = dec64table[op-ref];
#else
			const int dec64 = 0;
#endif
			op[0] = ref[0];
			op[1] = ref[1];
			op[2] = ref[2];
			op[3] = ref[3];
			op += 4;
			ref += 4;
			ref -= dec32table[op-ref];
			A32(op) = A32(ref);
			op += STEPSIZE - 4;
			ref -= dec64;
		} else {
			LZ4_COPYSTEP(ref, op);
		}
		cpy = op + length - (STEPSIZE - 4);
		if (cpy > oend - COPYLENGTH) {
			if (cpy > oend)
				/*
				 * Error: request to write outside of
				 * destination buffer
				 */
				goto _output_error;
			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
			while (op < cpy)
				*op++ = *ref++;
			op = cpy;
			if (op == oend)
				/*
				 * Check EOF (should never happen, since
				 * last 5 bytes are supposed to be literals)
				 */
				goto _output_error;
			continue;
		}
		LZ4_SECURECOPY(ref, op, cpy);
		op = cpy;	/* correction */
	}

	/* end of decoding */
	return (int)(((char *)op) - dest);

	/* write overflow error detected */
	_output_error:
	return (int)(-(((char *)ip) - source));
}

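/*
 * Worked example (illustrative, not upstream text): the 6-byte stream
 *
 *	{ 0x50, 'H', 'e', 'l', 'l', 'o' }
 *
 * is one token (literal run of 5, no match) followed by the literals.
 * LZ4_uncompress_unknownOutputSize(src, dst, 6, 5) takes the tail path
 * above (ip + length == iend), copies "Hello" into dst, and returns 5.
 */
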
void
lz4_init(void)
{
	lz4_cache = kmem_cache_create("lz4_cache",
	    sizeof (struct refTables), 0, NULL, NULL, NULL, NULL, NULL, 0);
}

void
lz4_fini(void)
{
	if (lz4_cache) {
		kmem_cache_destroy(lz4_cache);
		lz4_cache = NULL;
	}
}