/*
 * LZ4 - Fast LZ compression algorithm
 * Header File
 * Copyright (C) 2011-2013, Yann Collet.
 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * You can contact the author at :
 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
 * - LZ4 source repository : http://code.google.com/p/lz4/
 */

#include <sys/zfs_context.h>

static int real_LZ4_compress(const char *source, char *dest, int isize,
    int osize);
static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
    int isize, int maxOutputSize);
static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);
static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);

static kmem_cache_t *lz4_cache;

/*ARGSUSED*/
size_t
lz4_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
{
	uint32_t bufsiz;
	char *dest = d_start;

	ASSERT(d_len >= sizeof (bufsiz));

	bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
	    d_len - sizeof (bufsiz));

	/* Signal an error if the compression routine returned zero. */
	if (bufsiz == 0)
		return (s_len);

	/*
	 * Encode the compressed buffer size at the start. We'll need this in
	 * decompression to counter the effects of padding which might be
	 * added to the compressed buffer and which, if unhandled, would
	 * confuse the hell out of our decompression function.
	 */
	*(uint32_t *)dest = BE_32(bufsiz);

	return (bufsiz + sizeof (bufsiz));
}

/*ARGSUSED*/
int
lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
{
	const char *src = s_start;
	uint32_t bufsiz = BE_IN32(src);

	/* invalid compressed buffer size encoded at start */
	if (bufsiz + sizeof (bufsiz) > s_len)
		return (1);

	/*
	 * Returns 0 on success (decompression function returned non-negative)
	 * and non-zero on failure (decompression function returned negative).
	 */
	return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
	    d_start, bufsiz, d_len) < 0);
}
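
/*
 * Illustrative sketch (not part of the original source, compile-guarded):
 * how a caller might round-trip a buffer through lz4_compress() and
 * lz4_decompress(). The function and buffer names are hypothetical.
 * lz4_compress() returns s_len when compression fails or saves no space,
 * which callers treat as "store uncompressed"; the 4-byte big-endian size
 * header is prepended and consumed internally by these two functions.
 */
#if 0
static int
lz4_roundtrip_example(void *src, size_t s_len, void *scratch, void *out)
{
	/* scratch must hold the compressed payload plus the size header */
	size_t c_len = lz4_compress(src, scratch, s_len, s_len, 0);

	if (c_len >= s_len)
		return (-1);	/* no space saved; store src uncompressed */

	/* decompress back into a buffer of the original size */
	return (lz4_decompress(scratch, out, c_len, s_len, 0));
}
#endif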

/*
 * LZ4 API Description:
 *
 * Simple Functions:
 * real_LZ4_compress() :
 *	isize  : is the input size. Max supported value is ~1.9GB
 *	return : the number of bytes written in buffer dest
 *	    or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
 *	note : destination buffer must be already allocated.
 *	    destination buffer must be sized to handle worst-case
 *	    situations (input data not compressible); worst-case size
 *	    evaluation is provided by function LZ4_compressBound().
 *
 * real_LZ4_uncompress() :
 *	osize  : is the output size, therefore the original size
 *	return : the number of bytes read in the source buffer.
 *	    If the source stream is malformed, the function will stop
 *	    decoding and return a negative result, indicating the byte
 *	    position of the faulty instruction. This function never
 *	    writes beyond dest + osize, and is therefore protected
 *	    against malicious data packets.
 *	note : destination buffer must be already allocated
 *
 * Advanced Functions
 *
 * LZ4_compressBound() :
 *	Provides the maximum size that LZ4 may output in a "worst case"
 *	scenario (input data not compressible), primarily useful for memory
 *	allocation of the output buffer.
 *
 *	isize  : is the input size. Max supported value is ~1.9GB
 *	return : maximum output size in a "worst case" scenario
 *	note : this function is limited by "int" range (2^31-1)
 *
 * LZ4_uncompress_unknownOutputSize() :
 *	isize  : is the input size, therefore the compressed size
 *	maxOutputSize : is the size of the destination buffer (which must be
 *	    already allocated)
 *	return : the number of bytes decoded in the destination buffer
 *	    (necessarily <= maxOutputSize). If the source stream is
 *	    malformed, the function will stop decoding and return a
 *	    negative result, indicating the byte position of the faulty
 *	    instruction. This function never writes beyond dest +
 *	    maxOutputSize, and is therefore protected against malicious
 *	    data packets.
 *	note : Destination buffer must be already allocated.
 *	    This version is slightly slower than real_LZ4_uncompress()
 *
 * LZ4_compressCtx() :
 *	This function explicitly handles the CTX memory structure.
 *
 *	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 *	by the caller (either on the stack or using kmem_cache_alloc). Passing
 *	NULL isn't valid.
 *
 * LZ4_compress64kCtx() :
 *	Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
 *	isize *Must* be <64KB, otherwise the output will be corrupted.
 *
 *	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 *	by the caller (either on the stack or using kmem_cache_alloc). Passing
 *	NULL isn't valid.
 */
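
/*
 * Illustrative sketch (not part of the original source, compile-guarded):
 * sizing the destination buffer for real_LZ4_compress() per the contract
 * above. LZ4_compressBound() itself is not defined in this file, so the
 * macro below uses the conventional LZ4 worst-case formula; treat it as
 * an assumption for illustration, not as this file's API.
 */
#if 0
#define	EXAMPLE_LZ4_BOUND(isize)	((isize) + ((isize) / 255) + 16)

static int
compress_with_bound_example(const char *src, char *dst, int isize)
{
	/* dst must hold at least EXAMPLE_LZ4_BOUND(isize) bytes */
	return (real_LZ4_compress(src, dst, isize, EXAMPLE_LZ4_BOUND(isize)));
}
#endif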

/*
 * Tuning parameters
 */

/*
 * COMPRESSIONLEVEL: Increasing this value improves compression ratio.
 * Lowering this value reduces memory usage. Reduced memory usage
 * typically improves speed, due to cache effects (ex: L1 32KB for Intel,
 * L1 64KB for AMD). Memory usage formula: N -> 2^(N+2) bytes
 * (examples: 12 -> 16KB; 17 -> 512KB)
 */
#define	COMPRESSIONLEVEL 12

/*
 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
 * algorithm skip data segments considered "incompressible" sooner.
 * This may decrease compression ratio dramatically, but will be
 * faster on incompressible data. Increasing this value will make
 * the algorithm search longer before declaring a segment
 * "incompressible". This could improve compression a bit, but will
 * be slower on incompressible data. The default value (6) is recommended.
 */
#define	NOTCOMPRESSIBLE_CONFIRMATION 6

/*
 * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to
 * performance on big-endian CPUs, but the resulting compressed stream
 * will be incompatible with little-endian CPUs. You can set this option
 * to 1 in situations where data will stay within a closed environment.
 * This option is useless on little-endian CPUs (such as x86).
 */
/* #define	BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */

/*
 * CPU Feature Detection
 */

/* 32 or 64 bits ? */
#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || \
	defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || \
	defined(__LP64__) || defined(_LP64))
#define	LZ4_ARCH64 1
#else
#define	LZ4_ARCH64 0
#endif

/*
 * Little Endian or Big Endian?
 * Note: overwrite the below #define if you know your architecture endianness.
 */
#if (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || \
	defined(_BIG_ENDIAN) || defined(_ARCH_PPC) || defined(__PPC__) || \
	defined(__PPC) || defined(PPC) || defined(__powerpc__) || \
	defined(__powerpc) || defined(powerpc) || \
	((defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))))
#define	LZ4_BIG_ENDIAN 1
#else
/*
 * Little Endian assumed. PDP Endian and other very rare endian formats
 * are unsupported.
 */
#endif

/*
 * Unaligned memory access is automatically enabled for "common" CPUs,
 * such as x86. For other CPUs, the compiler will be more cautious, and
 * insert extra code to ensure aligned access is respected. If you know
 * your target CPU supports unaligned memory access, you may want to
 * force this option manually to improve performance.
 */
#if defined(__ARM_FEATURE_UNALIGNED)
#define	LZ4_FORCE_UNALIGNED_ACCESS 1
#endif

/*
 * Illumos : we can't use GCC's __builtin_ctz family of builtins in the
 *	kernel
 * Linux : we can use GCC's __builtin_ctz family of builtins in the
 *	kernel
 */
#undef	LZ4_FORCE_SW_BITCOUNT

/*
 * Compiler Options
 */
/* Disable restrict */
#define	restrict

#define	GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)

#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
#define	expect(expr, value)	(__builtin_expect((expr), (value)))
#else
#define	expect(expr, value)	(expr)
#endif

#ifndef likely
#define	likely(expr)	expect((expr) != 0, 1)
#endif

#ifndef unlikely
#define	unlikely(expr)	expect((expr) != 0, 0)
#endif

#define	lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
	(((x) & 0xffu) << 8)))

/* Basic types */
#define	BYTE	uint8_t
#define	U16	uint16_t
#define	U32	uint32_t
#define	S32	int32_t
#define	U64	uint64_t

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack(1)
#endif

typedef struct _U16_S {
	U16 v;
} U16_S;
typedef struct _U32_S {
	U32 v;
} U32_S;
typedef struct _U64_S {
	U64 v;
} U64_S;

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack()
#endif

#define	A64(x)	(((U64_S *)(x))->v)
#define	A32(x)	(((U32_S *)(x))->v)
#define	A16(x)	(((U16_S *)(x))->v)

/*
 * Constants
 */
#define	MINMATCH 4

#define	HASH_LOG COMPRESSIONLEVEL
#define	HASHTABLESIZE (1 << HASH_LOG)
#define	HASH_MASK (HASHTABLESIZE - 1)

#define	SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
	NOTCOMPRESSIBLE_CONFIRMATION : 2)

#define	COPYLENGTH 8
#define	LASTLITERALS 5
#define	MFLIMIT (COPYLENGTH + MINMATCH)
#define	MINLENGTH (MFLIMIT + 1)

#define	MAXD_LOG 16
#define	MAX_DISTANCE ((1 << MAXD_LOG) - 1)

#define	ML_BITS 4
#define	ML_MASK ((1U<<ML_BITS)-1)
#define	RUN_BITS (8-ML_BITS)
#define	RUN_MASK ((1U<<RUN_BITS)-1)

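/*
 * Illustrative sketch (not part of the original source, compile-guarded):
 * the sequence token layout implied by ML_BITS/ML_MASK/RUN_MASK. The high
 * 4 bits of each token hold the literal run length and the low 4 bits
 * hold the match length; a field equal to its mask (15) means additional
 * length bytes follow, as the codec below implements. The function name
 * is hypothetical.
 */
#if 0
static void
decode_token_example(unsigned token, unsigned *lit_len, unsigned *match_len)
{
	*lit_len = token >> ML_BITS;	/* == RUN_MASK => extended length */
	*match_len = token & ML_MASK;	/* == ML_MASK => extended length */
}
#endif
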
/*
 * Architecture-specific macros
 */
#if LZ4_ARCH64
#define	STEPSIZE 8
#define	UARCH U64
#define	AARCH A64
#define	LZ4_COPYSTEP(s, d)	A64(d) = A64(s); d += 8; s += 8;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
#define	LZ4_SECURECOPY(s, d, e)	if (d < e) LZ4_WILDCOPY(s, d, e)
#define	HTYPE U32
#define	INITBASE(base)		const BYTE* const base = ip
#else /* !LZ4_ARCH64 */
#define	STEPSIZE 4
#define	UARCH U32
#define	AARCH A32
#define	LZ4_COPYSTEP(s, d)	A32(d) = A32(s); d += 4; s += 4;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
#define	LZ4_SECURECOPY		LZ4_WILDCOPY
#define	HTYPE const BYTE *
#define	INITBASE(base)		const int base = 0
#endif /* !LZ4_ARCH64 */

#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) \
	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, i) \
	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
#else
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p)	{ d = (s) - A16(p); }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, v)		{ A16(p) = v; p += 2; }
#endif


/* Local structures */
struct refTables {
	HTYPE hashTable[HASHTABLESIZE];
};


/* Macros */
#define	LZ4_HASH_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH * 8) - \
	HASH_LOG))
#define	LZ4_HASH_VALUE(p)	LZ4_HASH_FUNCTION(A32(p))
#define	LZ4_WILDCOPY(s, d, e)	do { LZ4_COPYPACKET(s, d) } while (d < e);
#define	LZ4_BLINDCOPY(s, d, l)	{ BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
	d = e; }

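/*
 * Illustrative sketch (not part of the original source, compile-guarded):
 * LZ4_HASH_FUNCTION() is Knuth-style multiplicative hashing. 2654435761
 * is approximately 2^32 divided by the golden ratio, so the multiply
 * mixes all four input bytes, and the shift keeps the top HASH_LOG bits
 * as the table index. The function name is hypothetical.
 */
#if 0
static U32
hash_example(const BYTE *p)
{
	/* identical to LZ4_HASH_VALUE(p): index in [0, HASHTABLESIZE) */
	return ((A32(p) * 2654435761U) >> ((MINMATCH * 8) - HASH_LOG));
}
#endif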
368
/* Private functions */
#if LZ4_ARCH64

static inline int
LZ4_NbCommonBytes(register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clzll(val) >> 3);
#else
	int r;
	if (!(val >> 32)) {
		r = 4;
	} else {
		r = 0;
		val >>= 32;
	}
	if (!(val >> 16)) {
		r += 2;
		val >>= 8;
	} else {
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctzll(val) >> 3);
#else
	static const int DeBruijnBytePos[64] = {
		0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7,
		0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7,
		7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6,
		7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7
	};
	return (DeBruijnBytePos[((U64)((val & -val) * 0x0218A392CDABBD3F)) >>
	    58]);
#endif
#endif
}

#else

static inline int
LZ4_NbCommonBytes(register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clz(val) >> 3);
#else
	int r;
	if (!(val >> 16)) {
		r = 2;
		val >>= 8;
	} else {
		r = 0;
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctz(val) >> 3);
#else
	static const int DeBruijnBytePos[32] = {
		0, 0, 3, 0, 3, 1, 3, 0,
		3, 2, 2, 1, 3, 2, 0, 1,
		3, 3, 1, 2, 2, 2, 2, 0,
		3, 1, 2, 0, 1, 0, 1, 1
	};
	return (DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >>
	    27]);
#endif
#endif
}

#endif

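/*
 * Illustrative sketch (not part of the original source, compile-guarded):
 * why the De Bruijn fallback above works. (val & -val) isolates the
 * lowest set bit, a power of two 2^k; multiplying the De Bruijn constant
 * by 2^k is a left shift by k, so the top 5 bits of the product uniquely
 * identify k, and the table maps that index to the byte count k >> 3.
 * The function name is hypothetical; val must be non-zero, as guaranteed
 * by the callers of LZ4_NbCommonBytes().
 */
#if 0
static int
trailing_zero_bytes_example(U32 val)
{
	/* same table and constant as the 32-bit little-endian path above */
	static const int pos[32] = {
		0, 0, 3, 0, 3, 1, 3, 0,
		3, 2, 2, 1, 3, 2, 0, 1,
		3, 3, 1, 2, 2, 2, 2, 0,
		3, 1, 2, 0, 1, 0, 1, 1
	};
	return (pos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]);
}
#endif
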
/* Compression functions */

/*ARGSUSED*/
static int
LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
	struct refTables *srt = (struct refTables *)ctx;
	HTYPE *HashTable = (HTYPE *)(srt->hashTable);

	const BYTE *ip = (BYTE *)source;
	INITBASE(base);
	const BYTE *anchor = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *)dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *)dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;

	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
	ip++;
	forwardH = LZ4_HASH_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if (unlikely(forwardIp > mflimit)) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));

		/* Catch up */
		while ((ip > anchor) && (ref > (BYTE *)source) &&
		    unlikely(ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend))
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while (likely(ip < matchlimit - (STEPSIZE - 1))) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255) {
				*op++ = 255;
			}
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}


/* Note : this function is valid only if isize < LZ4_64KLIMIT */
#define	LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
#define	HASHLOG64K (HASH_LOG + 1)
#define	HASH64KTABLESIZE (1U << HASHLOG64K)
#define	LZ4_HASH64K_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH*8) - \
	HASHLOG64K))
#define	LZ4_HASH64K_VALUE(p)	LZ4_HASH64K_FUNCTION(A32(p))

/*ARGSUSED*/
static int
LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
	struct refTables *srt = (struct refTables *)ctx;
	U16 *HashTable = (U16 *)(srt->hashTable);

	const BYTE *ip = (BYTE *)source;
	const BYTE *anchor = ip;
	const BYTE *const base = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *)dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *)dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;

	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	ip++;
	forwardH = LZ4_HASH64K_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if (forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH64K_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while (A32(ref) != A32(ip));

		/* Catch up */
		while ((ip > anchor) && (ref > (BYTE *)source) &&
		    (ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend))
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while (ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
		if (A32(ref) == A32(ip)) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH64K_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255)
				*op++ = 255;
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}

static int
real_LZ4_compress(const char *source, char *dest, int isize, int osize)
{
	void *ctx;
	int result;

	ASSERT(lz4_cache != NULL);
	ctx = kmem_cache_alloc(lz4_cache, KM_PUSHPAGE);

	/*
	 * Out of kernel memory: gently fall through - this will disable
	 * compression in zio_compress_data.
	 */
	if (ctx == NULL)
		return (0);

	memset(ctx, 0, sizeof (struct refTables));

	if (isize < LZ4_64KLIMIT)
		result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
	else
		result = LZ4_compressCtx(ctx, source, dest, isize, osize);

	kmem_cache_free(lz4_cache, ctx);
	return (result);
}

/* Decompression functions */

/*
 * Note: The decoding functions real_LZ4_uncompress() and
 *	LZ4_uncompress_unknownOutputSize() are safe against "buffer overflow"
 *	attacks. They will never write nor read outside of the provided
 *	output buffers. LZ4_uncompress_unknownOutputSize() also ensures that
 *	it will never read outside of the input buffer. A corrupted input
 *	will produce an error result, a negative int, indicating the position
 *	of the error within the input stream.
 */

static int
LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
    int maxOutputSize)
{
	/* Local Variables */
	const BYTE *restrict ip = (const BYTE *)source;
	const BYTE *const iend = ip + isize;
	const BYTE *ref;

	BYTE *op = (BYTE *)dest;
	BYTE *const oend = op + maxOutputSize;
	BYTE *cpy;

	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
#if LZ4_ARCH64
	size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
#endif

	/* Main Loop */
	while (ip < iend) {
		unsigned token;
		size_t length;

		/* get runlength */
		token = *ip++;
		if ((length = (token >> ML_BITS)) == RUN_MASK) {
			int s = 255;
			while ((ip < iend) && (s == 255)) {
				s = *ip++;
				length += s;
			}
		}
		/* copy literals */
		cpy = op + length;
		if ((cpy > oend - COPYLENGTH) ||
		    (ip + length > iend - COPYLENGTH)) {
			if (cpy > oend)
				/* Error: writes beyond output buffer */
				goto _output_error;
			if (ip + length != iend)
				/*
				 * Error: LZ4 format requires to consume all
				 * input at this stage
				 */
				goto _output_error;
			(void) memcpy(op, ip, length);
			op += length;
			/* Necessarily EOF, due to parsing restrictions */
			break;
		}
		LZ4_WILDCOPY(ip, op, cpy);
		ip -= (op - cpy);
		op = cpy;

		/* get offset */
		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
		ip += 2;
		if (ref < (BYTE * const)dest)
			/*
			 * Error: offset creates reference outside of
			 * destination buffer
			 */
			goto _output_error;

		/* get matchlength */
		if ((length = (token & ML_MASK)) == ML_MASK) {
			while (ip < iend) {
				int s = *ip++;
				length += s;
				if (s == 255)
					continue;
				break;
			}
		}
		/* copy repeated sequence */
		if (unlikely(op - ref < STEPSIZE)) {
#if LZ4_ARCH64
			size_t dec64 = dec64table[op - ref];
#else
			const int dec64 = 0;
#endif
			op[0] = ref[0];
			op[1] = ref[1];
			op[2] = ref[2];
			op[3] = ref[3];
			op += 4;
			ref += 4;
			ref -= dec32table[op - ref];
			A32(op) = A32(ref);
			op += STEPSIZE - 4;
			ref -= dec64;
		} else {
			LZ4_COPYSTEP(ref, op);
		}
		cpy = op + length - (STEPSIZE - 4);
		if (cpy > oend - COPYLENGTH) {
			if (cpy > oend)
				/*
				 * Error: request to write outside of
				 * destination buffer
				 */
				goto _output_error;
			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
			while (op < cpy)
				*op++ = *ref++;
			op = cpy;
			if (op == oend)
				/*
				 * Check EOF (should never happen, since
				 * last 5 bytes are supposed to be literals)
				 */
				goto _output_error;
			continue;
		}
		LZ4_SECURECOPY(ref, op, cpy);
		op = cpy;	/* correction */
	}

	/* end of decoding */
	return (int)(((char *)op) - dest);

	/* write overflow error detected */
	_output_error:
	return (int)(-(((char *)ip) - source));
}

void
lz4_init(void)
{
	lz4_cache = kmem_cache_create("lz4_cache",
	    sizeof (struct refTables), 0, NULL, NULL, NULL, NULL, NULL, 0);
}

void
lz4_fini(void)
{
	if (lz4_cache) {
		kmem_cache_destroy(lz4_cache);
		lz4_cache = NULL;
	}
}
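
/*
 * Illustrative sketch (not part of the original source, compile-guarded):
 * the expected module lifecycle. lz4_init() must run before any
 * compression call so that real_LZ4_compress() can allocate its
 * hash-table context from lz4_cache; lz4_fini() tears the cache down on
 * unload. The function name is hypothetical.
 */
#if 0
static void
lz4_lifecycle_example(void)
{
	lz4_init();
	/* ... lz4_compress() / lz4_decompress() calls ... */
	lz4_fini();
}
#endif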