/*
 * LZ4 - Fast LZ compression algorithm
 * Header File
 * Copyright (C) 2011-2013, Yann Collet.
 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * You can contact the author at :
 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
 * - LZ4 source repository : http://code.google.com/p/lz4/
 */

/*
 * N.B. - This file seems to be based on LZ4 r85, dated Dec 10, 2012
 */

#include <sys/zfs_context.h>
#include <sys/zio_compress.h>

static int real_LZ4_compress(const char *source, char *dest, int isize,
    int osize);
static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);
static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);

/* See lz4.c */
int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
    int isize, int maxOutputSize);

static kmem_cache_t *lz4_cache;

size_t
lz4_compress_zfs(void *s_start, void *d_start, size_t s_len,
    size_t d_len, int n)
{
    (void) n;
    uint32_t bufsiz;
    char *dest = d_start;

    ASSERT(d_len >= sizeof (bufsiz));

    bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
        d_len - sizeof (bufsiz));

    /* Signal an error if the compression routine returned zero. */
    if (bufsiz == 0)
        return (s_len);

    /*
     * The exact compressed size is needed by the decompression routine,
     * so it is stored at the start of the buffer. Note that this may be
     * less than the compressed block size, which is rounded up to a
     * multiple of 1<<ashift.
     */
    *(uint32_t *)dest = BE_32(bufsiz);

    return (bufsiz + sizeof (bufsiz));
}
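
/*
 * Illustrative layout (hypothetical sizes): a 4096-byte block whose LZ4
 * payload compresses to 1000 bytes is emitted as
 *
 *     dest[0..3]    big-endian length header, 0x000003E8 (1000)
 *     dest[4..1003] raw LZ4 stream
 *
 * and lz4_compress_zfs() returns 1004. Returning s_len instead tells the
 * caller that the block did not compress and should be stored as-is.
 */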

int
lz4_decompress_zfs(void *s_start, void *d_start, size_t s_len,
    size_t d_len, int n)
{
    (void) n;
    const char *src = s_start;
    uint32_t bufsiz = BE_IN32(src);

    /* invalid compressed buffer size encoded at start */
    if (bufsiz + sizeof (bufsiz) > s_len)
        return (1);

    /*
     * Returns 0 on success (decompression function returned non-negative)
     * and non-zero on failure (decompression function returned negative).
     */
    return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
        d_start, bufsiz, d_len) < 0);
}
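
/*
 * Round-trip sketch (illustrative only; the buffer names and sizes are
 * hypothetical, and real callers go through zio_compress_data()):
 *
 *     size_t c_len = lz4_compress_zfs(src, cbuf, 4096, 3584, 0);
 *     if (c_len < 4096)
 *         (void) lz4_decompress_zfs(cbuf, dbuf, c_len, 4096, 0);
 *
 * A return of s_len (4096 here) would mean the data was left uncompressed
 * and no length header was written, so only the success path may be fed
 * back into lz4_decompress_zfs().
 */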

/*
 * LZ4 API Description:
 *
 * Simple Functions:
 * real_LZ4_compress() :
 *     isize : is the input size. Max supported value is ~1.9GB
 *     return : the number of bytes written in buffer dest
 *         or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
 *     note : destination buffer must be already allocated.
 *         destination buffer must be sized to handle worst-case
 *         situations (input data not compressible); worst-case size
 *         evaluation is provided by function LZ4_compressBound().
 *
 * real_LZ4_uncompress() :
 *     osize : is the output size, therefore the original size
 *     return : the number of bytes read in the source buffer.
 *         If the source stream is malformed, the function will stop
 *         decoding and return a negative result, indicating the byte
 *         position of the faulty instruction. This function never
 *         writes beyond dest + osize, and is therefore protected
 *         against malicious data packets.
 *     note : destination buffer must be already allocated
 *     note : real_LZ4_uncompress() is not used in ZFS so its code
 *         is not present here.
 *
 * Advanced Functions
 *
 * LZ4_compressBound() :
 *     Provides the maximum size that LZ4 may output in a "worst case"
 *     scenario (input data not compressible), primarily useful for memory
 *     allocation of the output buffer.
 *
 *     isize : is the input size. Max supported value is ~1.9GB
 *     return : maximum output size in a "worst case" scenario
 *     note : this function is limited by "int" range (2^31-1)
 *
 * LZ4_uncompress_unknownOutputSize() :
 *     isize : is the input size, therefore the compressed size
 *     maxOutputSize : is the size of the destination buffer (which must be
 *         already allocated)
 *     return : the number of bytes decoded in the destination buffer
 *         (necessarily <= maxOutputSize). If the source stream is
 *         malformed, the function will stop decoding and return a
 *         negative result, indicating the byte position of the faulty
 *         instruction. This function never writes beyond dest +
 *         maxOutputSize, and is therefore protected against malicious
 *         data packets.
 *     note : Destination buffer must be already allocated.
 *         This version is slightly slower than real_LZ4_uncompress()
 *
 * LZ4_compressCtx() :
 *     This function explicitly handles the CTX memory structure.
 *
 *     ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 *     by the caller (either on the stack or using kmem_cache_alloc). Passing
 *     NULL isn't valid.
 *
 * LZ4_compress64kCtx() :
 *     Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
 *     isize *Must* be <64KB, otherwise the output will be corrupted.
 *
 *     ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 *     by the caller (either on the stack or using kmem_cache_alloc). Passing
 *     NULL isn't valid.
 */
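
/*
 * LZ4_compressBound() itself is not part of this file; for reference, the
 * upstream r85 sources define it as the constant expression
 * isize + (isize / 255) + 16, which bounds the expansion of
 * incompressible input.
 */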

/*
 * Tuning parameters
 */

/*
 * COMPRESSIONLEVEL: Increasing this value improves compression ratio.
 * Lowering this value reduces memory usage. Reduced memory usage
 * typically improves speed, due to cache effects (ex: L1 32KB for Intel,
 * L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
 * (examples : 12 -> 16KB ; 17 -> 512KB)
 */
#define COMPRESSIONLEVEL 12

/*
 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
 * algorithm skip faster over data segments considered "incompressible".
 * This may decrease compression ratio dramatically, but will be
 * faster on incompressible data. Increasing this value will make
 * the algorithm search more before declaring a segment "incompressible".
 * This could improve compression a bit, but will be slower on
 * incompressible data. The default value (6) is recommended.
 */
#define NOTCOMPRESSIBLE_CONFIRMATION 6

/*
 * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to
 * performance on big-endian CPUs, but the resulting compressed stream
 * will be incompatible with little-endian CPUs. You can set this option
 * to 1 in situations where data will stay within a closed environment.
 * This option is useless on little-endian CPUs (such as x86).
 */
/* #define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */

/*
 * CPU Feature Detection
 */

/* 32 or 64 bits ? */
#if defined(_LP64)
#define LZ4_ARCH64 1
#else
#define LZ4_ARCH64 0
#endif

/*
 * Little Endian or Big Endian?
 * Note: overwrite the below #define if you know your architecture endianness.
 */
#if defined(_ZFS_BIG_ENDIAN)
#define LZ4_BIG_ENDIAN 1
#else
/*
 * Little Endian assumed. PDP Endian and other very rare endian formats
 * are unsupported.
 */
#undef LZ4_BIG_ENDIAN
#endif

/*
 * Unaligned memory access is automatically enabled for "common" CPUs,
 * such as x86. For other CPUs, the compiler will be more cautious and
 * insert extra code to ensure aligned access is respected. If you know
 * your target CPU supports unaligned memory access, you may want to
 * force this option manually to improve performance.
 */
#if defined(__ARM_FEATURE_UNALIGNED)
#define LZ4_FORCE_UNALIGNED_ACCESS 1
#endif

/*
 * Illumos : we can't use GCC's __builtin_ctz family of builtins in the
 *	kernel
 * Linux : we can use GCC's __builtin_ctz family of builtins in the
 *	kernel
 */
#undef LZ4_FORCE_SW_BITCOUNT
#if defined(__sparc)
#define LZ4_FORCE_SW_BITCOUNT
#endif

/*
 * Compiler Options
 */
/* Disable restrict */
#define restrict

/*
 * Linux : GCC_VERSION is defined as of 3.9-rc1, so undefine it.
 * torvalds/linux@3f3f8d2f48acfd8ed3b8e6b7377935da57b27b16
 */
#ifdef GCC_VERSION
#undef GCC_VERSION
#endif

#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)

#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
#define expect(expr, value)    (__builtin_expect((expr), (value)))
#else
#define expect(expr, value)    (expr)
#endif

#ifndef likely
#define likely(expr)    expect((expr) != 0, 1)
#endif

#ifndef unlikely
#define unlikely(expr)  expect((expr) != 0, 0)
#endif

#define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
    (((x) & 0xffu) << 8)))

/* Basic types */
#define BYTE uint8_t
#define U16 uint16_t
#define U32 uint32_t
#define S32 int32_t
#define U64 uint64_t

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack(1)
#endif

typedef struct _U16_S {
    U16 v;
} U16_S;
typedef struct _U32_S {
    U32 v;
} U32_S;
typedef struct _U64_S {
    U64 v;
} U64_S;

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack()
#endif

#define A64(x) (((U64_S *)(x))->v)
#define A32(x) (((U32_S *)(x))->v)
#define A16(x) (((U16_S *)(x))->v)

/*
 * Constants
 */
#define MINMATCH 4

#define HASH_LOG COMPRESSIONLEVEL
#define HASHTABLESIZE (1 << HASH_LOG)
#define HASH_MASK (HASHTABLESIZE - 1)
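
/*
 * With the default COMPRESSIONLEVEL of 12, HASHTABLESIZE is 4096 entries;
 * at 4 bytes per entry (HTYPE, defined below, on both 32- and 64-bit
 * builds) the table occupies 16KB, matching the 2^(N+2)-byte formula
 * above.
 */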

#define SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
    NOTCOMPRESSIBLE_CONFIRMATION : 2)
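
/*
 * With the default SKIPSTRENGTH of 6, the match finder starts at
 * findMatchAttempts = (1 << 6) + 3 = 67 and advances each probe by
 * findMatchAttempts++ >> 6 bytes: roughly the first 60 probes step one
 * byte, the next 64 step two bytes, and so on, so scanning accelerates
 * through incompressible regions.
 */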

#define COPYLENGTH 8
#define LASTLITERALS 5
#define MFLIMIT (COPYLENGTH + MINMATCH)
#define MINLENGTH (MFLIMIT + 1)

#define MAXD_LOG 16
#define MAX_DISTANCE ((1 << MAXD_LOG) - 1)

#define ML_BITS 4
#define ML_MASK ((1U<<ML_BITS)-1)
#define RUN_BITS (8-ML_BITS)
#define RUN_MASK ((1U<<RUN_BITS)-1)
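
/*
 * Token layout implied by the masks above: every sequence begins with a
 * one-byte token whose high RUN_BITS (4) bits hold the literal-run length
 * and whose low ML_BITS (4) bits hold the match length minus MINMATCH.
 * Each field saturates at its mask (15), in which case additional
 * 255-valued length bytes follow. For example, token 0x52 means 5
 * literals followed by a match of 2 + MINMATCH = 6 bytes.
 */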


/*
 * Architecture-specific macros
 */
#if LZ4_ARCH64
#define STEPSIZE 8
#define UARCH U64
#define AARCH A64
#define LZ4_COPYSTEP(s, d)      A64(d) = A64(s); d += 8; s += 8;
#define LZ4_COPYPACKET(s, d)    LZ4_COPYSTEP(s, d)
#define LZ4_SECURECOPY(s, d, e) if (d < e) LZ4_WILDCOPY(s, d, e)
#define HTYPE U32
#define INITBASE(base)          const BYTE* const base = ip
#else /* !LZ4_ARCH64 */
#define STEPSIZE 4
#define UARCH U32
#define AARCH A32
#define LZ4_COPYSTEP(s, d)      A32(d) = A32(s); d += 4; s += 4;
#define LZ4_COPYPACKET(s, d)    LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
#define LZ4_SECURECOPY          LZ4_WILDCOPY
#define HTYPE const BYTE *
#define INITBASE(base)          const int base = 0
#endif /* !LZ4_ARCH64 */

#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
#define LZ4_READ_LITTLEENDIAN_16(d, s, p) \
    { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
#define LZ4_WRITE_LITTLEENDIAN_16(p, i) \
    { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
#else
#define LZ4_READ_LITTLEENDIAN_16(d, s, p)   { d = (s) - A16(p); }
#define LZ4_WRITE_LITTLEENDIAN_16(p, v)     { A16(p) = v; p += 2; }
#endif


/* Local structures */
struct refTables {
    HTYPE hashTable[HASHTABLESIZE];
};


/* Macros */
#define LZ4_HASH_FUNCTION(i)    (((i) * 2654435761U) >> ((MINMATCH * 8) - \
    HASH_LOG))
#define LZ4_HASH_VALUE(p)       LZ4_HASH_FUNCTION(A32(p))
#define LZ4_WILDCOPY(s, d, e)   do { LZ4_COPYPACKET(s, d) } while (d < e);
#define LZ4_BLINDCOPY(s, d, l)  { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
    d = e; }
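
/*
 * Note that LZ4_WILDCOPY() copies in COPYLENGTH-byte packets and may
 * overrun its end pointer e by up to COPYLENGTH - 1 bytes. This is safe
 * only because the output-limit checks in the compressors reserve slack
 * (LASTLITERALS plus token and offset bytes) at the end of the
 * destination buffer.
 */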


/* Private functions */
#if LZ4_ARCH64

static inline int
LZ4_NbCommonBytes(register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
    !defined(LZ4_FORCE_SW_BITCOUNT)
    return (__builtin_clzll(val) >> 3);
#else
    int r;
    if (!(val >> 32)) {
        r = 4;
    } else {
        r = 0;
        val >>= 32;
    }
    if (!(val >> 16)) {
        r += 2;
        val >>= 8;
    } else {
        val >>= 24;
    }
    r += (!val);
    return (r);
#endif
#else
#if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
    !defined(LZ4_FORCE_SW_BITCOUNT)
    return (__builtin_ctzll(val) >> 3);
#else
    static const int DeBruijnBytePos[64] = {
        0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7,
        0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7,
        7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6,
        7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7
    };
    return (DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
        58]);
#endif
#endif
}
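
/*
 * Worked example for the little-endian path: if the XOR difference is
 * val = 0x0000000000FF0000, it has 16 trailing zero bits, so
 * __builtin_ctzll(val) >> 3 == 2, i.e. the two lowest-addressed bytes
 * matched. The software fallback computes the same answer: (val & -val)
 * isolates the lowest set bit, and multiplying by the de Bruijn constant
 * maps each possible bit position to a unique 6-bit index into
 * DeBruijnBytePos.
 */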

#else

static inline int
LZ4_NbCommonBytes(register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if ((defined(__GNUC__) && (GCC_VERSION >= 304)) || defined(__clang__)) && \
    !defined(LZ4_FORCE_SW_BITCOUNT)
    return (__builtin_clz(val) >> 3);
#else
    int r;
    if (!(val >> 16)) {
        r = 2;
        val >>= 8;
    } else {
        r = 0;
        val >>= 24;
    }
    r += (!val);
    return (r);
#endif
#else
#if defined(__GNUC__) && (GCC_VERSION >= 304) && \
    !defined(LZ4_FORCE_SW_BITCOUNT)
    return (__builtin_ctz(val) >> 3);
#else
    static const int DeBruijnBytePos[32] = {
        0, 0, 3, 0, 3, 1, 3, 0,
        3, 2, 2, 1, 3, 2, 0, 1,
        3, 3, 1, 2, 2, 2, 2, 0,
        3, 1, 2, 0, 1, 0, 1, 1
    };
    return (DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
        27]);
#endif
#endif
}

#endif

/* Compression functions */

static int
LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
    struct refTables *srt = (struct refTables *)ctx;
    HTYPE *HashTable = (HTYPE *) (srt->hashTable);

    const BYTE *ip = (BYTE *) source;
    INITBASE(base);
    const BYTE *anchor = ip;
    const BYTE *const iend = ip + isize;
    const BYTE *const oend = (BYTE *) dest + osize;
    const BYTE *const mflimit = iend - MFLIMIT;
#define matchlimit (iend - LASTLITERALS)

    BYTE *op = (BYTE *) dest;

    int len, length;
    const int skipStrength = SKIPSTRENGTH;
    U32 forwardH;


    /* Init */
    if (isize < MINLENGTH)
        goto _last_literals;

    /* First Byte */
    HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
    ip++;
    forwardH = LZ4_HASH_VALUE(ip);

    /* Main Loop */
    for (;;) {
        int findMatchAttempts = (1U << skipStrength) + 3;
        const BYTE *forwardIp = ip;
        const BYTE *ref;
        BYTE *token;

        /* Find a match */
        do {
            U32 h = forwardH;
            int step = findMatchAttempts++ >> skipStrength;
            ip = forwardIp;
            forwardIp = ip + step;

            if (unlikely(forwardIp > mflimit)) {
                goto _last_literals;
            }

            forwardH = LZ4_HASH_VALUE(forwardIp);
            ref = base + HashTable[h];
            HashTable[h] = ip - base;

        } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));

        /* Catch up */
        while ((ip > anchor) && (ref > (BYTE *) source) &&
            unlikely(ip[-1] == ref[-1])) {
            ip--;
            ref--;
        }

        /* Encode Literal length */
        length = ip - anchor;
        token = op++;

        /* Check output limit */
        if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
            (length >> 8) > oend))
            return (0);

        if (length >= (int)RUN_MASK) {
            *token = (RUN_MASK << ML_BITS);
            len = length - RUN_MASK;
            for (; len > 254; len -= 255)
                *op++ = 255;
            *op++ = (BYTE)len;
        } else
            *token = (length << ML_BITS);

        /* Copy Literals */
        LZ4_BLINDCOPY(anchor, op, length);

        _next_match:
        /* Encode Offset */
        LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

        /* Start Counting */
        ip += MINMATCH;
        ref += MINMATCH;    /* MinMatch verified */
        anchor = ip;
        while (likely(ip < matchlimit - (STEPSIZE - 1))) {
            UARCH diff = AARCH(ref) ^ AARCH(ip);
            if (!diff) {
                ip += STEPSIZE;
                ref += STEPSIZE;
                continue;
            }
            ip += LZ4_NbCommonBytes(diff);
            goto _endCount;
        }
#if LZ4_ARCH64
        if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
            ip += 4;
            ref += 4;
        }
#endif
        if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
            ip += 2;
            ref += 2;
        }
        if ((ip < matchlimit) && (*ref == *ip))
            ip++;
        _endCount:

        /* Encode MatchLength */
        len = (ip - anchor);
        /* Check output limit */
        if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
            return (0);
        if (len >= (int)ML_MASK) {
            *token += ML_MASK;
            len -= ML_MASK;
            for (; len > 509; len -= 510) {
                *op++ = 255;
                *op++ = 255;
            }
            if (len > 254) {
                len -= 255;
                *op++ = 255;
            }
            *op++ = (BYTE)len;
        } else
            *token += len;

        /* Test end of chunk */
        if (ip > mflimit) {
            anchor = ip;
            break;
        }
        /* Fill table */
        HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;

        /* Test next position */
        ref = base + HashTable[LZ4_HASH_VALUE(ip)];
        HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
        if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
            token = op++;
            *token = 0;
            goto _next_match;
        }
        /* Prepare next loop */
        anchor = ip++;
        forwardH = LZ4_HASH_VALUE(ip);
    }

    _last_literals:
    /* Encode Last Literals */
    {
        int lastRun = iend - anchor;
        if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
            oend)
            return (0);
        if (lastRun >= (int)RUN_MASK) {
            *op++ = (RUN_MASK << ML_BITS);
            lastRun -= RUN_MASK;
            for (; lastRun > 254; lastRun -= 255) {
                *op++ = 255;
            }
            *op++ = (BYTE)lastRun;
        } else
            *op++ = (lastRun << ML_BITS);
        (void) memcpy(op, anchor, iend - anchor);
        op += iend - anchor;
    }

    /* End */
    return (int)(((char *)op) - dest);
}



/* Note : this function is valid only if isize < LZ4_64KLIMIT */
#define LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
#define HASHLOG64K (HASH_LOG + 1)
#define HASH64KTABLESIZE (1U << HASHLOG64K)
#define LZ4_HASH64K_FUNCTION(i)  (((i) * 2654435761U) >> ((MINMATCH*8) - \
    HASHLOG64K))
#define LZ4_HASH64K_VALUE(p)     LZ4_HASH64K_FUNCTION(A32(p))
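
/*
 * Rationale for the 64K variant: when isize < LZ4_64KLIMIT, every offset
 * ip - base fits in a U16, so the same refTables allocation can be
 * reinterpreted as twice as many 16-bit entries (HASHLOG64K = HASH_LOG +
 * 1), improving the match rate at no extra memory cost. It also lets the
 * match-finding loop below drop the MAX_DISTANCE window check, since no
 * match can be farther back than 64KB.
 */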

static int
LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
    struct refTables *srt = (struct refTables *)ctx;
    U16 *HashTable = (U16 *) (srt->hashTable);

    const BYTE *ip = (BYTE *) source;
    const BYTE *anchor = ip;
    const BYTE *const base = ip;
    const BYTE *const iend = ip + isize;
    const BYTE *const oend = (BYTE *) dest + osize;
    const BYTE *const mflimit = iend - MFLIMIT;
#define matchlimit (iend - LASTLITERALS)

    BYTE *op = (BYTE *) dest;

    int len, length;
    const int skipStrength = SKIPSTRENGTH;
    U32 forwardH;

    /* Init */
    if (isize < MINLENGTH)
        goto _last_literals;

    /* First Byte */
    ip++;
    forwardH = LZ4_HASH64K_VALUE(ip);

    /* Main Loop */
    for (;;) {
        int findMatchAttempts = (1U << skipStrength) + 3;
        const BYTE *forwardIp = ip;
        const BYTE *ref;
        BYTE *token;

        /* Find a match */
        do {
            U32 h = forwardH;
            int step = findMatchAttempts++ >> skipStrength;
            ip = forwardIp;
            forwardIp = ip + step;

            if (forwardIp > mflimit) {
                goto _last_literals;
            }

            forwardH = LZ4_HASH64K_VALUE(forwardIp);
            ref = base + HashTable[h];
            HashTable[h] = ip - base;

        } while (A32(ref) != A32(ip));

        /* Catch up */
        while ((ip > anchor) && (ref > (BYTE *) source) &&
            (ip[-1] == ref[-1])) {
            ip--;
            ref--;
        }

        /* Encode Literal length */
        length = ip - anchor;
        token = op++;

        /* Check output limit */
        if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
            (length >> 8) > oend))
            return (0);

        if (length >= (int)RUN_MASK) {
            *token = (RUN_MASK << ML_BITS);
            len = length - RUN_MASK;
            for (; len > 254; len -= 255)
                *op++ = 255;
            *op++ = (BYTE)len;
        } else
            *token = (length << ML_BITS);

        /* Copy Literals */
        LZ4_BLINDCOPY(anchor, op, length);

        _next_match:
        /* Encode Offset */
        LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

        /* Start Counting */
        ip += MINMATCH;
        ref += MINMATCH;    /* MinMatch verified */
        anchor = ip;
        while (ip < matchlimit - (STEPSIZE - 1)) {
            UARCH diff = AARCH(ref) ^ AARCH(ip);
            if (!diff) {
                ip += STEPSIZE;
                ref += STEPSIZE;
                continue;
            }
            ip += LZ4_NbCommonBytes(diff);
            goto _endCount;
        }
#if LZ4_ARCH64
        if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
            ip += 4;
            ref += 4;
        }
#endif
        if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
            ip += 2;
            ref += 2;
        }
        if ((ip < matchlimit) && (*ref == *ip))
            ip++;
        _endCount:

        /* Encode MatchLength */
        len = (ip - anchor);
        /* Check output limit */
        if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
            return (0);
        if (len >= (int)ML_MASK) {
            *token += ML_MASK;
            len -= ML_MASK;
            for (; len > 509; len -= 510) {
                *op++ = 255;
                *op++ = 255;
            }
            if (len > 254) {
                len -= 255;
                *op++ = 255;
            }
            *op++ = (BYTE)len;
        } else
            *token += len;

        /* Test end of chunk */
        if (ip > mflimit) {
            anchor = ip;
            break;
        }
        /* Fill table */
        HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;

        /* Test next position */
        ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
        HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
        if (A32(ref) == A32(ip)) {
            token = op++;
            *token = 0;
            goto _next_match;
        }
        /* Prepare next loop */
        anchor = ip++;
        forwardH = LZ4_HASH64K_VALUE(ip);
    }

    _last_literals:
    /* Encode Last Literals */
    {
        int lastRun = iend - anchor;
        if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
            oend)
            return (0);
        if (lastRun >= (int)RUN_MASK) {
            *op++ = (RUN_MASK << ML_BITS);
            lastRun -= RUN_MASK;
            for (; lastRun > 254; lastRun -= 255)
                *op++ = 255;
            *op++ = (BYTE)lastRun;
        } else
            *op++ = (lastRun << ML_BITS);
        (void) memcpy(op, anchor, iend - anchor);
        op += iend - anchor;
    }

    /* End */
    return (int)(((char *)op) - dest);
}

static int
real_LZ4_compress(const char *source, char *dest, int isize, int osize)
{
    void *ctx;
    int result;

    ASSERT(lz4_cache != NULL);
    ctx = kmem_cache_alloc(lz4_cache, KM_SLEEP);

    /*
     * out of kernel memory, gently fall through - this will disable
     * compression in zio_compress_data
     */
    if (ctx == NULL)
        return (0);
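
    /*
     * (With KM_SLEEP, kmem_cache_alloc() is expected to sleep until
     * memory is available rather than return NULL, so the check above
     * is defensive.)
     */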

    memset(ctx, 0, sizeof (struct refTables));

    if (isize < LZ4_64KLIMIT)
        result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
    else
        result = LZ4_compressCtx(ctx, source, dest, isize, osize);

    kmem_cache_free(lz4_cache, ctx);
    return (result);
}

void
lz4_init(void)
{
    lz4_cache = kmem_cache_create("lz4_cache",
        sizeof (struct refTables), 0, NULL, NULL, NULL, NULL, NULL, 0);
}

void
lz4_fini(void)
{
    if (lz4_cache) {
        kmem_cache_destroy(lz4_cache);
        lz4_cache = NULL;
    }
}