]>
Commit | Line | Data |
---|---|---|
34dc7c2f BB |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License, Version 1.0 only | |
6 | * (the "License"). You may not use this file except in compliance | |
7 | * with the License. | |
8 | * | |
9 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
10 | * or http://www.opensolaris.org/os/licensing. | |
11 | * See the License for the specific language governing permissions | |
12 | * and limitations under the License. | |
13 | * | |
14 | * When distributing Covered Code, include this CDDL HEADER in each | |
15 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
16 | * If applicable, add the following below this CDDL HEADER, with the | |
17 | * fields enclosed by brackets "[]" replaced with your own identifying | |
18 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
19 | * | |
20 | * CDDL HEADER END | |
21 | */ | |
22 | /* | |
23 | * Copyright (c) 1998 by Sun Microsystems, Inc. | |
24 | * All rights reserved. | |
25 | */ | |
26 | ||
27 | ||
28 | ||
29 | /* | |
30 | * NOTE: this file is compiled into the kernel, cprboot, and savecore. | |
31 | * Therefore it must compile in kernel, boot, and userland source context; | |
32 | * so if you ever change this code, avoid references to external symbols. | |
33 | * | |
34 | * This compression algorithm is a derivative of LZRW1, which I'll call | |
35 | * LZJB in the classic LZ* spirit. All LZ* (Lempel-Ziv) algorithms are | |
36 | * based on the same basic principle: when a "phrase" (sequences of bytes) | |
37 | * is repeated in a data stream, we can save space by storing a reference to | |
38 | * the previous instance of that phrase (a "copy item") rather than storing | |
39 | * the phrase itself (a "literal item"). The compressor remembers phrases | |
40 | * in a simple hash table (the "Lempel history") that maps three-character | |
41 | * sequences (the minimum match) to the addresses where they were last seen. | |
42 | * | |
43 | * A copy item must encode both the length and the location of the matching | |
44 | * phrase so that decompress() can reconstruct the original data stream. | |
45 | * For example, here's how we'd encode "yadda yadda yadda, blah blah blah" | |
46 | * (with "_" replacing spaces for readability): | |
47 | * | |
48 | * Original: | |
49 | * | |
50 | * y a d d a _ y a d d a _ y a d d a , _ b l a h _ b l a h _ b l a h | |
51 | * | |
52 | * Compressed: | |
53 | * | |
54 | * y a d d a _ 6 11 , _ b l a h 5 10 | |
55 | * | |
56 | * In the compressed output, the "6 11" simply means "to get the original | |
57 | * data, execute memmove(ptr, ptr - 6, 11)". Note that in this example, | |
58 | * the match at "6 11" actually extends beyond the current location and | |
59 | * overlaps it. That's OK; like memmove(), decompress() handles overlap. | |
60 | * | |
61 | * There's still one more thing decompress() needs to know, which is how to | |
62 | * distinguish literal items from copy items. We encode this information | |
63 | * in an 8-bit bitmap that precedes each 8 items of output; if the Nth bit | |
64 | * is set, then the Nth item is a copy item. Thus the full encoding for | |
65 | * the example above would be: | |
66 | * | |
67 | * 0x40 y a d d a _ 6 11 , 0x20 _ b l a h 5 10 | |
68 | * | |
69 | * Finally, the "6 11" isn't really encoded as the two byte values 6 and 11 | |
70 | * in the output stream because, empirically, we get better compression by | |
71 | * dedicating more bits to offset, fewer to match length. LZJB uses 6 bits | |
72 | * to encode the match length, 10 bits to encode the offset. Since copy-item | |
73 | * encoding consumes 2 bytes, we don't generate copy items unless the match | |
74 | * length is at least 3; therefore, we can store (length - 3) in the 6-bit | |
75 | * match length field, which extends the maximum match from 63 to 66 bytes. | |
76 | * Thus the 2-byte encoding for a copy item is as follows: | |
77 | * | |
78 | * byte[0] = ((length - 3) << 2) | (offset >> 8); | |
79 | * byte[1] = (uint8_t)offset; | |
80 | * | |
81 | * In our example above, an offset of 6 with length 11 would be encoded as: | |
82 | * | |
83 | * byte[0] = ((11 - 3) << 2) | (6 >> 8) = 0x20 | |
84 | * byte[1] = (uint8_t)6 = 0x6 | |
85 | * | |
86 | * Similarly, an offset of 5 with length 10 would be encoded as: | |
87 | * | |
88 | * byte[0] = ((10 - 3) << 2) | (5 >> 8) = 0x1c | |
89 | * byte[1] = (uint8_t)5 = 0x5 | |
90 | * | |
91 | * Putting it all together, the actual LZJB output for our example is: | |
92 | * | |
93 | * 0x40 y a d d a _ 0x2006 , 0x20 _ b l a h 0x1c05 | |
94 | * | |
95 | * The main differences between LZRW1 and LZJB are as follows: | |
96 | * | |
97 | * (1) LZRW1 is sloppy about buffer overruns. LZJB never reads past the | |
98 | * end of its input, and never writes past the end of its output. | |
99 | * | |
100 | * (2) LZJB allows a maximum match length of 66 (vs. 18 for LZRW1), with | |
101 | * the trade-off being a shorter look-behind (1K vs. 4K for LZRW1). | |
102 | * | |
103 | * (3) LZJB records only the low-order 16 bits of pointers in the Lempel | |
104 | * history (which is all we need since the maximum look-behind is 1K), | |
105 | * and uses only 256 hash entries (vs. 4096 for LZRW1). This makes | |
106 | * the compression hash small enough to allocate on the stack, which | |
107 | * solves two problems: (1) it saves 64K of kernel/cprboot memory, | |
108 | * and (2) it makes the code MT-safe without any locking, since we | |
109 | * don't have multiple threads sharing a common hash table. | |
110 | * | |
111 | * (4) LZJB is faster at both compression and decompression, has a | |
112 | * better compression ratio, and is somewhat simpler than LZRW1. | |
113 | * | |
114 | * Finally, note that LZJB is non-deterministic: given the same input, | |
115 | * two calls to compress() may produce different output. This is a | |
116 | * general characteristic of most Lempel-Ziv derivatives because there's | |
117 | * no need to initialize the Lempel history; not doing so saves time. | |
118 | */ | |
119 | ||
120 | #include <sys/types.h> | |
121 | ||
122 | #define MATCH_BITS 6 | |
123 | #define MATCH_MIN 3 | |
124 | #define MATCH_MAX ((1 << MATCH_BITS) + (MATCH_MIN - 1)) | |
125 | #define OFFSET_MASK ((1 << (16 - MATCH_BITS)) - 1) | |
126 | #define LEMPEL_SIZE 256 | |
127 | ||
128 | size_t | |
129 | compress(void *s_start, void *d_start, size_t s_len) | |
130 | { | |
131 | uchar_t *src = s_start; | |
132 | uchar_t *dst = d_start; | |
133 | uchar_t *cpy, *copymap; | |
134 | int copymask = 1 << (NBBY - 1); | |
135 | int mlen, offset; | |
136 | uint16_t *hp; | |
137 | uint16_t lempel[LEMPEL_SIZE]; /* uninitialized; see above */ | |
138 | ||
139 | while (src < (uchar_t *)s_start + s_len) { | |
140 | if ((copymask <<= 1) == (1 << NBBY)) { | |
141 | if (dst >= (uchar_t *)d_start + s_len - 1 - 2 * NBBY) { | |
142 | mlen = s_len; | |
143 | for (src = s_start, dst = d_start; mlen; mlen--) | |
144 | *dst++ = *src++; | |
145 | return (s_len); | |
146 | } | |
147 | copymask = 1; | |
148 | copymap = dst; | |
149 | *dst++ = 0; | |
150 | } | |
151 | if (src > (uchar_t *)s_start + s_len - MATCH_MAX) { | |
152 | *dst++ = *src++; | |
153 | continue; | |
154 | } | |
155 | hp = &lempel[((src[0] + 13) ^ (src[1] - 13) ^ src[2]) & | |
156 | (LEMPEL_SIZE - 1)]; | |
157 | offset = (intptr_t)(src - *hp) & OFFSET_MASK; | |
158 | *hp = (uint16_t)(uintptr_t)src; | |
159 | cpy = src - offset; | |
160 | if (cpy >= (uchar_t *)s_start && cpy != src && | |
161 | src[0] == cpy[0] && src[1] == cpy[1] && src[2] == cpy[2]) { | |
162 | *copymap |= copymask; | |
163 | for (mlen = MATCH_MIN; mlen < MATCH_MAX; mlen++) | |
164 | if (src[mlen] != cpy[mlen]) | |
165 | break; | |
166 | *dst++ = ((mlen - MATCH_MIN) << (NBBY - MATCH_BITS)) | | |
167 | (offset >> NBBY); | |
168 | *dst++ = (uchar_t)offset; | |
169 | src += mlen; | |
170 | } else { | |
171 | *dst++ = *src++; | |
172 | } | |
173 | } | |
174 | return (dst - (uchar_t *)d_start); | |
175 | } | |
176 | ||
177 | size_t | |
178 | decompress(void *s_start, void *d_start, size_t s_len, size_t d_len) | |
179 | { | |
180 | uchar_t *src = s_start; | |
181 | uchar_t *dst = d_start; | |
182 | uchar_t *s_end = (uchar_t *)s_start + s_len; | |
183 | uchar_t *d_end = (uchar_t *)d_start + d_len; | |
184 | uchar_t *cpy, copymap; | |
185 | int copymask = 1 << (NBBY - 1); | |
186 | ||
187 | if (s_len >= d_len) { | |
188 | size_t d_rem = d_len; | |
189 | while (d_rem-- != 0) | |
190 | *dst++ = *src++; | |
191 | return (d_len); | |
192 | } | |
193 | ||
194 | while (src < s_end && dst < d_end) { | |
195 | if ((copymask <<= 1) == (1 << NBBY)) { | |
196 | copymask = 1; | |
197 | copymap = *src++; | |
198 | } | |
199 | if (copymap & copymask) { | |
200 | int mlen = (src[0] >> (NBBY - MATCH_BITS)) + MATCH_MIN; | |
201 | int offset = ((src[0] << NBBY) | src[1]) & OFFSET_MASK; | |
202 | src += 2; | |
203 | if ((cpy = dst - offset) >= (uchar_t *)d_start) | |
204 | while (--mlen >= 0 && dst < d_end) | |
205 | *dst++ = *cpy++; | |
206 | else | |
207 | /* | |
208 | * offset before start of destination buffer | |
209 | * indicates corrupt source data | |
210 | */ | |
211 | return (dst - (uchar_t *)d_start); | |
212 | } else { | |
213 | *dst++ = *src++; | |
214 | } | |
215 | } | |
216 | return (dst - (uchar_t *)d_start); | |
217 | } | |
218 | ||
219 | uint32_t | |
220 | checksum32(void *cp_arg, size_t length) | |
221 | { | |
222 | uchar_t *cp, *ep; | |
223 | uint32_t sum = 0; | |
224 | ||
225 | for (cp = cp_arg, ep = cp + length; cp < ep; cp++) | |
226 | sum = ((sum >> 1) | (sum << 31)) + *cp; | |
227 | return (sum); | |
228 | } |