]>
Commit | Line | Data |
---|---|---|
4f3865fb RP |
1 | /* inffast.c -- fast decoding |
2 | * Copyright (C) 1995-2004 Mark Adler | |
3 | * For conditions of distribution and use, see copyright notice in zlib.h | |
1da177e4 LT |
4 | */ |
5 | ||
6 | #include <linux/zutil.h> | |
7 | #include "inftrees.h" | |
4f3865fb | 8 | #include "inflate.h" |
1da177e4 LT |
9 | #include "inffast.h" |
10 | ||
6846ee5c BH |
11 | /* Only do the unaligned "Faster" variant when |
12 | * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS is set | |
13 | * | |
14 | * On powerpc, it won't be as we don't include autoconf.h | |
15 | * automatically for the boot wrapper, which is intended as | |
16 | * we run in an environment where we may not be able to deal | |
17 | * with (even rare) alignment faults. In addition, we do not | |
18 | * define __KERNEL__ for arch/powerpc/boot unlike x86 | |
19 | */ | |
20 | ||
21 | #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS | |
22 | #include <asm/unaligned.h> | |
23 | #include <asm/byteorder.h> | |
24 | #endif | |
25 | ||
4f3865fb RP |
26 | #ifndef ASMINF |
27 | ||
28 | /* Allow machine dependent optimization for post-increment or pre-increment. | |
29 | Based on testing to date, | |
30 | Pre-increment preferred for: | |
31 | - PowerPC G3 (Adler) | |
32 | - MIPS R5000 (Randers-Pehrson) | |
33 | Post-increment preferred for: | |
34 | - none | |
35 | No measurable difference: | |
36 | - Pentium III (Anderson) | |
37 | - M68060 (Nikl) | |
38 | */ | |
39 | #ifdef POSTINC | |
40 | # define OFF 0 | |
41 | # define PUP(a) *(a)++ | |
ac4c2a3b | 42 | # define UP_UNALIGNED(a) get_unaligned((a)++) |
4f3865fb RP |
43 | #else |
44 | # define OFF 1 | |
45 | # define PUP(a) *++(a) | |
ac4c2a3b | 46 | # define UP_UNALIGNED(a) get_unaligned(++(a)) |
4f3865fb RP |
47 | #endif |
48 | ||
49 | /* | |
50 | Decode literal, length, and distance codes and write out the resulting | |
51 | literal and match bytes until either not enough input or output is | |
52 | available, an end-of-block is encountered, or a data error is encountered. | |
53 | When large enough input and output buffers are supplied to inflate(), for | |
54 | example, a 16K input buffer and a 64K output buffer, more than 95% of the | |
55 | inflate execution time is spent in this routine. | |
56 | ||
57 | Entry assumptions: | |
58 | ||
59 | state->mode == LEN | |
60 | strm->avail_in >= 6 | |
61 | strm->avail_out >= 258 | |
62 | start >= strm->avail_out | |
63 | state->bits < 8 | |
64 | ||
65 | On return, state->mode is one of: | |
66 | ||
67 | LEN -- ran out of enough output space or enough available input | |
68 | TYPE -- reached end of block code, inflate() to interpret next block | |
69 | BAD -- error in block data | |
70 | ||
71 | Notes: | |
72 | ||
73 | - The maximum input bits used by a length/distance pair is 15 bits for the | |
74 | length code, 5 bits for the length extra, 15 bits for the distance code, | |
75 | and 13 bits for the distance extra. This totals 48 bits, or six bytes. | |
76 | Therefore if strm->avail_in >= 6, then there is enough input to avoid | |
77 | checking for available input while decoding. | |
78 | ||
79 | - The maximum bytes that a single length/distance pair can output is 258 | |
80 | bytes, which is the maximum length that can be coded. inflate_fast() | |
81 | requires strm->avail_out >= 258 for each loop to avoid checking for | |
82 | output space. | |
b762450e RD |
83 | |
84 | - @start: inflate()'s starting value for strm->avail_out | |
4f3865fb | 85 | */ |
b762450e | 86 | void inflate_fast(z_streamp strm, unsigned start) |
1da177e4 | 87 | { |
4f3865fb | 88 | struct inflate_state *state; |
8336793b DV |
89 | const unsigned char *in; /* local strm->next_in */ |
90 | const unsigned char *last; /* while in < last, enough input available */ | |
91 | unsigned char *out; /* local strm->next_out */ | |
92 | unsigned char *beg; /* inflate()'s initial strm->next_out */ | |
93 | unsigned char *end; /* while out < end, enough space available */ | |
4f3865fb RP |
94 | #ifdef INFLATE_STRICT |
95 | unsigned dmax; /* maximum distance from zlib header */ | |
96 | #endif | |
97 | unsigned wsize; /* window size or zero if not using window */ | |
98 | unsigned whave; /* valid bytes in the window */ | |
99 | unsigned write; /* window write index */ | |
8336793b | 100 | unsigned char *window; /* allocated sliding window, if wsize != 0 */ |
4f3865fb RP |
101 | unsigned long hold; /* local strm->hold */ |
102 | unsigned bits; /* local strm->bits */ | |
8336793b DV |
103 | code const *lcode; /* local strm->lencode */ |
104 | code const *dcode; /* local strm->distcode */ | |
4f3865fb RP |
105 | unsigned lmask; /* mask for first level of length codes */ |
106 | unsigned dmask; /* mask for first level of distance codes */ | |
107 | code this; /* retrieved table entry */ | |
108 | unsigned op; /* code bits, operation, extra bits, or */ | |
109 | /* window position, window bytes to copy */ | |
110 | unsigned len; /* match length, unused bytes */ | |
111 | unsigned dist; /* match distance */ | |
8336793b | 112 | unsigned char *from; /* where to copy match from */ |
4f3865fb RP |
113 | |
114 | /* copy state to local variables */ | |
115 | state = (struct inflate_state *)strm->state; | |
116 | in = strm->next_in - OFF; | |
117 | last = in + (strm->avail_in - 5); | |
118 | out = strm->next_out - OFF; | |
119 | beg = out - (start - strm->avail_out); | |
120 | end = out + (strm->avail_out - 257); | |
121 | #ifdef INFLATE_STRICT | |
122 | dmax = state->dmax; | |
123 | #endif | |
124 | wsize = state->wsize; | |
125 | whave = state->whave; | |
126 | write = state->write; | |
127 | window = state->window; | |
128 | hold = state->hold; | |
129 | bits = state->bits; | |
130 | lcode = state->lencode; | |
131 | dcode = state->distcode; | |
132 | lmask = (1U << state->lenbits) - 1; | |
133 | dmask = (1U << state->distbits) - 1; | |
134 | ||
135 | /* decode literals and length/distances until end-of-block or not enough | |
136 | input data or output space */ | |
1da177e4 | 137 | do { |
4f3865fb RP |
138 | if (bits < 15) { |
139 | hold += (unsigned long)(PUP(in)) << bits; | |
140 | bits += 8; | |
141 | hold += (unsigned long)(PUP(in)) << bits; | |
142 | bits += 8; | |
143 | } | |
144 | this = lcode[hold & lmask]; | |
145 | dolen: | |
146 | op = (unsigned)(this.bits); | |
147 | hold >>= op; | |
148 | bits -= op; | |
149 | op = (unsigned)(this.op); | |
150 | if (op == 0) { /* literal */ | |
151 | PUP(out) = (unsigned char)(this.val); | |
152 | } | |
153 | else if (op & 16) { /* length base */ | |
154 | len = (unsigned)(this.val); | |
155 | op &= 15; /* number of extra bits */ | |
156 | if (op) { | |
157 | if (bits < op) { | |
158 | hold += (unsigned long)(PUP(in)) << bits; | |
159 | bits += 8; | |
160 | } | |
161 | len += (unsigned)hold & ((1U << op) - 1); | |
162 | hold >>= op; | |
163 | bits -= op; | |
164 | } | |
165 | if (bits < 15) { | |
166 | hold += (unsigned long)(PUP(in)) << bits; | |
167 | bits += 8; | |
168 | hold += (unsigned long)(PUP(in)) << bits; | |
169 | bits += 8; | |
170 | } | |
171 | this = dcode[hold & dmask]; | |
172 | dodist: | |
173 | op = (unsigned)(this.bits); | |
174 | hold >>= op; | |
175 | bits -= op; | |
176 | op = (unsigned)(this.op); | |
177 | if (op & 16) { /* distance base */ | |
178 | dist = (unsigned)(this.val); | |
179 | op &= 15; /* number of extra bits */ | |
180 | if (bits < op) { | |
181 | hold += (unsigned long)(PUP(in)) << bits; | |
182 | bits += 8; | |
183 | if (bits < op) { | |
184 | hold += (unsigned long)(PUP(in)) << bits; | |
185 | bits += 8; | |
186 | } | |
187 | } | |
188 | dist += (unsigned)hold & ((1U << op) - 1); | |
189 | #ifdef INFLATE_STRICT | |
190 | if (dist > dmax) { | |
191 | strm->msg = (char *)"invalid distance too far back"; | |
192 | state->mode = BAD; | |
193 | break; | |
194 | } | |
195 | #endif | |
196 | hold >>= op; | |
197 | bits -= op; | |
198 | op = (unsigned)(out - beg); /* max distance in output */ | |
199 | if (dist > op) { /* see if copy from window */ | |
200 | op = dist - op; /* distance back in window */ | |
201 | if (op > whave) { | |
202 | strm->msg = (char *)"invalid distance too far back"; | |
203 | state->mode = BAD; | |
204 | break; | |
205 | } | |
206 | from = window - OFF; | |
207 | if (write == 0) { /* very common case */ | |
208 | from += wsize - op; | |
209 | if (op < len) { /* some from window */ | |
210 | len -= op; | |
211 | do { | |
212 | PUP(out) = PUP(from); | |
213 | } while (--op); | |
214 | from = out - dist; /* rest from output */ | |
215 | } | |
216 | } | |
217 | else if (write < op) { /* wrap around window */ | |
218 | from += wsize + write - op; | |
219 | op -= write; | |
220 | if (op < len) { /* some from end of window */ | |
221 | len -= op; | |
222 | do { | |
223 | PUP(out) = PUP(from); | |
224 | } while (--op); | |
225 | from = window - OFF; | |
226 | if (write < len) { /* some from start of window */ | |
227 | op = write; | |
228 | len -= op; | |
229 | do { | |
230 | PUP(out) = PUP(from); | |
231 | } while (--op); | |
232 | from = out - dist; /* rest from output */ | |
233 | } | |
234 | } | |
235 | } | |
236 | else { /* contiguous in window */ | |
237 | from += write - op; | |
238 | if (op < len) { /* some from window */ | |
239 | len -= op; | |
240 | do { | |
241 | PUP(out) = PUP(from); | |
242 | } while (--op); | |
243 | from = out - dist; /* rest from output */ | |
244 | } | |
245 | } | |
246 | while (len > 2) { | |
247 | PUP(out) = PUP(from); | |
248 | PUP(out) = PUP(from); | |
249 | PUP(out) = PUP(from); | |
250 | len -= 3; | |
251 | } | |
252 | if (len) { | |
253 | PUP(out) = PUP(from); | |
254 | if (len > 1) | |
255 | PUP(out) = PUP(from); | |
256 | } | |
257 | } | |
258 | else { | |
6846ee5c | 259 | #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS |
ac4c2a3b JT |
260 | unsigned short *sout; |
261 | unsigned long loops; | |
262 | ||
4f3865fb | 263 | from = out - dist; /* copy direct from output */ |
ac4c2a3b JT |
264 | /* minimum length is three */ |
265 | /* Align out addr */ | |
266 | if (!((long)(out - 1 + OFF) & 1)) { | |
267 | PUP(out) = PUP(from); | |
268 | len--; | |
269 | } | |
270 | sout = (unsigned short *)(out - OFF); | |
271 | if (dist > 2) { | |
272 | unsigned short *sfrom; | |
273 | ||
274 | sfrom = (unsigned short *)(from - OFF); | |
275 | loops = len >> 1; | |
276 | do | |
277 | PUP(sout) = UP_UNALIGNED(sfrom); | |
278 | while (--loops); | |
279 | out = (unsigned char *)sout + OFF; | |
280 | from = (unsigned char *)sfrom + OFF; | |
281 | } else { /* dist == 1 or dist == 2 */ | |
282 | unsigned short pat16; | |
283 | ||
284 | pat16 = *(sout-2+2*OFF); | |
285 | if (dist == 1) | |
286 | #if defined(__BIG_ENDIAN) | |
287 | pat16 = (pat16 & 0xff) | ((pat16 & 0xff) << 8); | |
288 | #elif defined(__LITTLE_ENDIAN) | |
289 | pat16 = (pat16 & 0xff00) | ((pat16 & 0xff00) >> 8); | |
290 | #else | |
291 | #error __BIG_ENDIAN nor __LITTLE_ENDIAN is defined | |
292 | #endif | |
293 | loops = len >> 1; | |
294 | do | |
295 | PUP(sout) = pat16; | |
296 | while (--loops); | |
297 | out = (unsigned char *)sout + OFF; | |
298 | } | |
299 | if (len & 1) | |
300 | PUP(out) = PUP(from); | |
6846ee5c BH |
301 | #else /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */ |
302 | from = out - dist; /* copy direct from output */ | |
303 | do { /* minimum length is three */ | |
304 | PUP(out) = PUP(from); | |
305 | PUP(out) = PUP(from); | |
306 | PUP(out) = PUP(from); | |
307 | len -= 3; | |
308 | } while (len > 2); | |
309 | if (len) { | |
310 | PUP(out) = PUP(from); | |
311 | if (len > 1) | |
312 | PUP(out) = PUP(from); | |
313 | } | |
314 | #endif /* !CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */ | |
4f3865fb RP |
315 | } |
316 | } | |
317 | else if ((op & 64) == 0) { /* 2nd level distance code */ | |
318 | this = dcode[this.val + (hold & ((1U << op) - 1))]; | |
319 | goto dodist; | |
1da177e4 | 320 | } |
4f3865fb RP |
321 | else { |
322 | strm->msg = (char *)"invalid distance code"; | |
323 | state->mode = BAD; | |
324 | break; | |
1da177e4 | 325 | } |
4f3865fb RP |
326 | } |
327 | else if ((op & 64) == 0) { /* 2nd level length code */ | |
328 | this = lcode[this.val + (hold & ((1U << op) - 1))]; | |
329 | goto dolen; | |
330 | } | |
331 | else if (op & 32) { /* end-of-block */ | |
332 | state->mode = TYPE; | |
1da177e4 | 333 | break; |
1da177e4 | 334 | } |
4f3865fb RP |
335 | else { |
336 | strm->msg = (char *)"invalid literal/length code"; | |
337 | state->mode = BAD; | |
338 | break; | |
339 | } | |
340 | } while (in < last && out < end); | |
341 | ||
342 | /* return unused bytes (on entry, bits < 8, so in won't go too far back) */ | |
343 | len = bits >> 3; | |
344 | in -= len; | |
345 | bits -= len << 3; | |
346 | hold &= (1U << bits) - 1; | |
347 | ||
348 | /* update state and return */ | |
349 | strm->next_in = in + OFF; | |
350 | strm->next_out = out + OFF; | |
351 | strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last)); | |
352 | strm->avail_out = (unsigned)(out < end ? | |
353 | 257 + (end - out) : 257 - (out - end)); | |
354 | state->hold = hold; | |
355 | state->bits = bits; | |
356 | return; | |
1da177e4 | 357 | } |
4f3865fb RP |
358 | |
359 | /* | |
360 | inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe): | |
361 | - Using bit fields for code structure | |
362 | - Different op definition to avoid & for extra bits (do & for table bits) | |
363 | - Three separate decoding do-loops for direct, window, and write == 0 | |
364 | - Special case for distance > 1 copies to do overlapped load and store copy | |
365 | - Explicit branch predictions (based on measured branch probabilities) | |
366 | - Deferring match copy and interspersed it with decoding subsequent codes | |
367 | - Swapping literal/length else | |
368 | - Swapping window/direct else | |
369 | - Larger unrolled copy loops (three is about right) | |
370 | - Moving len -= 3 statement into middle of loop | |
371 | */ | |
372 | ||
373 | #endif /* !ASMINF */ |