]>
Commit | Line | Data |
---|---|---|
92f5a8d4 TL |
1 | /* fitblk.c: example of fitting compressed output to a specified size |
2 | Not copyrighted -- provided to the public domain | |
3 | Version 1.1 25 November 2004 Mark Adler */ | |
4 | ||
5 | /* Version history: | |
6 | 1.0 24 Nov 2004 First version | |
7 | 1.1 25 Nov 2004 Change deflateInit2() to deflateInit() | |
8 | Use fixed-size, stack-allocated raw buffers | |
9 | Simplify code moving compression to subroutines | |
10 | Use assert() for internal errors | |
11 | Add detailed description of approach | |
12 | */ | |
13 | ||
14 | /* Approach to just fitting a requested compressed size: | |
15 | ||
16 | fitblk performs three compression passes on a portion of the input | |
17 | data in order to determine how much of that input will compress to | |
18 | nearly the requested output block size. The first pass generates | |
19 | enough deflate blocks to produce output to fill the requested | |
20 | output size plus a specified excess amount (see the EXCESS define | |
21 | below). The last deflate block may go quite a bit past that, but | |
22 | is discarded. The second pass decompresses and recompresses just | |
23 | the compressed data that fit in the requested plus excess sized | |
24 | buffer. The deflate process is terminated after that amount of | |
25 | input, which is less than the amount consumed on the first pass. | |
26 | The last deflate block of the result will be of a comparable size | |
27 | to the final product, so that the header for that deflate block and | |
28 | the compression ratio for that block will be about the same as in | |
29 | the final product. The third compression pass decompresses the | |
30 | result of the second step, but only the compressed data up to the | |
31 | requested size minus an amount to allow the compressed stream to | |
32 | complete (see the MARGIN define below). That will result in a | |
33 | final compressed stream whose length is less than or equal to the | |
34 | requested size. Assuming sufficient input and a requested size | |
35 | greater than a few hundred bytes, the shortfall will typically be | |
36 | less than ten bytes. | |
37 | ||
38 | If the input is short enough that the first compression completes | |
39 | before filling the requested output size, then that compressed | |
40 | stream is returned with no recompression. | |
41 | ||
42 | EXCESS is chosen to be just greater than the shortfall seen in a | |
43 | two pass approach similar to the above. That shortfall is due to | |
44 | the last deflate block compressing more efficiently with a smaller | |
45 | header on the second pass. EXCESS is set to be large enough so | |
46 | that there is enough uncompressed data for the second pass to fill | |
47 | out the requested size, and small enough so that the final deflate | |
48 | block of the second pass will be close in size to the final deflate | |
49 | block of the third and final pass. MARGIN is chosen to be just | |
50 | large enough to assure that the final compression has enough room | |
51 | to complete in all cases. | |
52 | */ | |
53 | ||
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include "zlib.h"
58 | ||
59 | #define local static | |
60 | ||
61 | /* print nastygram and leave */ | |
62 | local void quit(char *why) | |
63 | { | |
64 | fprintf(stderr, "fitblk abort: %s\n", why); | |
65 | exit(1); | |
66 | } | |
67 | ||
68 | #define RAWLEN 4096 /* intermediate uncompressed buffer size */ | |
69 | ||
70 | /* compress from file to def until provided buffer is full or end of | |
71 | input reached; return last deflate() return value, or Z_ERRNO if | |
72 | there was read error on the file */ | |
73 | local int partcompress(FILE *in, z_streamp def) | |
74 | { | |
75 | int ret, flush; | |
76 | unsigned char raw[RAWLEN]; | |
77 | ||
78 | flush = Z_NO_FLUSH; | |
79 | do { | |
80 | def->avail_in = fread(raw, 1, RAWLEN, in); | |
81 | if (ferror(in)) | |
82 | return Z_ERRNO; | |
83 | def->next_in = raw; | |
84 | if (feof(in)) | |
85 | flush = Z_FINISH; | |
86 | ret = deflate(def, flush); | |
87 | assert(ret != Z_STREAM_ERROR); | |
88 | } while (def->avail_out != 0 && flush == Z_NO_FLUSH); | |
89 | return ret; | |
90 | } | |
91 | ||
92 | /* recompress from inf's input to def's output; the input for inf and | |
93 | the output for def are set in those structures before calling; | |
94 | return last deflate() return value, or Z_MEM_ERROR if inflate() | |
95 | was not able to allocate enough memory when it needed to */ | |
96 | local int recompress(z_streamp inf, z_streamp def) | |
97 | { | |
98 | int ret, flush; | |
99 | unsigned char raw[RAWLEN]; | |
100 | ||
101 | flush = Z_NO_FLUSH; | |
102 | do { | |
103 | /* decompress */ | |
104 | inf->avail_out = RAWLEN; | |
105 | inf->next_out = raw; | |
106 | ret = inflate(inf, Z_NO_FLUSH); | |
107 | assert(ret != Z_STREAM_ERROR && ret != Z_DATA_ERROR && | |
108 | ret != Z_NEED_DICT); | |
109 | if (ret == Z_MEM_ERROR) | |
110 | return ret; | |
111 | ||
112 | /* compress what was decompresed until done or no room */ | |
113 | def->avail_in = RAWLEN - inf->avail_out; | |
114 | def->next_in = raw; | |
115 | if (inf->avail_out != 0) | |
116 | flush = Z_FINISH; | |
117 | ret = deflate(def, flush); | |
118 | assert(ret != Z_STREAM_ERROR); | |
119 | } while (ret != Z_STREAM_END && def->avail_out != 0); | |
120 | return ret; | |
121 | } | |
122 | ||
123 | #define EXCESS 256 /* empirically determined stream overage */ | |
124 | #define MARGIN 8 /* amount to back off for completion */ | |
125 | ||
126 | /* compress from stdin to fixed-size block on stdout */ | |
127 | int main(int argc, char **argv) | |
128 | { | |
129 | int ret; /* return code */ | |
130 | unsigned size; /* requested fixed output block size */ | |
131 | unsigned have; /* bytes written by deflate() call */ | |
132 | unsigned char *blk; /* intermediate and final stream */ | |
133 | unsigned char *tmp; /* close to desired size stream */ | |
134 | z_stream def, inf; /* zlib deflate and inflate states */ | |
135 | ||
136 | /* get requested output size */ | |
137 | if (argc != 2) | |
138 | quit("need one argument: size of output block"); | |
139 | ret = strtol(argv[1], argv + 1, 10); | |
140 | if (argv[1][0] != 0) | |
141 | quit("argument must be a number"); | |
142 | if (ret < 8) /* 8 is minimum zlib stream size */ | |
143 | quit("need positive size of 8 or greater"); | |
144 | size = (unsigned)ret; | |
145 | ||
146 | /* allocate memory for buffers and compression engine */ | |
147 | blk = malloc(size + EXCESS); | |
148 | def.zalloc = Z_NULL; | |
149 | def.zfree = Z_NULL; | |
150 | def.opaque = Z_NULL; | |
151 | ret = deflateInit(&def, Z_DEFAULT_COMPRESSION); | |
152 | if (ret != Z_OK || blk == NULL) | |
153 | quit("out of memory"); | |
154 | ||
155 | /* compress from stdin until output full, or no more input */ | |
156 | def.avail_out = size + EXCESS; | |
157 | def.next_out = blk; | |
158 | ret = partcompress(stdin, &def); | |
159 | if (ret == Z_ERRNO) | |
160 | quit("error reading input"); | |
161 | ||
162 | /* if it all fit, then size was undersubscribed -- done! */ | |
163 | if (ret == Z_STREAM_END && def.avail_out >= EXCESS) { | |
164 | /* write block to stdout */ | |
165 | have = size + EXCESS - def.avail_out; | |
166 | if (fwrite(blk, 1, have, stdout) != have || ferror(stdout)) | |
167 | quit("error writing output"); | |
168 | ||
169 | /* clean up and print results to stderr */ | |
170 | ret = deflateEnd(&def); | |
171 | assert(ret != Z_STREAM_ERROR); | |
172 | free(blk); | |
173 | fprintf(stderr, | |
174 | "%u bytes unused out of %u requested (all input)\n", | |
175 | size - have, size); | |
176 | return 0; | |
177 | } | |
178 | ||
179 | /* it didn't all fit -- set up for recompression */ | |
180 | inf.zalloc = Z_NULL; | |
181 | inf.zfree = Z_NULL; | |
182 | inf.opaque = Z_NULL; | |
183 | inf.avail_in = 0; | |
184 | inf.next_in = Z_NULL; | |
185 | ret = inflateInit(&inf); | |
186 | tmp = malloc(size + EXCESS); | |
187 | if (ret != Z_OK || tmp == NULL) | |
188 | quit("out of memory"); | |
189 | ret = deflateReset(&def); | |
190 | assert(ret != Z_STREAM_ERROR); | |
191 | ||
192 | /* do first recompression close to the right amount */ | |
193 | inf.avail_in = size + EXCESS; | |
194 | inf.next_in = blk; | |
195 | def.avail_out = size + EXCESS; | |
196 | def.next_out = tmp; | |
197 | ret = recompress(&inf, &def); | |
198 | if (ret == Z_MEM_ERROR) | |
199 | quit("out of memory"); | |
200 | ||
201 | /* set up for next reocmpression */ | |
202 | ret = inflateReset(&inf); | |
203 | assert(ret != Z_STREAM_ERROR); | |
204 | ret = deflateReset(&def); | |
205 | assert(ret != Z_STREAM_ERROR); | |
206 | ||
207 | /* do second and final recompression (third compression) */ | |
208 | inf.avail_in = size - MARGIN; /* assure stream will complete */ | |
209 | inf.next_in = tmp; | |
210 | def.avail_out = size; | |
211 | def.next_out = blk; | |
212 | ret = recompress(&inf, &def); | |
213 | if (ret == Z_MEM_ERROR) | |
214 | quit("out of memory"); | |
215 | assert(ret == Z_STREAM_END); /* otherwise MARGIN too small */ | |
216 | ||
217 | /* done -- write block to stdout */ | |
218 | have = size - def.avail_out; | |
219 | if (fwrite(blk, 1, have, stdout) != have || ferror(stdout)) | |
220 | quit("error writing output"); | |
221 | ||
222 | /* clean up and print results to stderr */ | |
223 | free(tmp); | |
224 | ret = inflateEnd(&inf); | |
225 | assert(ret != Z_STREAM_ERROR); | |
226 | ret = deflateEnd(&def); | |
227 | assert(ret != Z_STREAM_ERROR); | |
228 | free(blk); | |
229 | fprintf(stderr, | |
230 | "%u bytes unused out of %u requested (%lu input)\n", | |
231 | size - have, size, def.total_in); | |
232 | return 0; | |
233 | } |