2 // Copyright (c) 2013-2017 Vinnie Falco (vinnie dot falco at gmail dot com)
4 // Distributed under the Boost Software License, Version 1.0. (See accompanying
5 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7 // This is a derivative work based on Zlib, copyright below:
9 Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler
11 This software is provided 'as-is', without any express or implied
12 warranty. In no event will the authors be held liable for any damages
13 arising from the use of this software.
15 Permission is granted to anyone to use this software for any purpose,
16 including commercial applications, and to alter it and redistribute it
17 freely, subject to the following restrictions:
19 1. The origin of this software must not be misrepresented; you must not
20 claim that you wrote the original software. If you use this software
21 in a product, an acknowledgment in the product documentation would be
22 appreciated but is not required.
23 2. Altered source versions must be plainly marked as such, and must not be
24 misrepresented as being the original software.
25 3. This notice may not be removed or altered from any source distribution.
27 Jean-loup Gailly Mark Adler
28 jloup@gzip.org madler@alumni.caltech.edu
30 The data format used by the zlib library is described by RFCs (Request for
31 Comments) 1950 to 1952 in the files http://tools.ietf.org/html/rfc1950
32 (zlib format), rfc1951 (deflate format) and rfc1952 (gzip format).
35 #ifndef BEAST_ZLIB_DETAIL_DEFLATE_STREAM_HPP
36 #define BEAST_ZLIB_DETAIL_DEFLATE_STREAM_HPP
38 #include <beast/zlib/zlib.hpp>
39 #include <beast/zlib/detail/ranges.hpp>
40 #include <beast/core/detail/type_traits.hpp>
41 #include <boost/assert.hpp>
42 #include <boost/optional.hpp>
48 #include <type_traits>
57 * The "deflation" process depends on being able to identify portions
58 * of the input text which are identical to earlier input (within a
59 * sliding window trailing behind the input currently being processed).
61 * Each code tree is stored in a compressed form which is itself
62 * a Huffman encoding of the lengths of all the code strings (in
63 * ascending order by source values). The actual code strings are
64 * reconstructed from the lengths in the inflate process, as described
65 * in the deflate specification.
67 * The most straightforward technique turns out to be the fastest for
68 * most input files: try all possible matches and select the longest.
69 * The key feature of this algorithm is that insertions into the string
70 * dictionary are very simple and thus fast, and deletions are avoided
71 * completely. Insertions are performed at each input character, whereas
72 * string matches are performed only when the previous match ends. So it
73 * is preferable to spend more time in matches to allow very fast string
74 * insertions and avoid deletions. The matching algorithm for small
75 * strings is inspired from that of Rabin & Karp. A brute force approach
76 * is used to find longer strings when a small match has been found.
77 * A similar algorithm is used in comic (by Jan-Mark Wams) and freeze
78 * (by Leonid Broukhis).
79 * A previous version of this file used a more sophisticated algorithm
80 * (by Fiala and Greene) which is guaranteed to run in linear amortized
81 * time, but has a larger average cost, uses more memory and is patented.
82 * However the F&G algorithm may be faster for some highly redundant
83 * files if the parameter max_chain_length (described below) is too large.
87 * The idea of lazy evaluation of matches is due to Jan-Mark Wams, and
88 * I found it in 'freeze' written by Leonid Broukhis.
89 * Thanks to many people for bug reports and testing.
93 * Deutsch, L.P.,"DEFLATE Compressed Data Format Specification".
94 * Available in http://tools.ietf.org/html/rfc1951
96 * A description of the Rabin and Karp algorithm is given in the book
97 * "Algorithms" by R. Sedgewick, Addison-Wesley, p252.
99 * Fiala,E.R., and Greene,D.H.
100 * Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-595
107 // Upper limit on code length
108 static std::uint8_t constexpr maxBits = 15;
110 // Number of length codes, not counting the special END_BLOCK code
111 static std::uint16_t constexpr lengthCodes = 29;
113 // Number of literal bytes 0..255
114 static std::uint16_t constexpr literals = 256;
116 // Number of Literal or Length codes, including the END_BLOCK code
117 static std::uint16_t constexpr lCodes = literals + 1 + lengthCodes;
119 // Number of distance codes
120 static std::uint16_t constexpr dCodes = 30;
122 // Number of codes used to transfer the bit lengths
123 static std::uint16_t constexpr blCodes = 19;
125 // Size of the dist_code[] mapping table:
125 // 256 entries for direct distances plus 256 for the top 8 bits (see dist_code below)
126 static std::uint16_t constexpr distCodeLen = 512;
128 // Size limit on bit length codes
129 static std::uint8_t constexpr maxBlBits= 7;
// Minimum and maximum LZ77 match lengths
131 static std::uint16_t constexpr minMatch = 3;
132 static std::uint16_t constexpr maxMatch = 258;
134 // Can't change minMatch without also changing code, see original zlib
135 static_assert(minMatch==3, "");
137 // end of block literal code
138 static std::uint16_t constexpr END_BLOCK = 256;
140 // repeat previous bit length 3-6 times (2 bits of repeat count)
141 static std::uint8_t constexpr REP_3_6 = 16;
143 // repeat a zero length 3-10 times (3 bits of repeat count)
144 static std::uint8_t constexpr REPZ_3_10 = 17;
146 // repeat a zero length 11-138 times (7 bits of repeat count)
147 static std::uint8_t constexpr REPZ_11_138 = 18;
149 // The three kinds of block type
150 static std::uint8_t constexpr STORED_BLOCK = 0;
151 static std::uint8_t constexpr STATIC_TREES = 1;
152 static std::uint8_t constexpr DYN_TREES = 2;
154 // Maximum value for memLevel in deflateInit2
155 static std::uint8_t constexpr MAX_MEM_LEVEL = 9;
// Default memLevel (here pinned to the maximum)
158 static std::uint8_t constexpr DEF_MEM_LEVEL = MAX_MEM_LEVEL;
160 /* Note: the deflate() code requires max_lazy >= minMatch and max_chain >= 4
161 For deflate_fast() (levels <= 3) good is ignored and lazy has a different
// Maximum heap size used when constructing the Huffman trees (2*lCodes+1 nodes)
166 static std::uint16_t constexpr HEAP_SIZE = 2 * lCodes + 1;
168 // size of bit buffer in bi_buf_, in bits
169 static std::uint8_t constexpr Buf_size = 16;
171 // Matches of length 3 are discarded if their distance exceeds kTooFar
172 static std::size_t constexpr kTooFar = 4096;
174 /* Minimum amount of lookahead, except at the end of the input file.
175 See deflate.c for comments about the minMatch+1.
177 static std::size_t constexpr kMinLookahead = maxMatch + minMatch+1;
179 /* Number of bytes after end of data in window to initialize in order
180 to avoid memory checker errors from longest match routines
182 static std::size_t constexpr kWinInit = maxMatch;
184 // Describes a single value and its code string.
187 std::uint16_t fc; // frequency count or bit string
188 std::uint16_t dl; // parent node in tree or length of bit string
191 operator==(ct_data const& rhs) const
193 return fc == rhs.fc && dl == rhs.dl;
199 ct_data const* static_tree;// static tree or NULL
200 std::uint8_t const* extra_bits; // extra bits for each code or NULL
201 std::uint16_t extra_base; // base index for extra_bits
202 std::uint16_t elems; // max number of elements in the tree
203 std::uint8_t max_length; // max bit length for the codes
208 // Number of extra bits for each length code
209 std::uint8_t const extra_lbits[lengthCodes] = {
210 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0
213 // Number of extra bits for each distance code
214 std::uint8_t const extra_dbits[dCodes] = {
215 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13
218 // Number of extra bits for each bit length code
219 std::uint8_t const extra_blbits[blCodes] = {
220 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7
223 // The lengths of the bit length codes are sent in order
224 // of decreasing probability, to avoid transmitting the
225 // lengths for unused bit length codes.
226 std::uint8_t const bl_order[blCodes] = {
227 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15
230 ct_data ltree[lCodes + 2];
232 ct_data dtree[dCodes];
234 // Distance codes. The first 256 values correspond to the distances
235 // 3 .. 258, the last 256 values correspond to the top 8 bits of
236 // the 15 bit distances.
237 std::uint8_t dist_code[distCodeLen];
239 std::uint8_t length_code[maxMatch-minMatch+1];
241 std::uint8_t base_length[lengthCodes];
243 std::uint16_t base_dist[dCodes];
245 static_desc l_desc = {
246 ltree, extra_lbits, literals+1, lCodes, maxBits
249 static_desc d_desc = {
250 dtree, extra_dbits, 0, dCodes, maxBits
253 static_desc bl_desc =
255 nullptr, extra_blbits, 0, blCodes, maxBlBits
261 ct_data *dyn_tree; /* the dynamic tree */
262 int max_code; /* largest code with non zero frequency */
263 static_desc const* stat_desc; /* the corresponding static tree */
268 need_more, /* block not completed, need more input or more output */
269 block_done, /* block flush performed */
270 finish_started, /* finish started, need only more output at next deflate */
271 finish_done /* finish done, accept no more input or output */
274 // VFALCO This might not be needed, e.g. for zip/gzip
285 /* A std::uint16_t is an index in the character window. We use short instead of int to
286 * save space in the various tables. IPos is used only for parameter passing.
288 using IPos = unsigned;
290 using self = deflate_stream;
291 typedef block_state(self::*compress_func)(z_params& zs, Flush flush);
293 //--------------------------------------------------------------------------
295 lut_type const& lut_;
297 bool inited_ = false;
298 std::size_t buf_size_;
299 std::unique_ptr<std::uint8_t[]> buf_;
301 int status_; // as the name implies
302 Byte* pending_buf_; // output still pending
304 pending_buf_size_; // size of pending_buf
305 Byte* pending_out_; // next pending byte to output to the stream
306 uInt pending_; // nb of bytes in the pending buffer
307 boost::optional<Flush>
308 last_flush_; // value of flush param for previous deflate call
310 uInt w_size_; // LZ77 window size (32K by default)
311 uInt w_bits_; // log2(w_size) (8..16)
312 uInt w_mask_; // w_size - 1
314 /* Sliding window. Input bytes are read into the second half of the window,
315 and move to the first half later to keep a dictionary of at least wSize
316 bytes. With this organization, matches are limited to a distance of
317 wSize-maxMatch bytes, but this ensures that IO is always
318 performed with a length multiple of the block size. Also, it limits
319 the window size to 64K.
320 To do: use the user input buffer as sliding window.
322 Byte *window_ = nullptr;
324 /* Actual size of window: 2*wSize, except when the user input buffer
325 is directly used as sliding window.
327 std::uint32_t window_size_;
329 /* Link to older string with same hash index. To limit the size of this
330 array to 64K, this link is maintained only for the last 32K strings.
331 An index in this array is thus a window index modulo 32K.
333 std::uint16_t* prev_;
335 std::uint16_t* head_; // Heads of the hash chains or 0
337 uInt ins_h_; // hash index of string to be inserted
338 uInt hash_size_; // number of elements in hash table
339 uInt hash_bits_; // log2(hash_size)
340 uInt hash_mask_; // hash_size-1
342 /* Number of bits by which ins_h must be shifted at each input
343 step. It must be such that after minMatch steps,
344 the oldest byte no longer takes part in the hash key, that is:
345 hash_shift * minMatch >= hash_bits
349 /* Window position at the beginning of the current output block.
350 Gets negative when the window is moved backwards.
354 uInt match_length_; // length of best match
355 IPos prev_match_; // previous match
356 int match_available_; // set if previous match exists
357 uInt strstart_; // start of string to insert
358 uInt match_start_; // start of matching string
359 uInt lookahead_; // number of valid bytes ahead in window
361 /* Length of the best match at previous step. Matches not greater
362 than this are discarded. This is used in the lazy match evaluation.
366 /* To speed up deflation, hash chains are never searched beyond
367 this length. A higher limit improves compression ratio but
370 uInt max_chain_length_;
372 /* Attempt to find a better match only when the current match is strictly
373 smaller than this value. This mechanism is used only for compression
376 OR Insert new strings in the hash table only if the match length is not
377 greater than this length. This saves time but degrades compression.
378 used only for compression levels <= 3.
380 uInt max_lazy_match_;
382 int level_; // compression level (1..9)
383 Strategy strategy_; // favor or force Huffman coding
385 // Use a faster search when the previous match is longer than this
388 int nice_match_; // Stop searching when current match exceeds this
391 HEAP_SIZE]; // literal and length tree
393 2*dCodes+1]; // distance tree
395 2*blCodes+1]; // Huffman tree for bit lengths
397 tree_desc l_desc_; // desc. for literal tree
398 tree_desc d_desc_; // desc. for distance tree
399 tree_desc bl_desc_; // desc. for bit length tree
401 // number of codes at each bit length for an optimal tree
402 std::uint16_t bl_count_[maxBits+1];
404 // Index within the heap array of least frequent node in the Huffman tree
405 static std::size_t constexpr kSmallest = 1;
407 /* The sons of heap[n] are heap[2*n] and heap[2*n+1].
408 heap[0] is not used. The same heap array is used to build all trees.
411 int heap_[2*lCodes+1]; // heap used to build the Huffman trees
412 int heap_len_; // number of elements in the heap
413 int heap_max_; // element of largest frequency
415 // Depth of each subtree used as tie breaker for trees of equal frequency
416 std::uint8_t depth_[2*lCodes+1];
418 std::uint8_t *l_buf_; // buffer for literals or lengths
420 /* Size of match buffer for literals/lengths.
421 There are 4 reasons for limiting lit_bufsize to 64K:
422 - frequencies can be kept in 16 bit counters
423 - if compression is not successful for the first block, all input
424 data is still in the window so we can still emit a stored block even
425 when input comes from standard input. (This can also be done for
426 all blocks if lit_bufsize is not greater than 32K.)
427 - if compression is not successful for a file smaller than 64K, we can
428 even emit a stored file instead of a stored block (saving 5 bytes).
429 This is applicable only for zip (not gzip or zlib).
430 - creating new Huffman trees less frequently may not provide fast
431 adaptation to changes in the input data statistics. (Take for
432 example a binary file with poorly compressible code followed by
433 a highly compressible string table.) Smaller buffer sizes give
434 fast adaptation but have of course the overhead of transmitting
435 trees more frequently.
436 - I can't count above 4
439 uInt last_lit_; // running index in l_buf_
441 /* Buffer for distances. To simplify the code, d_buf_ and l_buf_
442 have the same number of elements. To use different lengths, an
443 extra flag array would be necessary.
445 std::uint16_t* d_buf_;
447 std::uint32_t opt_len_; // bit length of current block with optimal trees
448 std::uint32_t static_len_; // bit length of current block with static trees
449 uInt matches_; // number of string matches in current block
450 uInt insert_; // bytes at end of window left to insert
453 Bits are inserted starting at the bottom (least significant bits).
455 std::uint16_t bi_buf_;
457 /* Number of valid bits in bi_buf_. All bits above the last valid
462 /* High water mark offset in window for initialized bytes -- bytes
463 above this are set to zero in order to avoid memory check warnings
464 when longest match routines access bytes past the input. This is
465 then updated to the new high water mark.
467 std::uint32_t high_water_;
469 //--------------------------------------------------------------------------
476 /* In order to simplify the code, particularly on 16 bit machines, match
477 distances are limited to MAX_DIST instead of WSIZE.
482 return w_size_ - kMinLookahead;
486 put_byte(std::uint8_t c)
488 pending_buf_[pending_++] = c;
492 put_short(std::uint16_t w)
498 /* Send a value on a given number of bits.
499 IN assertion: length <= 16 and value fits in length bits.
502 send_bits(int value, int length)
504 if(bi_valid_ > (int)Buf_size - length)
506 bi_buf_ |= (std::uint16_t)value << bi_valid_;
508 bi_buf_ = (std::uint16_t)value >> (Buf_size - bi_valid_);
509 bi_valid_ += length - Buf_size;
513 bi_buf_ |= (std::uint16_t)(value) << bi_valid_;
518 // Send a code of the given tree
520 send_code(int value, ct_data const* tree)
522 send_bits(tree[value].fc, tree[value].dl);
525 /* Mapping from a distance to a distance code. dist is the
526 distance - 1 and must not have side effects. dist_code[256]
527 and dist_code[257] are never used.
530 d_code(unsigned dist)
533 return lut_.dist_code[dist];
534 return lut_.dist_code[256+(dist>>7)];
537 /* Update a hash value with the given input byte
538 IN assertion: all calls to to update_hash are made with
539 consecutive input characters, so that a running hash
540 key can be computed from the previous key instead of
541 complete recalculation each time.
544 update_hash(uInt& h, std::uint8_t c)
546 h = ((h << hash_shift_) ^ c) & hash_mask_;
549 /* Initialize the hash table (avoiding 64K overflow for 16
550 bit systems). prev[] will be initialized on the fly.
555 head_[hash_size_-1] = 0;
556 std::memset((Byte *)head_, 0,
557 (unsigned)(hash_size_-1)*sizeof(*head_));
560 /* Compares two subtrees, using the tree depth as tie breaker
561 when the subtrees have equal frequency. This minimizes the
565 smaller(ct_data const* tree, int n, int m)
567 return tree[n].fc < tree[m].fc ||
568 (tree[n].fc == tree[m].fc &&
569 depth_[n] <= depth_[m]);
572 /* Insert string str in the dictionary and set match_head to the
573 previous head of the hash chain (the most recent string with
574 same hash key). Return the previous length of the hash chain.
575 If this file is compiled with -DFASTEST, the compression level
576 is forced to 1, and no hash chains are maintained.
577 IN assertion: all calls to to INSERT_STRING are made with
578 consecutive input characters and the first minMatch
579 bytes of str are valid (except for the last minMatch-1
580 bytes of the input file).
583 insert_string(IPos& hash_head)
585 update_hash(ins_h_, window_[strstart_ + (minMatch-1)]);
586 hash_head = prev_[strstart_ & w_mask_] = head_[ins_h_];
587 head_[ins_h_] = (std::uint16_t)strstart_;
590 //--------------------------------------------------------------------------
592 /* Values for max_lazy_match, good_match and max_chain_length, depending on
593 * the desired pack level (0..9). The values given below have been tuned to
594 * exclude worst case performance for pathological files. Better values may be
595 * found for specific files.
599 std::uint16_t good_length; /* reduce lazy search above this match length */
600 std::uint16_t max_lazy; /* do not perform lazy search above this match length */
601 std::uint16_t nice_length; /* quit search above this match length */
602 std::uint16_t max_chain;
606 std::uint16_t good_length_,
607 std::uint16_t max_lazy_,
608 std::uint16_t nice_length_,
609 std::uint16_t max_chain_,
611 : good_length(good_length_)
612 , max_lazy(max_lazy_)
613 , nice_length(nice_length_)
614 , max_chain(max_chain_)
622 get_config(std::size_t level)
626 // good lazy nice chain
627 case 0: return { 0, 0, 0, 0, &self::deflate_stored}; // store only
628 case 1: return { 4, 4, 8, 4, &self::deflate_fast}; // max speed, no lazy matches
629 case 2: return { 4, 5, 16, 8, &self::deflate_fast};
630 case 3: return { 4, 6, 32, 32, &self::deflate_fast};
631 case 4: return { 4, 4, 16, 16, &self::deflate_slow}; // lazy matches
632 case 5: return { 8, 16, 32, 32, &self::deflate_slow};
633 case 6: return { 8, 16, 128, 128, &self::deflate_slow};
634 case 7: return { 8, 32, 128, 256, &self::deflate_slow};
635 case 8: return { 32, 128, 258, 1024, &self::deflate_slow};
637 case 9: return { 32, 258, 258, 4096, &self::deflate_slow}; // max compression
650 bi_reverse(unsigned code, int len);
652 template<class = void>
655 gen_codes(ct_data *tree, int max_code, std::uint16_t *bl_count);
657 template<class = void>
662 template<class = void> void doReset (int level, int windowBits, int memLevel, Strategy strategy);
663 template<class = void> void doReset ();
664 template<class = void> void doClear ();
665 template<class = void> std::size_t doUpperBound (std::size_t sourceLen) const;
666 template<class = void> void doTune (int good_length, int max_lazy, int nice_length, int max_chain);
667 template<class = void> void doParams (z_params& zs, int level, Strategy strategy, error_code& ec);
668 template<class = void> void doWrite (z_params& zs, Flush flush, error_code& ec);
669 template<class = void> void doDictionary (Byte const* dict, uInt dictLength, error_code& ec);
670 template<class = void> void doPrime (int bits, int value, error_code& ec);
671 template<class = void> void doPending (unsigned* value, int* bits);
673 template<class = void> void init ();
674 template<class = void> void lm_init ();
675 template<class = void> void init_block ();
676 template<class = void> void pqdownheap (ct_data const* tree, int k);
677 template<class = void> void pqremove (ct_data const* tree, int& top);
678 template<class = void> void gen_bitlen (tree_desc *desc);
679 template<class = void> void build_tree (tree_desc *desc);
680 template<class = void> void scan_tree (ct_data *tree, int max_code);
681 template<class = void> void send_tree (ct_data *tree, int max_code);
682 template<class = void> int build_bl_tree ();
683 template<class = void> void send_all_trees (int lcodes, int dcodes, int blcodes);
684 template<class = void> void compress_block (ct_data const* ltree, ct_data const* dtree);
685 template<class = void> int detect_data_type ();
686 template<class = void> void bi_windup ();
687 template<class = void> void bi_flush ();
688 template<class = void> void copy_block (char *buf, unsigned len, int header);
690 template<class = void> void tr_init ();
691 template<class = void> void tr_align ();
692 template<class = void> void tr_flush_bits ();
693 template<class = void> void tr_stored_block (char *bu, std::uint32_t stored_len, int last);
694 template<class = void> void tr_tally_dist (std::uint16_t dist, std::uint8_t len, bool& flush);
695 template<class = void> void tr_tally_lit (std::uint8_t c, bool& flush);
697 template<class = void> void tr_flush_block (z_params& zs, char *buf, std::uint32_t stored_len, int last);
698 template<class = void> void fill_window (z_params& zs);
699 template<class = void> void flush_pending (z_params& zs);
700 template<class = void> void flush_block (z_params& zs, bool last);
701 template<class = void> int read_buf (z_params& zs, Byte *buf, unsigned size);
702 template<class = void> uInt longest_match (IPos cur_match);
704 template<class = void> block_state f_stored (z_params& zs, Flush flush);
705 template<class = void> block_state f_fast (z_params& zs, Flush flush);
706 template<class = void> block_state f_slow (z_params& zs, Flush flush);
707 template<class = void> block_state f_rle (z_params& zs, Flush flush);
708 template<class = void> block_state f_huff (z_params& zs, Flush flush);
711 deflate_stored(z_params& zs, Flush flush)
713 return f_stored(zs, flush);
717 deflate_fast(z_params& zs, Flush flush)
719 return f_fast(zs, flush);
723 deflate_slow(z_params& zs, Flush flush)
725 return f_slow(zs, flush);
729 deflate_rle(z_params& zs, Flush flush)
731 return f_rle(zs, flush);
735 deflate_huff(z_params& zs, Flush flush)
737 return f_huff(zs, flush);
741 //--------------------------------------------------------------------------
743 // Reverse the first len bits of a code
747 bi_reverse(unsigned code, int len)
760 /* Generate the codes for a given tree and bit counts (which need not be optimal).
761 IN assertion: the array bl_count contains the bit length statistics for
762 the given tree and the field len is set for all tree elements.
763 OUT assertion: the field code is set for all tree elements of non
769 gen_codes(ct_data *tree, int max_code, std::uint16_t *bl_count)
771 std::uint16_t next_code[maxBits+1]; /* next code value for each bit length */
772 std::uint16_t code = 0; /* running code value */
773 int bits; /* bit index */
774 int n; /* code index */
776 // The distribution counts are first used to
777 // generate the code values without bit reversal.
778 for(bits = 1; bits <= maxBits; bits++)
780 code = (code + bl_count[bits-1]) << 1;
781 next_code[bits] = code;
783 // Check that the bit counts in bl_count are consistent.
784 // The last code must be all ones.
785 BOOST_ASSERT(code + bl_count[maxBits]-1 == (1<<maxBits)-1);
786 for(n = 0; n <= max_code; n++)
788 int len = tree[n].dl;
791 tree[n].fc = bi_reverse(next_code[len]++, len);
797 deflate_stream::get_lut() ->
806 // number of codes at each bit length for an optimal tree
807 //std::uint16_t bl_count[maxBits+1];
809 // Initialize the mapping length (0..255) -> length code (0..28)
811 for(std::uint8_t code = 0; code < lengthCodes-1; ++code)
813 tables.base_length[code] = length;
814 auto const run = 1U << tables.extra_lbits[code];
815 for(unsigned n = 0; n < run; ++n)
816 tables.length_code[length++] = code;
818 BOOST_ASSERT(length == 256);
819 // Note that the length 255 (match length 258) can be represented
820 // in two different ways: code 284 + 5 bits or code 285, so we
821 // overwrite length_code[255] to use the best encoding:
822 tables.length_code[255] = lengthCodes-1;
824 // Initialize the mapping dist (0..32K) -> dist code (0..29)
827 std::uint16_t dist = 0;
828 for(code = 0; code < 16; code++)
830 tables.base_dist[code] = dist;
831 auto const run = 1U << tables.extra_dbits[code];
832 for(unsigned n = 0; n < run; ++n)
833 tables.dist_code[dist++] = code;
835 BOOST_ASSERT(dist == 256);
836 // from now on, all distances are divided by 128
838 for(; code < dCodes; ++code)
840 tables.base_dist[code] = dist << 7;
841 auto const run = 1U << (tables.extra_dbits[code]-7);
842 for(std::size_t n = 0; n < run; ++n)
843 tables.dist_code[256 + dist++] = code;
845 BOOST_ASSERT(dist == 256);
848 // Construct the codes of the static literal tree
849 std::uint16_t bl_count[maxBits+1];
850 std::memset(bl_count, 0, sizeof(bl_count));
853 tables.ltree[n++].dl = 8;
856 tables.ltree[n++].dl = 9;
859 tables.ltree[n++].dl = 7;
862 tables.ltree[n++].dl = 8;
864 // Codes 286 and 287 do not exist, but we must include them in the tree
865 // construction to get a canonical Huffman tree (longest code all ones)
866 gen_codes(tables.ltree, lCodes+1, bl_count);
868 for(n = 0; n < dCodes; ++n)
870 tables.dtree[n].dl = 5;
872 static_cast<std::uint16_t>(bi_reverse(n, 5));
876 static init const data;
889 if(level == Z_DEFAULT_COMPRESSION)
892 // VFALCO What do we do about this?
893 // until 256-byte window bug fixed
897 using beast::detail::make_exception;
899 if(level < 0 || level > 9)
900 throw make_exception<std::invalid_argument>(
901 "invalid level", __FILE__, __LINE__);
903 if(windowBits < 8 || windowBits > 15)
904 throw make_exception<std::invalid_argument>(
905 "invalid windowBits", __FILE__, __LINE__);
907 if(memLevel < 1 || memLevel > MAX_MEM_LEVEL)
908 throw make_exception<std::invalid_argument>(
909 "invalid memLevel", __FILE__, __LINE__);
911 w_bits_ = windowBits;
913 hash_bits_ = memLevel + 7;
915 // 16K elements by default
916 lit_bufsize_ = 1 << (memLevel + 6);
919 strategy_ = strategy;
943 doUpperBound(std::size_t sourceLen) const
948 /* conservative upper bound for compressed data */
949 complen = sourceLen +
950 ((sourceLen + 7) >> 3) + ((sourceLen + 63) >> 6) + 5;
952 /* compute wrapper length */
955 /* if not default parameters, return conservative bound */
956 if(w_bits_ != 15 || hash_bits_ != 8 + 7)
957 return complen + wraplen;
959 /* default settings: return tight bound for that case */
960 return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) +
961 (sourceLen >> 25) + 13 - 6 + wraplen;
973 good_match_ = good_length;
974 nice_match_ = nice_length;
975 max_lazy_match_ = max_lazy;
976 max_chain_length_ = max_chain;
982 doParams(z_params& zs, int level, Strategy strategy, error_code& ec)
986 if(level == Z_DEFAULT_COMPRESSION)
988 if(level < 0 || level > 9)
990 ec = error::stream_error;
993 func = get_config(level_).func;
995 if((strategy != strategy_ || func != get_config(level).func) &&
998 // Flush the last buffer:
999 doWrite(zs, Flush::block, ec);
1000 if(ec == error::need_buffers && pending_ == 0)
1006 max_lazy_match_ = get_config(level).max_lazy;
1007 good_match_ = get_config(level).good_length;
1008 nice_match_ = get_config(level).nice_length;
1009 max_chain_length_ = get_config(level).max_chain;
1011 strategy_ = strategy;
1017 doWrite(z_params& zs, Flush flush, error_code& ec)
1021 if(zs.next_out == 0 || (zs.next_in == 0 && zs.avail_in != 0) ||
1022 (status_ == FINISH_STATE && flush != Flush::finish))
1024 ec = error::stream_error;
1027 if(zs.avail_out == 0)
1029 ec = error::need_buffers;
1033 // value of flush param for previous deflate call
1034 boost::optional<Flush> old_flush = last_flush_;
1035 last_flush_ = flush;
1037 // Flush as much pending output as possible
1041 if(zs.avail_out == 0)
1043 /* Since avail_out is 0, deflate will be called again with
1044 * more output space, but possibly with both pending and
1045 * avail_in equal to zero. There won't be anything to do,
1046 * but this is not an error situation so make sure we
1047 * return OK instead of BUF_ERROR at next call of deflate:
1049 last_flush_ = boost::none;
1053 else if(zs.avail_in == 0 && flush <= old_flush &&
1054 flush != Flush::finish)
1056 /* Make sure there is something to do and avoid duplicate consecutive
1057 * flushes. For repeated and useless calls with Flush::finish, we keep
1058 * returning Z_STREAM_END instead of Z_BUF_ERROR.
1060 ec = error::need_buffers;
1064 // User must not provide more input after the first FINISH:
1065 if(status_ == FINISH_STATE && zs.avail_in != 0)
1067 ec = error::need_buffers;
1071 /* Start a new block or continue the current one.
1073 if(zs.avail_in != 0 || lookahead_ != 0 ||
1074 (flush != Flush::none && status_ != FINISH_STATE))
1080 case Strategy::huffman:
1081 bstate = deflate_huff(zs, flush);
1084 bstate = deflate_rle(zs, flush);
1088 bstate = (this->*(get_config(level_).func))(zs, flush);
1093 if(bstate == finish_started || bstate == finish_done)
1095 status_ = FINISH_STATE;
1097 if(bstate == need_more || bstate == finish_started)
1099 if(zs.avail_out == 0)
1101 last_flush_ = boost::none; /* avoid BUF_ERROR next call, see above */
1104 /* If flush != Flush::none && avail_out == 0, the next call
1105 of deflate should use the same flush parameter to make sure
1106 that the flush is complete. So we don't have to output an
1107 empty block here, this will be done at next call. This also
1108 ensures that for a very small output buffer, we emit at most
1112 if(bstate == block_done)
1114 if(flush == Flush::partial)
1118 else if(flush != Flush::block)
1120 /* FULL_FLUSH or SYNC_FLUSH */
1121 tr_stored_block((char*)0, 0L, 0);
1122 /* For a full flush, this empty block will be recognized
1123 * as a special marker by inflate_sync().
1125 if(flush == Flush::full)
1127 clear_hash(); // forget history
1137 if(zs.avail_out == 0)
1139 last_flush_ = boost::none; /* avoid BUF_ERROR at next call, see above */
1145 if(flush == Flush::finish)
1147 ec = error::end_of_stream;
// NOTE(review): this region is a fragmentary excerpt -- interior lines of the
// original functions are missing and original file line numbers are fused onto
// each line, so only comments are added here; no code is altered.
//
// doDictionary: preset the compression dictionary (analogue of zlib's
// deflateSetDictionary). The visible code clamps the dictionary to the window
// size, loads it into the sliding window via zs.next_in/avail_in, and inserts
// hash-chain entries so subsequent matches can reference this history.
1152 // VFALCO Warning: untested
1156 doDictionary(Byte const* dict, uInt dictLength, error_code& ec)
// Reports stream_error here -- presumably when called in an invalid state;
// the guarding condition is not visible in this excerpt.
1160 ec = error::stream_error;
1166 /* if dict would fill window, just replace the history */
1167 if(dictLength >= w_size_)
1173 dict += dictLength - w_size_; /* use the tail */
1174 dictLength = w_size_;
1177 /* insert dict into window and hash */
1179 zs.avail_in = dictLength;
1180 zs.next_in = (const Byte *)dict;
// Insert every position with at least minMatch bytes of lookahead into the
// hash table: prev_[] links chain entries, head_[] holds the newest position.
1184 while(lookahead_ >= minMatch)
1186 uInt str = strstart_;
1187 uInt n = lookahead_ - (minMatch-1);
1190 update_hash(ins_h_, window_[str + minMatch-1]);
1191 prev_[str & w_mask_] = head_[ins_h_];
1192 head_[ins_h_] = (std::uint16_t)str;
1197 lookahead_ = minMatch-1;
1200 strstart_ += lookahead_;
1201 block_start_ = (long)strstart_;
1202 insert_ = lookahead_;
// minMatch-1 marks "no current match" for the lazy-match machinery.
1204 match_length_ = prev_length_ = minMatch-1;
1205 match_available_ = 0;
// doPrime: insert up to Buf_size bits ahead of the pending output (analogue
// of zlib's deflatePrime). Fails with need_buffers when the pending buffer
// has no room before d_buf_ for the extra bits.
1211 doPrime(int bits, int value, error_code& ec)
1215 if((Byte *)(d_buf_) < pending_out_ + ((Buf_size + 7) >> 3))
1217 ec = error::need_buffers;
1223 int put = Buf_size - bi_valid_;
// Mask value to 'put' bits and splice it above the bits already buffered.
1226 bi_buf_ |= (std::uint16_t)((value & ((1 << put) - 1)) << bi_valid_);
// doPending: report pending output byte/bit counts (analogue of zlib's
// deflatePending); the body is not visible in this excerpt.
1238 doPending(unsigned* value, int* bits)
1246 //--------------------------------------------------------------------------
// Lazy initialization / reset of the deflate state (excerpt; interior lines
// missing). Derives window and hash-table sizes from w_bits_/hash_bits_,
// carves a single allocation into window / prev / head / overlay regions,
// then resets the per-stream defaults and the first block's statistics.
1248 // Do lazy initialization
1254 // Caller must set these:
1261 w_size_ = 1 << w_bits_;
1262 w_mask_ = w_size_ - 1;
1264 hash_size_ = 1 << hash_bits_;
1265 hash_mask_ = hash_size_ - 1;
1266 hash_shift_ = ((hash_bits_+minMatch-1)/minMatch);
// Sizes of the four sub-buffers packed into one allocation.
1268 auto const nwindow = w_size_ * 2*sizeof(Byte);
1269 auto const nprev = w_size_ * sizeof(std::uint16_t);
1270 auto const nhead = hash_size_ * sizeof(std::uint16_t);
1271 auto const noverlay = lit_bufsize_ * (sizeof(std::uint16_t)+2);
1272 auto const needed = nwindow + nprev + nhead + noverlay;
// Reallocate only when the required size actually changed.
1274 if(! buf_ || buf_size_ != needed)
1276 buf_.reset(new std::uint8_t[needed]);
1280 window_ = reinterpret_cast<Byte*>(buf_.get());
1281 prev_ = reinterpret_cast<std::uint16_t*>(buf_.get() + nwindow);
1282 head_ = reinterpret_cast<std::uint16_t*>(buf_.get() + nwindow + nprev);
1284 /* We overlay pending_buf_ and d_buf_ + l_buf_. This works
1285 since the average output size for(length, distance)
1286 codes is <= 24 bits.
1288 auto overlay = reinterpret_cast<std::uint16_t*>(
1289 buf_.get() + nwindow + nprev + nhead);
1291 // nothing written to window_ yet
1295 reinterpret_cast<std::uint8_t*>(overlay);
1297 static_cast<std::uint32_t>(lit_bufsize_) *
1298 (sizeof(std::uint16_t) + 2L);
// d_buf_ (match distances) and l_buf_ (literals/lengths) share the overlay
// region with pending_buf_, per the comment above.
1300 d_buf_ = overlay + lit_bufsize_ / sizeof(std::uint16_t);
1301 l_buf_ = pending_buf_ + (1 + sizeof(std::uint16_t)) * lit_bufsize_;
1304 pending_out_ = pending_buf_;
1306 status_ = BUSY_STATE;
1307 last_flush_ = Flush::none;
1315 /* Initialize the "longest match" routines for a new zlib stream
1322 window_size_ = (std::uint32_t)2L*w_size_;
// Tunables for the match finder, looked up from the per-level config table.
1326 /* Set the default configuration parameters:
1328 // VFALCO TODO just copy the config struct
1329 max_lazy_match_ = get_config(level_).max_lazy;
1330 good_match_ = get_config(level_).good_length;
1331 nice_match_ = get_config(level_).nice_length;
1332 max_chain_length_ = get_config(level_).max_chain;
1338 match_length_ = prev_length_ = minMatch-1;
1339 match_available_ = 0;
// Reset the dynamic Huffman frequency counters for a new block; END_BLOCK
// always appears once per block, hence its frequency starts at 1.
1343 // Initialize a new block.
1350 for(int n = 0; n < lCodes; n++)
1351 dyn_ltree_[n].fc = 0;
1352 for(int n = 0; n < dCodes; n++)
1353 dyn_dtree_[n].fc = 0;
1354 for(int n = 0; n < blCodes; n++)
1356 dyn_ltree_[END_BLOCK].fc = 1;
1363 /* Restore the heap property by moving down the tree starting at node k,
1364 exchanging a node with the smallest of its two sons if necessary,
1365 stopping when the heap property is re-established (each father smaller
// pqdownheap: classic sift-down on the frequency min-heap used during
// Huffman tree construction. Ordering is defined by smaller(tree, a, b).
// (Excerpt: some interior lines are missing.)
1372 ct_data const* tree, // the tree to restore
1373 int k) // node to move down
1376 int j = k << 1; // left son of k
1377 while(j <= heap_len_)
1379 // Set j to the smallest of the two sons:
1381 smaller(tree, heap_[j+1], heap_[j]))
1383 // Exit if v is smaller than both sons
1384 if(smaller(tree, v, heap_[j]))
1387 // Exchange v with the smallest son
1388 heap_[k] = heap_[j];
1391 // And continue down the tree,
1392 // setting j to the left son of k
1398 /* Remove the smallest element from the heap and recreate the heap
1399 with one less element. Updates heap and heap_len.
// pqremove: pop the minimum (heap_[kSmallest]) into 'top', move the last
// element to the root, and restore the heap invariant with pqdownheap.
1405 pqremove(ct_data const* tree, int& top)
1407 top = heap_[kSmallest];
1408 heap_[kSmallest] = heap_[heap_len_--];
1409 pqdownheap(tree, kSmallest);
1412 /* Compute the optimal bit lengths for a tree and update the total bit length
1413 for the current block.
1414 IN assertion: the fields freq and dad are set, heap[heap_max] and
1415 above are the tree nodes sorted by increasing frequency.
1416 OUT assertions: the field len is set to the optimal bit length, the
1417 array bl_count contains the frequencies for each bit length.
1418 The length opt_len is updated; static_len is also updated if stree is
// gen_bitlen: assigns code lengths to the Huffman tree in 'desc', clamping
// lengths to max_length and redistributing any overflowed leaves.
// (Excerpt: some interior lines are missing.)
1424 gen_bitlen(tree_desc *desc)
1426 ct_data *tree = desc->dyn_tree;
1427 int max_code = desc->max_code;
1428 ct_data const* stree = desc->stat_desc->static_tree;
1429 std::uint8_t const *extra = desc->stat_desc->extra_bits;
1430 int base = desc->stat_desc->extra_base;
1431 int max_length = desc->stat_desc->max_length;
1432 int h; // heap index
1433 int n, m; // iterate over the tree elements
1434 int bits; // bit length
1435 int xbits; // extra bits
1436 std::uint16_t f; // frequency
1437 int overflow = 0; // number of elements with bit length too large
// Zero the per-length leaf counters before the first pass.
1439 std::fill(&bl_count_[0], &bl_count_[maxBits+1], 0);
1441 /* In a first pass, compute the optimal bit lengths (which may
1442 * overflow in the case of the bit length tree).
1444 tree[heap_[heap_max_]].dl = 0; // root of the heap
// Walk nodes in increasing frequency order; each node's depth is its
// parent's depth plus one, clamped to max_length.
1446 for(h = heap_max_+1; h < HEAP_SIZE; h++) {
1448 bits = tree[tree[n].dl].dl + 1;
1449 if(bits > max_length) bits = max_length, overflow++;
1450 // We overwrite tree[n].dl which is no longer needed
1451 tree[n].dl = (std::uint16_t)bits;
1454 continue; // not a leaf node
1459 xbits = extra[n-base];
1461 opt_len_ += (std::uint32_t)f * (bits + xbits);
1463 static_len_ += (std::uint32_t)f * (stree[n].dl + xbits);
// Fix overflow: repeatedly move a leaf from the deepest non-empty shorter
// level down one level, freeing a slot at max_length.
1468 // Find the first bit length which could increase:
1471 bits = max_length-1;
1472 while(bl_count_[bits] == 0)
1474 bl_count_[bits]--; // move one leaf down the tree
1475 bl_count_[bits+1] += 2; // move one overflow item as its brother
1476 bl_count_[max_length]--;
1477 /* The brother of the overflow item also moves one step up,
1478 * but this does not affect bl_count[max_length]
1482 while(overflow > 0);
1484 /* Now recompute all bit lengths, scanning in increasing frequency.
1485 * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all
1486 * lengths instead of fixing only the wrong ones. This idea is taken
1487 * from 'ar' written by Haruhiko Okumura.)
1489 for(bits = max_length; bits != 0; bits--)
1491 n = bl_count_[bits];
1497 if((unsigned) tree[m].dl != (unsigned) bits)
// Adjust opt_len_ for the difference between the old and new lengths.
1499 opt_len_ += ((long)bits - (long)tree[m].dl) *(long)tree[m].fc;
1500 tree[m].dl = (std::uint16_t)bits;
1507 /* Construct one Huffman tree and assigns the code bit strings and lengths.
1508 Update the total bit length for the current block.
1509 IN assertion: the field freq is set for all tree elements.
1510 OUT assertions: the fields len and code are set to the optimal bit length
1511 and corresponding code. The length opt_len is updated; static_len is
1512 also updated if stree is not null. The field max_code is set.
// build_tree: standard Huffman construction -- heapify the non-zero-frequency
// symbols, repeatedly combine the two least frequent nodes, then derive code
// lengths (gen_bitlen) and codes (gen_codes).
// (Excerpt: some interior lines are missing.)
1517 build_tree(tree_desc *desc)
1519 ct_data *tree = desc->dyn_tree;
1520 ct_data const* stree = desc->stat_desc->static_tree;
1521 int elems = desc->stat_desc->elems;
1522 int n, m; // iterate over heap elements
1523 int max_code = -1; // largest code with non zero frequency
1524 int node; // new node being created
1526 /* Construct the initial heap, with least frequent element in
1527 * heap[kSmallest]. The sons of heap[n] are heap[2*n] and heap[2*n+1].
1528 * heap[0] is not used.
1531 heap_max_ = HEAP_SIZE;
1533 for(n = 0; n < elems; n++)
1537 heap_[++(heap_len_)] = max_code = n;
1546 /* The pkzip format requires that at least one distance code exists,
1547 * and that at least one bit should be sent even if there is only one
1548 * possible code. So to avoid special checks later on we force at least
1549 * two codes of non zero frequency.
1551 while(heap_len_ < 2)
1553 node = heap_[++(heap_len_)] = (max_code < 2 ? ++max_code : 0);
1558 static_len_ -= stree[node].dl;
1559 // node is 0 or 1 so it does not have extra bits
1561 desc->max_code = max_code;
1563 /* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree,
1564 * establish sub-heaps of increasing lengths:
1566 for(n = heap_len_/2; n >= 1; n--)
1567 pqdownheap(tree, n);
1569 /* Construct the Huffman tree by repeatedly combining the least two
// Main combine loop: pop the two least-frequent nodes n and m, park them in
// the top of heap_[] (sorted-by-frequency region), and push their new parent.
1572 node = elems; /* next internal node of the tree */
1575 pqremove(tree, n); /* n = node of least frequency */
1576 m = heap_[kSmallest]; /* m = node of next least frequency */
1578 heap_[--(heap_max_)] = n; /* keep the nodes sorted by frequency */
1579 heap_[--(heap_max_)] = m;
1581 /* Create a new node father of n and m */
1582 tree[node].fc = tree[n].fc + tree[m].fc;
// depth_ tracks subtree height; used by smaller() to break frequency ties.
1583 depth_[node] = (std::uint8_t)((depth_[n] >= depth_[m] ?
1584 depth_[n] : depth_[m]) + 1);
1585 tree[n].dl = tree[m].dl = (std::uint16_t)node;
1586 /* and insert the new node in the heap */
1587 heap_[kSmallest] = node++;
1588 pqdownheap(tree, kSmallest);
1591 while(heap_len_ >= 2);
1593 heap_[--(heap_max_)] = heap_[kSmallest];
1595 /* At this point, the fields freq and dad are set. We can now
1596 * generate the bit lengths.
1598 gen_bitlen((tree_desc *)desc);
1600 /* The field len is now set, we can generate the bit codes */
1601 gen_codes(tree, max_code, bl_count_);
1604 /* Scan a literal or distance tree to determine the frequencies
1605 of the codes in the bit length tree.
// scan_tree: run-length encodes the sequence of code lengths, counting how
// often each bit-length-tree symbol (literal length, REP_3_6, REPZ_3_10,
// REPZ_11_138) would be emitted, so the bit length tree can be built.
// (Excerpt: some interior lines are missing.)
1611 ct_data *tree, // the tree to be scanned
1612 int max_code) // and its largest code of non zero frequency
1614 int n; // iterates over all tree elements
1615 int prevlen = -1; // last emitted length
1616 int curlen; // length of current code
1617 int nextlen = tree[0].dl; // length of next code
1618 int count = 0; // repeat count of the current code
1619 int max_count = 7; // max repeat count
1620 int min_count = 4; // min repeat count
// Sentinel length after the last real code terminates the final run.
1627 tree[max_code+1].dl = (std::uint16_t)0xffff; // guard
1629 for(n = 0; n <= max_code; n++)
1631 curlen = nextlen; nextlen = tree[n+1].dl;
1632 if(++count < max_count && curlen == nextlen)
1636 else if(count < min_count)
// Runs too short to repeat-encode: count each length individually.
1638 bl_tree_[curlen].fc += count;
1640 else if(curlen != 0)
1642 if(curlen != prevlen) bl_tree_[curlen].fc++;
1643 bl_tree_[REP_3_6].fc++;
1645 else if(count <= 10)
1647 bl_tree_[REPZ_3_10].fc++;
1651 bl_tree_[REPZ_11_138].fc++;
1660 else if(curlen == nextlen)
1673 /* Send a literal or distance tree in compressed form,
1674 using the codes in bl_tree.
// send_tree: mirror of scan_tree -- emits the same run-length encoding,
// but actually writes the codes and extra repeat-count bits to the output.
1680 ct_data *tree, // the tree to be scanned
1681 int max_code) // and its largest code of non zero frequency
1683 int n; // iterates over all tree elements
1684 int prevlen = -1; // last emitted length
1685 int curlen; // length of current code
1686 int nextlen = tree[0].dl; // length of next code
1687 int count = 0; // repeat count of the current code
1688 int max_count = 7; // max repeat count
1689 int min_count = 4; // min repeat count
1691 // tree[max_code+1].dl = -1; // guard already set
1698 for(n = 0; n <= max_code; n++)
1701 nextlen = tree[n+1].dl;
1702 if(++count < max_count && curlen == nextlen)
1706 else if(count < min_count)
1710 send_code(curlen, bl_tree_);
1712 while (--count != 0);
1714 else if(curlen != 0)
1716 if(curlen != prevlen)
1718 send_code(curlen, bl_tree_);
1721 BOOST_ASSERT(count >= 3 && count <= 6);
1722 send_code(REP_3_6, bl_tree_);
// Repeat counts are biased: REP_3_6 carries count-3 in 2 extra bits,
// REPZ_3_10 count-3 in 3 bits, REPZ_11_138 count-11 in 7 bits.
1723 send_bits(count-3, 2);
1725 else if(count <= 10)
1727 send_code(REPZ_3_10, bl_tree_);
1728 send_bits(count-3, 3);
1732 send_code(REPZ_11_138, bl_tree_);
1733 send_bits(count-11, 7);
1742 else if(curlen == nextlen)
1755 /* Construct the Huffman tree for the bit lengths and return
1756 the index in bl_order of the last bit length code to send.
// build_bl_tree: gathers bit-length-code frequencies from both dynamic
// trees, builds the bit length tree, and trims trailing zero-frequency
// codes (in bl_order) down to the pkzip-required minimum of 4.
// (Excerpt: some interior lines are missing.)
1763 int max_blindex; // index of last bit length code of non zero freq
1765 // Determine the bit length frequencies for literal and distance trees
1766 scan_tree((ct_data *)dyn_ltree_, l_desc_.max_code);
1767 scan_tree((ct_data *)dyn_dtree_, d_desc_.max_code);
1769 // Build the bit length tree:
1770 build_tree((tree_desc *)(&(bl_desc_)));
1771 /* opt_len now includes the length of the tree representations, except
1772 * the lengths of the bit lengths codes and the 5+5+4 bits for the counts.
1775 /* Determine the number of bit length codes to send. The pkzip format
1776 * requires that at least 4 bit length codes be sent. (appnote.txt says
1777 * 3 but the actual value used is 4.)
1779 for(max_blindex = blCodes-1; max_blindex >= 3; max_blindex--)
1781 if(bl_tree_[lut_.bl_order[max_blindex]].dl != 0)
1784 // Update opt_len to include the bit length tree and counts
1785 opt_len_ += 3*(max_blindex+1) + 5+5+4;
1789 /* Send the header for a block using dynamic Huffman trees: the counts,
1790 the lengths of the bit length codes, the literal tree and the distance
1792 IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4.
// send_all_trees: emits the dynamic-block header per RFC 1951: HLIT (5 bits),
// HDIST (5 bits), HCLEN (4 bits), the bit-length-code lengths in bl_order,
// then the run-length-encoded literal and distance trees.
1800 int blcodes) // number of codes for each tree
1802 int rank; // index in bl_order
1804 BOOST_ASSERT(lcodes >= 257 && dcodes >= 1 && blcodes >= 4);
1805 BOOST_ASSERT(lcodes <= lCodes && dcodes <= dCodes && blcodes <= blCodes);
1806 send_bits(lcodes-257, 5); // not +255 as stated in appnote.txt
1807 send_bits(dcodes-1, 5);
1808 send_bits(blcodes-4, 4); // not -3 as stated in appnote.txt
1809 for(rank = 0; rank < blcodes; rank++)
1810 send_bits(bl_tree_[lut_.bl_order[rank]].dl, 3);
1811 send_tree((ct_data *)dyn_ltree_, lcodes-1); // literal tree
1812 send_tree((ct_data *)dyn_dtree_, dcodes-1); // distance tree
1815 /* Send the block data compressed using the given Huffman trees
// compress_block: replays the buffered (distance, length/literal) pairs from
// d_buf_/l_buf_, emitting either a literal code or a length code plus extra
// length bits followed by a distance code plus extra distance bits, then the
// END_BLOCK code. (Excerpt: some interior lines are missing.)
1821 ct_data const* ltree, // literal tree
1822 ct_data const* dtree) // distance tree
1824 unsigned dist; /* distance of matched string */
1825 int lc; /* match length or unmatched char (if dist == 0) */
1826 unsigned lx = 0; /* running index in l_buf */
1827 unsigned code; /* the code to send */
1828 int extra; /* number of extra bits to send */
// dist == 0 marks a plain literal byte rather than a match.
1838 send_code(lc, ltree); /* send a literal byte */
1842 /* Here, lc is the match length - minMatch */
1843 code = lut_.length_code[lc];
1844 send_code(code+literals+1, ltree); /* send the length code */
1845 extra = lut_.extra_lbits[code];
1848 lc -= lut_.base_length[code];
1849 send_bits(lc, extra); /* send the extra length bits */
1851 dist--; /* dist is now the match distance - 1 */
1852 code = d_code(dist);
1853 BOOST_ASSERT(code < dCodes);
1855 send_code(code, dtree); /* send the distance code */
1856 extra = lut_.extra_dbits[code];
1859 dist -= lut_.base_dist[code];
1860 send_bits(dist, extra); /* send the extra distance bits */
1862 } /* literal or match pair ? */
1864 /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */
1865 BOOST_ASSERT((uInt)(pending_) < lit_bufsize_ + 2*lx);
1867 while(lx < last_lit_);
1870 send_code(END_BLOCK, ltree);
1873 /* Check if the data type is TEXT or BINARY, using the following algorithm:
1874 - TEXT if the two conditions below are satisfied:
1875 a) There are no non-portable control characters belonging to the
1876 "black list" (0..6, 14..25, 28..31).
1877 b) There is at least one printable character belonging to the
1878 "white list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255).
1880 - The following partially-portable control characters form a
1881 "gray list" that is ignored in this detection algorithm:
1882 (7 {BEL}, 8 {BS}, 11 {VT}, 12 {FF}, 26 {SUB}, 27 {ESC}).
1883 IN assertion: the fields fc of dyn_ltree are set.
// detect_data_type: classifies the block using the literal-frequency table
// per the algorithm above. (Excerpt: some interior lines are missing.)
1890 /* black_mask is the bit mask of black-listed bytes
1891 * set bits 0..6, 14..25, and 28..31
1892 * 0xf3ffc07f = binary 11110011111111111100000001111111
1894 unsigned long black_mask = 0xf3ffc07fUL;
1897 // Check for non-textual ("black-listed") bytes.
1898 for(n = 0; n <= 31; n++, black_mask >>= 1)
1899 if((black_mask & 1) && (dyn_ltree_[n].fc != 0))
1902 // Check for textual ("white-listed") bytes. */
1903 if(dyn_ltree_[9].fc != 0 || dyn_ltree_[10].fc != 0
1904 || dyn_ltree_[13].fc != 0)
1906 for(n = 32; n < literals; n++)
1907 if(dyn_ltree_[n].fc != 0)
1910 /* There are no "black-listed" or "white-listed" bytes:
1911 * this stream either is empty or has tolerated ("gray-listed") bytes only.
1916 /* Flush the bit buffer and align the output on a byte boundary
// bi_windup fragment: writes any partial byte left in bi_buf_ so output
// resumes on a byte boundary.
1925 else if(bi_valid_ > 0)
1926 put_byte((Byte)bi_buf_);
1931 /* Flush the bit buffer, keeping at most 7 bits in it.
// bi_flush fragment: drains whole bytes from bi_buf_, leaving <8 bits.
1944 else if(bi_valid_ >= 8)
1946 put_byte((Byte)bi_buf_);
1952 /* Copy a stored block, storing first the length and its
1953 one's complement if requested.
// copy_block: byte-aligns output, optionally writes the stored-block
// LEN / NLEN (one's complement) header, then copies the raw bytes.
// (Excerpt: some interior lines are missing.)
1959 char *buf, // the input data
1960 unsigned len, // its length
1961 int header) // true if block header must be written
1963 bi_windup(); // align on byte boundary
1967 put_short((std::uint16_t)len);
1968 put_short((std::uint16_t)~len);
1970 // VFALCO Use memcpy?
1975 //------------------------------------------------------------------------------
1977 /* Initialize the tree data structures for a new zlib stream.
// tr_init: wires each tree descriptor to its dynamic tree and its static
// description in the lookup table, then starts the first block.
1984 l_desc_.dyn_tree = dyn_ltree_;
1985 l_desc_.stat_desc = &lut_.l_desc;
1987 d_desc_.dyn_tree = dyn_dtree_;
1988 d_desc_.stat_desc = &lut_.d_desc;
1990 bl_desc_.dyn_tree = bl_tree_;
1991 bl_desc_.stat_desc = &lut_.bl_desc;
1996 // Initialize the first block of the first file:
2000 /* Send one empty static block to give enough lookahead for inflate.
2001 This takes 10 bits, of which 7 may remain in the bit buffer.
// tr_align: emits an empty static block (block type + END_BLOCK).
2008 send_bits(STATIC_TREES<<1, 3);
2009 send_code(END_BLOCK, lut_.ltree);
2013 /* Flush the bits in the bit buffer to pending output (leaves at most 7 bits)
2023 /* Send a stored block
// tr_stored_block: 3-bit stored-block header (type + last flag), then the
// raw data via copy_block with the LEN/NLEN header.
2029 char *buf, // input block
2030 std::uint32_t stored_len, // length of input block
2031 int last) // one if this is the last block for a file
2033 send_bits((STORED_BLOCK<<1)+last, 3); // send block type
2034 copy_block(buf, (unsigned)stored_len, 1); // with header
// tr_tally_dist: record one match (distance, length) in the pending block's
// buffers and frequency tables; sets flush when the literal buffer is full.
2041 tr_tally_dist(std::uint16_t dist, std::uint8_t len, bool& flush)
2043 d_buf_[last_lit_] = dist;
2044 l_buf_[last_lit_++] = len;
2046 dyn_ltree_[lut_.length_code[len]+literals+1].fc++;
2047 dyn_dtree_[d_code(dist)].fc++;
2048 flush = (last_lit_ == lit_bufsize_-1);
// tr_tally_lit: record one literal byte (distance 0 marks "literal").
2055 tr_tally_lit(std::uint8_t c, bool& flush)
2057 d_buf_[last_lit_] = 0;
2058 l_buf_[last_lit_++] = c;
2060 flush = (last_lit_ == lit_bufsize_-1);
2063 //------------------------------------------------------------------------------
2065 /* Determine the best encoding for the current block: dynamic trees,
2066 static trees or store, and output the encoded block to the zip file.
// tr_flush_block: builds both dynamic trees, compares the stored / static /
// dynamic encodings by size in bytes, and emits whichever is smallest.
// (Excerpt: some interior lines are missing.)
2073 char *buf, // input block, or NULL if too old
2074 std::uint32_t stored_len, // length of input block
2075 int last) // one if this is the last block for a file
2077 std::uint32_t opt_lenb;
2078 std::uint32_t static_lenb; // opt_len and static_len in bytes
2079 int max_blindex = 0; // index of last bit length code of non zero freq
2081 // Build the Huffman trees unless a stored block is forced
2084 // Check if the file is binary or text
2085 if(zs.data_type == Z_UNKNOWN)
2086 zs.data_type = detect_data_type();
2088 // Construct the literal and distance trees
2089 build_tree((tree_desc *)(&(l_desc_)));
2091 build_tree((tree_desc *)(&(d_desc_)));
2092 /* At this point, opt_len and static_len are the total bit lengths of
2093 * the compressed block data, excluding the tree representations.
2096 /* Build the bit length tree for the above two trees, and get the index
2097 * in bl_order of the last bit length code to send.
2099 max_blindex = build_bl_tree();
2101 /* Determine the best encoding. Compute the block lengths in bytes. */
// +3 for the block-type bits, +7 to round up to whole bytes.
2102 opt_lenb = (opt_len_+3+7)>>3;
2103 static_lenb = (static_len_+3+7)>>3;
2105 if(static_lenb <= opt_lenb)
2106 opt_lenb = static_lenb;
2110 // VFALCO This assertion fails even in the original ZLib,
2111 // happens with strategy == Z_HUFFMAN_ONLY, see:
2112 // https://github.com/madler/zlib/issues/172
2117 opt_lenb = static_lenb = stored_len + 5; // force a stored block
2121 if(buf != (char*)0) { /* force stored block */
2123 if(stored_len+4 <= opt_lenb && buf != (char*)0) {
2124 /* 4: two words for the lengths */
2126 /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE.
2127 * Otherwise we can't have processed more than WSIZE input bytes since
2128 * the last block flush, because compression would have been
2129 * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to
2130 * transform a block into a stored block.
2132 tr_stored_block(buf, stored_len, last);
2136 else if(static_lenb >= 0)
2138 // force static trees
2141 else if(strategy_ == Strategy::fixed || static_lenb == opt_lenb)
2144 send_bits((STATIC_TREES<<1)+last, 3);
2145 compress_block(lut_.ltree, lut_.dtree);
2149 send_bits((DYN_TREES<<1)+last, 3);
2150 send_all_trees(l_desc_.max_code+1, d_desc_.max_code+1,
2152 compress_block((const ct_data *)dyn_ltree_,
2153 (const ct_data *)dyn_dtree_);
2155 /* The above check is made mod 2^32, for files larger than 512 MB
2156 * and std::size_t implemented on 32 bits.
// fill_window: refill the lookahead from the input stream. When the write
// position nears the end of the double-sized window, slides the upper half
// down and rebases the hash chains; also zeroes the kWinInit bytes past the
// data so longest_match may safely over-read.
// (Excerpt: some interior lines are missing.)
2167 fill_window(z_params& zs)
2170 unsigned more; // Amount of free space at the end of the window.
2172 uInt wsize = w_size_;
2176 more = (unsigned)(window_size_ -
2177 (std::uint32_t)lookahead_ -(std::uint32_t)strstart_);
2179 // VFALCO We don't support systems below 32-bit
2181 // Deal with !@#$% 64K limit:
2182 if(sizeof(int) <= 2)
2184 if(more == 0 && strstart_ == 0 && lookahead_ == 0)
2188 else if(more == (unsigned)(-1))
2190 /* Very unlikely, but possible on 16 bit machine if
2191 * strstart == 0 && lookahead == 1 (input done a byte at time)
2198 /* If the window is almost full and there is insufficient lookahead,
2199 move the upper half to the lower one to make room in the upper half.
2201 if(strstart_ >= wsize+max_dist())
2203 std::memcpy(window_, window_+wsize, (unsigned)wsize);
2204 match_start_ -= wsize;
2205 strstart_ -= wsize; // we now have strstart >= max_dist
2206 block_start_ -= (long) wsize;
2208 /* Slide the hash table (could be avoided with 32 bit values
2209 at the expense of memory usage). We slide even when level == 0
2210 to keep the hash table consistent if we switch back to level > 0
2211 later. (Using level 0 permanently is not an optimal usage of
2212 zlib, so we don't care about this pathological case.)
// Rebase head_ and prev_ entries by wsize; entries now out of range drop to 0.
2219 *p = (std::uint16_t)(m >= wsize ? m-wsize : 0);
2228 *p = (std::uint16_t)(m >= wsize ? m-wsize : 0);
2229 /* If n is not on any hash chain, prev[n] is garbage but
2230 its value will never be used.
2236 if(zs.avail_in == 0)
2239 /* If there was no sliding:
2240 strstart <= WSIZE+max_dist-1 && lookahead <= kMinLookahead - 1 &&
2241 more == window_size - lookahead - strstart
2242 => more >= window_size - (kMinLookahead-1 + WSIZE + max_dist-1)
2243 => more >= window_size - 2*WSIZE + 2
2244 In the BIG_MEM or MMAP case (not yet supported),
2245 window_size == input_size + kMinLookahead &&
2246 strstart + lookahead_ <= input_size => more >= kMinLookahead.
2247 Otherwise, window_size == 2*WSIZE so more >= 2.
2248 If there was sliding, more >= WSIZE. So in all cases, more >= 2.
2250 n = read_buf(zs, window_ + strstart_ + lookahead_, more);
2253 // Initialize the hash value now that we have some input:
2254 if(lookahead_ + insert_ >= minMatch)
2256 uInt str = strstart_ - insert_;
2257 ins_h_ = window_[str];
2258 update_hash(ins_h_, window_[str + 1]);
2261 update_hash(ins_h_, window_[str + minMatch-1]);
2262 prev_[str & w_mask_] = head_[ins_h_];
2263 head_[ins_h_] = (std::uint16_t)str;
2266 if(lookahead_ + insert_ < minMatch)
2270 /* If the whole input has less than minMatch bytes, ins_h is garbage,
2271 but this is not important since only literal bytes will be emitted.
2274 while(lookahead_ < kMinLookahead && zs.avail_in != 0);
2276 /* If the kWinInit bytes after the end of the current data have never been
2277 written, then zero those bytes in order to avoid memory check reports of
2278 the use of uninitialized (or uninitialised as Julian writes) bytes by
2279 the longest match routines. Update the high water mark for the next
2280 time through here. kWinInit is set to maxMatch since the longest match
2281 routines allow scanning to strstart + maxMatch, ignoring lookahead.
2283 if(high_water_ < window_size_)
2285 std::uint32_t curr = strstart_ + (std::uint32_t)(lookahead_);
2288 if(high_water_ < curr)
2290 /* Previous high water mark below current data -- zero kWinInit
2291 bytes or up to end of window, whichever is less.
2293 init = window_size_ - curr;
2296 std::memset(window_ + curr, 0, (unsigned)init);
2297 high_water_ = curr + init;
2299 else if(high_water_ < (std::uint32_t)curr + kWinInit)
2301 /* High water mark at or above current data, but below current data
2302 plus kWinInit -- zero out to current data plus kWinInit, or up
2303 to end of window, whichever is less.
2305 init = (std::uint32_t)curr + kWinInit - high_water_;
2306 if(init > window_size_ - high_water_)
2307 init = window_size_ - high_water_;
2308 std::memset(window_ + high_water_, 0, (unsigned)init);
2309 high_water_ += init;
2314 /* Flush as much pending output as possible. All write() output goes
2315 through this function so some applications may wish to modify it
2316 to avoid allocating a large strm->next_out buffer and copying into it.
2317 (See also read_buf()).
// flush_pending: copy up to avail_out bytes of pending output into
// zs.next_out, advancing both cursors and the total_out counter.
// (Excerpt: some interior lines are missing.)
2322 flush_pending(z_params& zs)
2325 auto len = clamp(pending_, zs.avail_out);
2329 std::memcpy(zs.next_out, pending_out_, len);
2331 static_cast<std::uint8_t*>(zs.next_out) + len;
2332 pending_out_ += len;
2333 zs.total_out += len;
2334 zs.avail_out -= len;
// When the pending buffer drains completely, rewind the read cursor.
2337 pending_out_ = pending_buf_;
2340 /* Flush the current block, with given end-of-file flag.
2341 IN assertion: strstart is set to the end of the current match.
// flush_block: hands the window span [block_start_, strstart_) to
// tr_flush_block (NULL when the data slid out of the window), then starts
// the next block at strstart_.
2347 flush_block(z_params& zs, bool last)
2350 (block_start_ >= 0L ?
2351 (char *)&window_[(unsigned)block_start_] :
2353 (std::uint32_t)((long)strstart_ - block_start_),
2355 block_start_ = strstart_;
2359 /* Read a new buffer from the current input stream, update the adler32
2360 and total number of bytes read. All write() input goes through
2361 this function so some applications may wish to modify it to avoid
2362 allocating a large strm->next_in buffer and copying from it.
2363 (See also flush_pending()).
// read_buf: copy up to 'size' bytes from zs.next_in into buf, advancing
// next_in; returns the byte count (return statement not in this excerpt).
2368 read_buf(z_params& zs, Byte *buf, unsigned size)
2370 auto len = clamp(zs.avail_in, size);
2376 std::memcpy(buf, zs.next_in, len);
2377 zs.next_in = static_cast<
2378 std::uint8_t const*>(zs.next_in) + len;
2383 /* Set match_start to the longest match starting at the given string and
2384 return its length. Matches shorter or equal to prev_length are discarded,
2385 in which case the result is equal to prev_length and match_start is
2387 IN assertions: cur_match is the head of the hash chain for the current
2388 string (strstart) and its distance is <= max_dist, and prev_length >= 1
2389 OUT assertion: the match length is not greater than s->lookahead_.
2391 For 80x86 and 680x0, an optimized version will be provided in match.asm or
2392 match.S. The code will be functionally equivalent.
// longest_match: walk the hash chain (prev_[]) from cur_match toward older
// positions, comparing candidate strings against window_+strstart_, keeping
// the longest. chain_length / nice_match / good_match bound the search.
// (Excerpt: some interior lines are missing.)
2397 longest_match(IPos cur_match)
2399 unsigned chain_length = max_chain_length_;/* max hash chain length */
2400 Byte *scan = window_ + strstart_; /* current string */
2401 Byte *match; /* matched string */
2402 int len; /* length of current match */
2403 int best_len = prev_length_; /* best match length so far */
2404 int nice_match = nice_match_; /* stop if match long enough */
2405 IPos limit = strstart_ > (IPos)max_dist() ?
2406 strstart_ - (IPos)max_dist() : 0;
2407 /* Stop when cur_match becomes <= limit. To simplify the code,
2408 * we prevent matches with the string of window index 0.
2410 std::uint16_t *prev = prev_;
2411 uInt wmask = w_mask_;
2413 Byte *strend = window_ + strstart_ + maxMatch;
// scan_end1/scan_end cache the last two bytes of the current best match so
// candidates can be rejected with two byte compares before a full scan.
2414 Byte scan_end1 = scan[best_len-1];
2415 Byte scan_end = scan[best_len];
2417 /* The code is optimized for HASH_BITS >= 8 and maxMatch-2 multiple of 16.
2418 * It is easy to get rid of this optimization if necessary.
2420 BOOST_ASSERT(hash_bits_ >= 8 && maxMatch == 258);
2422 /* Do not waste too much time if we already have a good match: */
2423 if(prev_length_ >= good_match_) {
2426 /* Do not look for matches beyond the end of the input. This is necessary
2427 * to make deflate deterministic.
2429 if((uInt)nice_match > lookahead_)
2430 nice_match = lookahead_;
2432 BOOST_ASSERT((std::uint32_t)strstart_ <= window_size_-kMinLookahead);
2435 BOOST_ASSERT(cur_match < strstart_);
2436 match = window_ + cur_match;
2438 /* Skip to next match if the match length cannot increase
2439 * or if the match length is less than 2. Note that the checks below
2440 * for insufficient lookahead only occur occasionally for performance
2441 * reasons. Therefore uninitialized memory will be accessed, and
2442 * conditional jumps will be made that depend on those values.
2443 * However the length of the match is limited to the lookahead, so
2444 * the output of deflate is not affected by the uninitialized values.
2446 if( match[best_len] != scan_end ||
2447 match[best_len-1] != scan_end1 ||
2449 *++match != scan[1])
2452 /* The check at best_len-1 can be removed because it will be made
2453 * again later. (This heuristic is not always a win.)
2454 * It is not necessary to compare scan[2] and match[2] since they
2455 * are always equal when the other bytes match, given that
2456 * the hash keys are equal and that HASH_BITS >= 8.
2459 BOOST_ASSERT(*scan == *match);
2461 /* We check for insufficient lookahead only every 8th comparison;
2462 * the 256th check will be made at strstart+258.
// Unrolled 8-way compare loop; terminates at or before strend.
2467 while( *++scan == *++match && *++scan == *++match &&
2468 *++scan == *++match && *++scan == *++match &&
2469 *++scan == *++match && *++scan == *++match &&
2470 *++scan == *++match && *++scan == *++match &&
2473 BOOST_ASSERT(scan <= window_+(unsigned)(window_size_-1));
2475 len = maxMatch - (int)(strend - scan);
2476 scan = strend - maxMatch;
2478 if(len > best_len) {
2479 match_start_ = cur_match;
2481 if(len >= nice_match) break;
2482 scan_end1 = scan[best_len-1];
2483 scan_end = scan[best_len];
2486 while((cur_match = prev[cur_match & wmask]) > limit
2487 && --chain_length != 0);
// Never report more than the available lookahead (see OUT assertion above).
2489 if((uInt)best_len <= lookahead_)
2490 return (uInt)best_len;
2494 //------------------------------------------------------------------------------
2496 /* Copy without compression as much as possible from the input stream, return
2497 the current block state.
2498 This function does not insert new strings in the dictionary since
2499 uncompressible data is probably not useful. This function is used
2500 only for the level=0 compression option.
2501 NOTE: this function should be optimized to avoid extra copying from
2502 window to pending_buf.
// f_stored: the level-0 deflate strategy -- pass data through as stored
// blocks, flushing whenever a block would overflow pending_buf or the
// window would slide. (Excerpt: some interior lines are missing.)
2508 f_stored(z_params& zs, Flush flush) ->
2511 /* Stored blocks are limited to 0xffff bytes, pending_buf is limited
2512 * to pending_buf_size, and each stored block has a 5 byte header:
2514 std::uint32_t max_block_size = 0xffff;
2515 std::uint32_t max_start;
2517 if(max_block_size > pending_buf_size_ - 5) {
2518 max_block_size = pending_buf_size_ - 5;
2521 /* Copy as much as possible from input to output: */
2523 /* Fill the window as much as possible: */
2524 if(lookahead_ <= 1) {
2526 BOOST_ASSERT(strstart_ < w_size_+max_dist() ||
2527 block_start_ >= (long)w_size_);
2530 if(lookahead_ == 0 && flush == Flush::none)
2533 if(lookahead_ == 0) break; /* flush the current block */
2535 BOOST_ASSERT(block_start_ >= 0L);
// Consume the entire lookahead into the current stored block.
2537 strstart_ += lookahead_;
2540 /* Emit a stored block if pending_buf will be full: */
2541 max_start = block_start_ + max_block_size;
2542 if(strstart_ == 0 || (std::uint32_t)strstart_ >= max_start) {
2543 /* strstart == 0 is possible when wraparound on 16-bit machine */
2544 lookahead_ = (uInt)(strstart_ - max_start);
2545 strstart_ = (uInt)max_start;
2546 flush_block(zs, false);
2547 if(zs.avail_out == 0)
2550 /* Flush if we may have to slide, otherwise block_start may become
2551 * negative and the data will be gone:
2553 if(strstart_ - (uInt)block_start_ >= max_dist()) {
2554 flush_block(zs, false);
2555 if(zs.avail_out == 0)
2560 if(flush == Flush::finish)
2562 flush_block(zs, true);
2563 if(zs.avail_out == 0)
2564 return finish_started;
2567 if((long)strstart_ > block_start_)
2569 flush_block(zs, false);
2570 if(zs.avail_out == 0)
2576 /* Compress as much as possible from the input stream, return the current
2578 This function does not perform lazy evaluation of matches and inserts
2579 new strings in the dictionary only for unmatched strings or for short
2580 matches. It is used only for the fast compression options.
2586 f_fast(z_params& zs, Flush flush) ->
/* Greedy (non-lazy) compression strategy: take the first acceptable
 * match at each position and emit it immediately.  Appears to mirror
 * zlib's deflate_fast().  Returns the resulting block state.
 * NOTE(review): elided view — statements between the numbered lines
 * are not shown; comments cover only the visible code.
 */
2589 IPos hash_head; /* head of the hash chain */
2590 bool bflush; /* set if current block must be flushed */
2594 /* Make sure that we always have enough lookahead, except
2595 * at the end of the input file. We need maxMatch bytes
2596 * for the next match, plus minMatch bytes to insert the
2597 * string following the next match.
2599 if(lookahead_ < kMinLookahead)
2602 if(lookahead_ < kMinLookahead && flush == Flush::none)
2605 break; /* flush the current block */
2608 /* Insert the string window[strstart .. strstart+2] in the
2609 * dictionary, and set hash_head to the head of the hash chain:
2612 if(lookahead_ >= minMatch) {
2613 insert_string(hash_head);
2616 /* Find the longest match, discarding those <= prev_length.
2617 * At this point we have always match_length < minMatch
2619 if(hash_head != 0 && strstart_ - hash_head <= max_dist()) {
2620 /* To simplify the code, we prevent matches with the string
2621 * of window index 0 (in particular we have to avoid a match
2622 * of the string with itself at the start of the input file).
2624 match_length_ = longest_match (hash_head);
2625 /* longest_match() sets match_start */
2627 if(match_length_ >= minMatch)
/* Emit a <distance, length-minMatch> pair for the match just found. */
2629 tr_tally_dist(strstart_ - match_start_,
2630 match_length_ - minMatch, bflush);
2632 lookahead_ -= match_length_;
2634 /* Insert new strings in the hash table only if the match length
2635 * is not too large. This saves time but degrades compression.
2637 if(match_length_ <= max_lazy_match_ &&
2638 lookahead_ >= minMatch) {
2639 match_length_--; /* string at strstart already in table */
2643 insert_string(hash_head);
2644 /* strstart never exceeds WSIZE-maxMatch, so there are
2645 * always minMatch bytes ahead.
2648 while(--match_length_ != 0);
/* Match too long (or lookahead too short) to index fully: skip
 * over it and re-seed the rolling hash from the new position. */
2653 strstart_ += match_length_;
2655 ins_h_ = window_[strstart_];
2656 update_hash(ins_h_, window_[strstart_+1]);
2657 /* If lookahead < minMatch, ins_h is garbage, but it does not
2658 * matter since it will be recomputed at next deflate call.
2664 /* No match, output a literal byte */
2665 tr_tally_lit(window_[strstart_], bflush);
/* Tally requested a flush: emit the current block (not last). */
2671 flush_block(zs, false);
2672 if(zs.avail_out == 0)
2676 insert_ = strstart_ < minMatch-1 ? strstart_ : minMatch-1;
2677 if(flush == Flush::finish)
2679 flush_block(zs, true);
2680 if(zs.avail_out == 0)
2681 return finish_started;
2686 flush_block(zs, false);
2687 if(zs.avail_out == 0)
2693 /* Same as above, but achieves better compression. We use a lazy
2694 evaluation for matches: a match is finally adopted only if there is
2695 no better match at the next window position.
2701 f_slow(z_params& zs, Flush flush) ->
/* Lazy-match compression strategy: a match found at the current
 * position is adopted only if the next position does not yield a
 * longer one (better compression than f_fast at more CPU cost).
 * Appears to mirror zlib's deflate_slow().  Returns the resulting
 * block state.
 * NOTE(review): elided view — statements between the numbered lines
 * are not shown; comments cover only the visible code.
 */
2704 IPos hash_head; /* head of hash chain */
2705 bool bflush; /* set if current block must be flushed */
2707 /* Process the input block. */
2710 /* Make sure that we always have enough lookahead, except
2711 * at the end of the input file. We need maxMatch bytes
2712 * for the next match, plus minMatch bytes to insert the
2713 * string following the next match.
2715 if(lookahead_ < kMinLookahead)
2718 if(lookahead_ < kMinLookahead && flush == Flush::none)
2721 break; /* flush the current block */
2724 /* Insert the string window[strstart .. strstart+2] in the
2725 * dictionary, and set hash_head to the head of the hash chain:
2728 if(lookahead_ >= minMatch)
2729 insert_string(hash_head);
2731 /* Find the longest match, discarding those <= prev_length.
/* Remember the previous step's candidate before searching again. */
2733 prev_length_ = match_length_, prev_match_ = match_start_;
2734 match_length_ = minMatch-1;
2736 if(hash_head != 0 && prev_length_ < max_lazy_match_ &&
2737 strstart_ - hash_head <= max_dist())
2739 /* To simplify the code, we prevent matches with the string
2740 * of window index 0 (in particular we have to avoid a match
2741 * of the string with itself at the start of the input file).
2743 match_length_ = longest_match(hash_head);
2744 /* longest_match() sets match_start */
/* Discard short/far matches that would cost more to encode than
 * literals (filtered strategy, or a minimal match beyond kTooFar). */
2746 if(match_length_ <= 5 && (strategy_ == Strategy::filtered
2747 || (match_length_ == minMatch &&
2748 strstart_ - match_start_ > kTooFar)
2751 /* If prev_match is also minMatch, match_start is garbage
2752 * but we will ignore the current match anyway.
2754 match_length_ = minMatch-1;
2757 /* If there was a match at the previous step and the current
2758 * match is not better, output the previous match:
2760 if(prev_length_ >= minMatch && match_length_ <= prev_length_)
2762 /* Do not insert strings in hash table beyond this. */
2763 uInt max_insert = strstart_ + lookahead_ - minMatch;
/* The previous match started one byte back, hence the -1 here. */
2765 tr_tally_dist(strstart_ -1 - prev_match_,
2766 prev_length_ - minMatch, bflush);
2768 /* Insert in hash table all strings up to the end of the match.
2769 * strstart-1 and strstart are already inserted. If there is not
2770 * enough lookahead, the last two strings are not inserted in
2773 lookahead_ -= prev_length_-1;
2776 if(++strstart_ <= max_insert)
2777 insert_string(hash_head);
2779 while(--prev_length_ != 0);
2780 match_available_ = 0;
2781 match_length_ = minMatch-1;
2786 flush_block(zs, false);
2787 if(zs.avail_out == 0)
2792 else if(match_available_)
2794 /* If there was no match at the previous position, output a
2795 * single literal. If there was a match but the current match
2796 * is longer, truncate the previous match to a single literal.
2798 tr_tally_lit(window_[strstart_-1], bflush);
2800 flush_block(zs, false);
2803 if(zs.avail_out == 0)
2808 /* There is no previous match to compare with, wait for
2809 * the next step to decide.
2811 match_available_ = 1;
/* End of loop: a deferred literal may still be pending. */
2816 BOOST_ASSERT(flush != Flush::none);
2817 if(match_available_)
2819 tr_tally_lit(window_[strstart_-1], bflush);
2820 match_available_ = 0;
2822 insert_ = strstart_ < minMatch-1 ? strstart_ : minMatch-1;
2823 if(flush == Flush::finish)
2825 flush_block(zs, true);
2826 if(zs.avail_out == 0)
2827 return finish_started;
2832 flush_block(zs, false);
2833 if(zs.avail_out == 0)
2839 /* For Strategy::rle, simply look for runs of bytes, generate matches only of distance
2840 one. Do not maintain a hash table. (It will be regenerated if this run of
2841 deflate switches away from Strategy::rle.)
2847 f_rle(z_params& zs, Flush flush) ->
/* Run-length strategy (Strategy::rle): emit matches only at distance
 * one (runs of a repeated byte); no hash table is consulted.  Appears
 * to mirror zlib's deflate_rle().  Returns the resulting block state.
 * NOTE(review): elided view — statements between the numbered lines
 * (loop headers, early returns) are not shown; comments cover only
 * the visible code.
 */
2850 bool bflush; // set if current block must be flushed
2851 uInt prev; // byte at distance one to match
2852 Byte *scan, *strend; // scan goes up to strend for length of run
2856 /* Make sure that we always have enough lookahead, except
2857 * at the end of the input file. We need maxMatch bytes
2858 * for the longest run, plus one for the unrolled loop.
2860 if(lookahead_ <= maxMatch) {
2862 if(lookahead_ <= maxMatch && flush == Flush::none) {
2865 if(lookahead_ == 0) break; /* flush the current block */
2868 /* See how many times the previous byte repeats */
2870 if(lookahead_ >= minMatch && strstart_ > 0) {
/* Start scanning from the byte just before the current position. */
2871 scan = window_ + strstart_ - 1;
2873 if(prev == *++scan && prev == *++scan && prev == *++scan) {
2874 strend = window_ + strstart_ + maxMatch;
/* Unrolled comparison loop: extend the run up to maxMatch bytes
 * (the loop's do-header is elided from this view). */
2876 } while(prev == *++scan && prev == *++scan &&
2877 prev == *++scan && prev == *++scan &&
2878 prev == *++scan && prev == *++scan &&
2879 prev == *++scan && prev == *++scan &&
2881 match_length_ = maxMatch - (int)(strend - scan);
2882 if(match_length_ > lookahead_)
2883 match_length_ = lookahead_;
2885 BOOST_ASSERT(scan <= window_+(uInt)(window_size_-1));
2888 /* Emit match if have run of minMatch or longer, else emit literal */
2889 if(match_length_ >= minMatch) {
/* Distance is always 1 for an RLE run. */
2890 tr_tally_dist(1, match_length_ - minMatch, bflush);
2892 lookahead_ -= match_length_;
2893 strstart_ += match_length_;
2896 /* No match, output a literal byte */
2897 tr_tally_lit(window_[strstart_], bflush);
2903 flush_block(zs, false);
2904 if(zs.avail_out == 0)
/* End of loop: emit the final block, last only on Flush::finish. */
2909 if(flush == Flush::finish)
2911 flush_block(zs, true);
2912 if(zs.avail_out == 0)
2913 return finish_started;
2918 flush_block(zs, false);
2919 if(zs.avail_out == 0)
2925 /* ===========================================================================
2926 * For Strategy::huffman, do not look for matches. Do not maintain a hash table.
2927 * (It will be regenerated if this run of deflate switches away from Huffman.)
2933 f_huff(z_params& zs, Flush flush) ->
2936 bool bflush; // set if current block must be flushed
2940 // Make sure that we have a literal to write.
2946 if(flush == Flush::none)
2948 break; // flush the current block
2952 // Output a literal byte
2954 tr_tally_lit(window_[strstart_], bflush);
2959 flush_block(zs, false);
2960 if(zs.avail_out == 0)
2965 if(flush == Flush::finish)
2967 flush_block(zs, true);
2968 if(zs.avail_out == 0)
2969 return finish_started;
2974 flush_block(zs, false);
2975 if(zs.avail_out == 0)