2 // Copyright (c) 2016-2017 Vinnie Falco (vinnie dot falco at gmail dot com)
4 // Distributed under the Boost Software License, Version 1.0. (See accompanying
5 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7 // Official repository: https://github.com/boostorg/beast
9 // This is a derivative work based on Zlib, copyright below:
11 Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler
13 This software is provided 'as-is', without any express or implied
14 warranty. In no event will the authors be held liable for any damages
15 arising from the use of this software.
17 Permission is granted to anyone to use this software for any purpose,
18 including commercial applications, and to alter it and redistribute it
19 freely, subject to the following restrictions:
21 1. The origin of this software must not be misrepresented; you must not
22 claim that you wrote the original software. If you use this software
23 in a product, an acknowledgment in the product documentation would be
24 appreciated but is not required.
25 2. Altered source versions must be plainly marked as such, and must not be
26 misrepresented as being the original software.
27 3. This notice may not be removed or altered from any source distribution.
29 Jean-loup Gailly Mark Adler
30 jloup@gzip.org madler@alumni.caltech.edu
32 The data format used by the zlib library is described by RFCs (Request for
33 Comments) 1950 to 1952 in the files http://tools.ietf.org/html/rfc1950
34 (zlib format), rfc1951 (deflate format) and rfc1952 (gzip format).
37 #ifndef BOOST_BEAST_ZLIB_DETAIL_DEFLATE_STREAM_HPP
38 #define BOOST_BEAST_ZLIB_DETAIL_DEFLATE_STREAM_HPP
40 #include <boost/beast/zlib/zlib.hpp>
41 #include <boost/beast/zlib/detail/ranges.hpp>
42 #include <boost/beast/core/detail/type_traits.hpp>
43 #include <boost/assert.hpp>
44 #include <boost/config.hpp>
45 #include <boost/make_unique.hpp>
46 #include <boost/optional.hpp>
47 #include <boost/throw_exception.hpp>
53 #include <type_traits>
63 * The "deflation" process depends on being able to identify portions
64 * of the input text which are identical to earlier input (within a
65 * sliding window trailing behind the input currently being processed).
67 * Each code tree is stored in a compressed form which is itself
68 * a Huffman encoding of the lengths of all the code strings (in
69 * ascending order by source values). The actual code strings are
70 * reconstructed from the lengths in the inflate process, as described
71 * in the deflate specification.
73 * The most straightforward technique turns out to be the fastest for
74 * most input files: try all possible matches and select the longest.
75 * The key feature of this algorithm is that insertions into the string
76 * dictionary are very simple and thus fast, and deletions are avoided
77 * completely. Insertions are performed at each input character, whereas
78 * string matches are performed only when the previous match ends. So it
79 * is preferable to spend more time in matches to allow very fast string
80 * insertions and avoid deletions. The matching algorithm for small
81 * strings is inspired from that of Rabin & Karp. A brute force approach
82 * is used to find longer strings when a small match has been found.
83 * A similar algorithm is used in comic (by Jan-Mark Wams) and freeze
84 * (by Leonid Broukhis).
85 * A previous version of this file used a more sophisticated algorithm
86 * (by Fiala and Greene) which is guaranteed to run in linear amortized
87 * time, but has a larger average cost, uses more memory and is patented.
88 * However the F&G algorithm may be faster for some highly redundant
89 * files if the parameter max_chain_length (described below) is too large.
93 * The idea of lazy evaluation of matches is due to Jan-Mark Wams, and
94 * I found it in 'freeze' written by Leonid Broukhis.
95 * Thanks to many people for bug reports and testing.
99 * Deutsch, L.P.,"DEFLATE Compressed Data Format Specification".
100 * Available in http://tools.ietf.org/html/rfc1951
102 * A description of the Rabin and Karp algorithm is given in the book
103 * "Algorithms" by R. Sedgewick, Addison-Wesley, p252.
105 * Fiala,E.R., and Greene,D.H.
106 * Data Compression with Finite Windows, Comm.ACM, 32,4 (April 1989), 490-505
// Compile-time constants shared by the deflate implementation
// (mirrors the limits defined by RFC 1951 and zlib's deflate.h).
113 // Upper limit on code length
114 static std::uint8_t constexpr maxBits = 15;
116 // Number of length codes, not counting the special END_BLOCK code
117 static std::uint16_t constexpr lengthCodes = 29;
119 // Number of literal bytes 0..255
120 static std::uint16_t constexpr literals = 256;
122 // Number of Literal or Length codes, including the END_BLOCK code
123 static std::uint16_t constexpr lCodes = literals + 1 + lengthCodes;
125 // Number of distance code lengths
126 static std::uint16_t constexpr dCodes = 30;
128 // Number of codes used to transfer the bit lengths
129 static std::uint16_t constexpr blCodes = 19;
131 // Number of distance codes
132 static std::uint16_t constexpr distCodeLen = 512;
134 // Size limit on bit length codes
135 static std::uint8_t constexpr maxBlBits= 7;
// Minimum and maximum match lengths for the LZ77 matcher
137 static std::uint16_t constexpr minMatch = 3;
138 static std::uint16_t constexpr maxMatch = 258;
140 // Can't change minMatch without also changing code, see original zlib
141 BOOST_STATIC_ASSERT(minMatch == 3);
143 // end of block literal code
144 static std::uint16_t constexpr END_BLOCK = 256;
146 // repeat previous bit length 3-6 times (2 bits of repeat count)
147 static std::uint8_t constexpr REP_3_6 = 16;
149 // repeat a zero length 3-10 times (3 bits of repeat count)
150 static std::uint8_t constexpr REPZ_3_10 = 17;
152 // repeat a zero length 11-138 times (7 bits of repeat count)
153 static std::uint8_t constexpr REPZ_11_138 = 18;
155 // The three kinds of block type
156 static std::uint8_t constexpr STORED_BLOCK = 0;
157 static std::uint8_t constexpr STATIC_TREES = 1;
158 static std::uint8_t constexpr DYN_TREES = 2;
160 // Maximum value for memLevel in deflateInit2
161 static std::uint8_t constexpr max_mem_level = 9;
164 static std::uint8_t constexpr DEF_MEM_LEVEL = max_mem_level;
166 /* Note: the deflate() code requires max_lazy >= minMatch and max_chain >= 4
167 For deflate_fast() (levels <= 3) good is ignored and lazy has a different
meaning. */
// Size of the Huffman construction heap: 2*lCodes+1 nodes.
172 static std::uint16_t constexpr HEAP_SIZE = 2 * lCodes + 1;
174 // size of bit buffer in bi_buf
175 static std::uint8_t constexpr Buf_size = 16;
177 // Matches of length 3 are discarded if their distance exceeds kTooFar
178 static std::size_t constexpr kTooFar = 4096;
180 /* Minimum amount of lookahead, except at the end of the input file.
181 See deflate.c for comments about the minMatch+1.
*/
183 static std::size_t constexpr kMinLookahead = maxMatch + minMatch+1;
185 /* Number of bytes after end of data in window to initialize in order
186 to avoid memory checker errors from longest match routines
*/
188 static std::size_t constexpr kWinInit = maxMatch;
190 // Describes a single value and its code string.
// NOTE(review): the `struct ct_data {` header and the member-function
// return type / braces are elided in this excerpt.
193 std::uint16_t fc; // frequency count or bit string
194 std::uint16_t dl; // parent node in tree or length of bit string
// Equality compares both halves of the code entry.
197 operator==(ct_data const& rhs) const
199 return fc == rhs.fc && dl == rhs.dl;
// NOTE(review): `struct static_desc {` header elided in this excerpt.
// Describes a static (pre-built) Huffman tree.
205 ct_data const* static_tree;// static tree or NULL
206 std::uint8_t const* extra_bits; // extra bits for each code or NULL
207 std::uint16_t extra_base; // base index for extra_bits
208 std::uint16_t elems; // max number of elements in the tree
209 std::uint8_t max_length; // max bit length for the codes
// Static lookup tables used by the encoder; the values below match
// the tables in zlib's trees.c.
214 // Number of extra bits for each length code
215 std::uint8_t const extra_lbits[lengthCodes] = {
216 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0
219 // Number of extra bits for each distance code
220 std::uint8_t const extra_dbits[dCodes] = {
221 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13
224 // Number of extra bits for each bit length code
225 std::uint8_t const extra_blbits[blCodes] = {
226 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7
229 // The lengths of the bit length codes are sent in order
230 // of decreasing probability, to avoid transmitting the
231 // lengths for unused bit length codes.
232 std::uint8_t const bl_order[blCodes] = {
233 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15
// Static literal/length tree (codes 286 and 287 included for canonicity).
236 ct_data ltree[lCodes + 2];
// Static distance tree.
238 ct_data dtree[dCodes];
240 // Distance codes. The first 256 values correspond to the distances
241 // 3 .. 258, the last 256 values correspond to the top 8 bits of
242 // the 15 bit distances.
243 std::uint8_t dist_code[distCodeLen];
// length code for each normalized match length (0 == minMatch)
245 std::uint8_t length_code[maxMatch-minMatch+1];
// First normalized length for each length code.
247 std::uint8_t base_length[lengthCodes];
// First normalized distance for each distance code.
249 std::uint16_t base_dist[dCodes];
// Descriptors for the three static trees.
251 static_desc l_desc = {
252 ltree, extra_lbits, literals+1, lCodes, maxBits
255 static_desc d_desc = {
256 dtree, extra_dbits, 0, dCodes, maxBits
259 static_desc bl_desc =
261 nullptr, extra_blbits, 0, blCodes, maxBlBits
// NOTE(review): the `enum block_state {` opener is elided in this excerpt.
// Result codes returned by the per-strategy compression functions.
274 need_more, /* block not completed, need more input or more output */
275 block_done, /* block flush performed */
276 finish_started, /* finish started, need only more output at next deflate */
277 finish_done /* finish done, accept no more input or output */
280 // VFALCO This might not be needed, e.g. for zip/gzip
291 /* A std::uint16_t is an index in the character window. We use short instead of int to
292 * save space in the various tables. IPos is used only for parameter passing.
*/
294 using IPos = unsigned;
296 using self = deflate_stream;
// Pointer-to-member type for the level-dependent compression routine.
297 typedef block_state(self::*compress_func)(z_params& zs, Flush flush);
299 //--------------------------------------------------------------------------
// Data members of the deflate stream. Several declarations in this
// region (e.g. hash_shift_, block_start_, prev_length_, good_match_,
// the dyn_*tree_ arrays, lit_bufsize_, bi_valid_) are elided in this
// excerpt; only their describing comments and usage sites remain.
301 lut_type const& lut_;
303 bool inited_ = false;
304 std::size_t buf_size_;
305 std::unique_ptr<std::uint8_t[]> buf_;
307 int status_; // as the name implies
308 Byte* pending_buf_; // output still pending
310 pending_buf_size_; // size of pending_buf
311 Byte* pending_out_; // next pending byte to output to the stream
312 uInt pending_; // nb of bytes in the pending buffer
313 boost::optional<Flush>
314 last_flush_; // value of flush param for previous deflate call
316 uInt w_size_; // LZ77 window size (32K by default)
317 uInt w_bits_; // log2(w_size) (8..16)
318 uInt w_mask_; // w_size - 1
320 /* Sliding window. Input bytes are read into the second half of the window,
321 and move to the first half later to keep a dictionary of at least wSize
322 bytes. With this organization, matches are limited to a distance of
323 wSize-maxMatch bytes, but this ensures that IO is always
324 performed with a length multiple of the block size. Also, it limits
325 the window size to 64K.
326 To do: use the user input buffer as sliding window.
*/
328 Byte *window_ = nullptr;
330 /* Actual size of window: 2*wSize, except when the user input buffer
331 is directly used as sliding window.
*/
333 std::uint32_t window_size_;
335 /* Link to older string with same hash index. To limit the size of this
336 array to 64K, this link is maintained only for the last 32K strings.
337 An index in this array is thus a window index modulo 32K.
*/
339 std::uint16_t* prev_;
341 std::uint16_t* head_; // Heads of the hash chains or 0
343 uInt ins_h_; // hash index of string to be inserted
344 uInt hash_size_; // number of elements in hash table
345 uInt hash_bits_; // log2(hash_size)
346 uInt hash_mask_; // hash_size-1
348 /* Number of bits by which ins_h must be shifted at each input
349 step. It must be such that after minMatch steps,
350 the oldest byte no longer takes part in the hash key, that is:
351 hash_shift * minMatch >= hash_bits
*/
355 /* Window position at the beginning of the current output block.
356 Gets negative when the window is moved backwards.
*/
360 uInt match_length_; // length of best match
361 IPos prev_match_; // previous match
362 int match_available_; // set if previous match exists
363 uInt strstart_; // start of string to insert
364 uInt match_start_; // start of matching string
365 uInt lookahead_; // number of valid bytes ahead in window
367 /* Length of the best match at previous step. Matches not greater
368 than this are discarded. This is used in the lazy match evaluation.
*/
372 /* To speed up deflation, hash chains are never searched beyond
373 this length. A higher limit improves compression ratio but
degrades speed. */
376 uInt max_chain_length_;
378 /* Attempt to find a better match only when the current match is strictly
379 smaller than this value. This mechanism is used only for compression
levels >= 4.
382 OR Insert new strings in the hash table only if the match length is not
383 greater than this length. This saves time but degrades compression.
384 used only for compression levels <= 3.
*/
386 uInt max_lazy_match_;
388 int level_; // compression level (1..9)
389 Strategy strategy_; // favor or force Huffman coding
391 // Use a faster search when the previous match is longer than this
394 int nice_match_; // Stop searching when current match exceeds this
// NOTE(review): the dyn_ltree_/dyn_dtree_/bl_tree_ array declarations
// are split across elided lines; only the size expressions remain.
397 HEAP_SIZE]; // literal and length tree
399 2*dCodes+1]; // distance tree
401 2*blCodes+1]; // Huffman tree for bit lengths
403 tree_desc l_desc_; // desc. for literal tree
404 tree_desc d_desc_; // desc. for distance tree
405 tree_desc bl_desc_; // desc. for bit length tree
407 // number of codes at each bit length for an optimal tree
408 std::uint16_t bl_count_[maxBits+1];
410 // Index within the heap array of least frequent node in the Huffman tree
411 static std::size_t constexpr kSmallest = 1;
413 /* The sons of heap[n] are heap[2*n] and heap[2*n+1].
414 heap[0] is not used. The same heap array is used to build all trees.
*/
417 int heap_[2*lCodes+1]; // heap used to build the Huffman trees
418 int heap_len_; // number of elements in the heap
419 int heap_max_; // element of largest frequency
421 // Depth of each subtree used as tie breaker for trees of equal frequency
422 std::uint8_t depth_[2*lCodes+1];
424 std::uint8_t *l_buf_; // buffer for literals or lengths
426 /* Size of match buffer for literals/lengths.
427 There are 4 reasons for limiting lit_bufsize to 64K:
428 - frequencies can be kept in 16 bit counters
429 - if compression is not successful for the first block, all input
430 data is still in the window so we can still emit a stored block even
431 when input comes from standard input. (This can also be done for
432 all blocks if lit_bufsize is not greater than 32K.)
433 - if compression is not successful for a file smaller than 64K, we can
434 even emit a stored file instead of a stored block (saving 5 bytes).
435 This is applicable only for zip (not gzip or zlib).
436 - creating new Huffman trees less frequently may not provide fast
437 adaptation to changes in the input data statistics. (Take for
438 example a binary file with poorly compressible code followed by
439 a highly compressible string table.) Smaller buffer sizes give
440 fast adaptation but have of course the overhead of transmitting
441 trees more frequently.
442 - I can't count above 4
*/
445 uInt last_lit_; // running index in l_buf_
447 /* Buffer for distances. To simplify the code, d_buf_ and l_buf_
448 have the same number of elements. To use different lengths, an
449 extra flag array would be necessary.
*/
451 std::uint16_t* d_buf_;
453 std::uint32_t opt_len_; // bit length of current block with optimal trees
454 std::uint32_t static_len_; // bit length of current block with static trees
455 uInt matches_; // number of string matches in current block
456 uInt insert_; // bytes at end of window left to insert
/* Output buffer.
459 Bits are inserted starting at the bottom (least significant bits).
*/
461 std::uint16_t bi_buf_;
463 /* Number of valid bits in bi_buf._ All bits above the last valid
bit are always zero. */
468 /* High water mark offset in window for initialized bytes -- bytes
469 above this are set to zero in order to avoid memory check warnings
470 when longest match routines access bytes past the input. This is
471 then updated to the new high water mark.
*/
473 std::uint32_t high_water_;
475 //--------------------------------------------------------------------------
// Small inline helpers. Signatures/braces for several of these are
// partially elided in this excerpt.
482 /* In order to simplify the code, particularly on 16 bit machines, match
483 distances are limited to MAX_DIST instead of WSIZE.
*/
// max_dist(): largest usable match distance for the current window.
488 return w_size_ - kMinLookahead;
// Append one byte to the pending output buffer.
492 put_byte(std::uint8_t c)
494 pending_buf_[pending_++] = c;
// Append a 16-bit value to the pending output buffer (body elided here).
498 put_short(std::uint16_t w)
504 /* Send a value on a given number of bits.
505 IN assertion: length <= 16 and value fits in length bits.
*/
508 send_bits(int value, int length)
// If the value does not fit in the remaining bit-buffer space,
// flush the full 16-bit buffer and keep the overflow bits.
510 if(bi_valid_ > (int)Buf_size - length)
512 bi_buf_ |= (std::uint16_t)value << bi_valid_;
514 bi_buf_ = (std::uint16_t)value >> (Buf_size - bi_valid_);
515 bi_valid_ += length - Buf_size;
519 bi_buf_ |= (std::uint16_t)(value) << bi_valid_;
524 // Send a code of the given tree
526 send_code(int value, ct_data const* tree)
528 send_bits(tree[value].fc, tree[value].dl);
531 /* Mapping from a distance to a distance code. dist is the
532 distance - 1 and must not have side effects. dist_code[256]
533 and dist_code[257] are never used.
*/
536 d_code(unsigned dist)
// Small distances are looked up directly; larger ones by their top bits.
539 return lut_.dist_code[dist];
540 return lut_.dist_code[256+(dist>>7)];
543 /* Update a hash value with the given input byte
544 IN assertion: all calls to update_hash are made with
545 consecutive input characters, so that a running hash
546 key can be computed from the previous key instead of
547 complete recalculation each time.
*/
550 update_hash(uInt& h, std::uint8_t c)
552 h = ((h << hash_shift_) ^ c) & hash_mask_;
555 /* Initialize the hash table (avoiding 64K overflow for 16
556 bit systems). prev[] will be initialized on the fly.
*/
// clear_hash() body: zero all hash-chain heads.
561 head_[hash_size_-1] = 0;
562 std::memset((Byte *)head_, 0,
563 (unsigned)(hash_size_-1)*sizeof(*head_));
566 /* Compares two subtrees, using the tree depth as tie breaker
567 when the subtrees have equal frequency. This minimizes the
worst case length. */
571 smaller(ct_data const* tree, int n, int m)
573 return tree[n].fc < tree[m].fc ||
574 (tree[n].fc == tree[m].fc &&
575 depth_[n] <= depth_[m]);
578 /* Insert string str in the dictionary and set match_head to the
579 previous head of the hash chain (the most recent string with
580 same hash key). Return the previous length of the hash chain.
581 If this file is compiled with -DFASTEST, the compression level
582 is forced to 1, and no hash chains are maintained.
583 IN assertion: all calls to INSERT_STRING are made with
584 consecutive input characters and the first minMatch
585 bytes of str are valid (except for the last minMatch-1
586 bytes of the input file).
*/
589 insert_string(IPos& hash_head)
591 update_hash(ins_h_, window_[strstart_ + (minMatch-1)]);
592 hash_head = prev_[strstart_ & w_mask_] = head_[ins_h_];
593 head_[ins_h_] = (std::uint16_t)strstart_;
596 //--------------------------------------------------------------------------
598 /* Values for max_lazy_match, good_match and max_chain_length, depending on
599 * the desired pack level (0..9). The values given below have been tuned to
600 * exclude worst case performance for pathological files. Better values may be
601 * found for specific files.
*/
// struct config: per-level tuning parameters plus the compression routine.
605 std::uint16_t good_length; /* reduce lazy search above this match length */
606 std::uint16_t max_lazy; /* do not perform lazy search above this match length */
607 std::uint16_t nice_length; /* quit search above this match length */
608 std::uint16_t max_chain;
// Constructor forwarding all four tuning values (func init elided here).
612 std::uint16_t good_length_,
613 std::uint16_t max_lazy_,
614 std::uint16_t nice_length_,
615 std::uint16_t max_chain_,
617 : good_length(good_length_)
618 , max_lazy(max_lazy_)
619 , nice_length(nice_length_)
620 , max_chain(max_chain_)
// Return the tuning table entry for a compression level 0..9.
628 get_config(std::size_t level)
632 // good lazy nice chain
633 case 0: return { 0, 0, 0, 0, &self::deflate_stored}; // store only
634 case 1: return { 4, 4, 8, 4, &self::deflate_fast}; // max speed, no lazy matches
635 case 2: return { 4, 5, 16, 8, &self::deflate_fast};
636 case 3: return { 4, 6, 32, 32, &self::deflate_fast};
637 case 4: return { 4, 4, 16, 16, &self::deflate_slow}; // lazy matches
638 case 5: return { 8, 16, 32, 32, &self::deflate_slow};
639 case 6: return { 8, 16, 128, 128, &self::deflate_slow};
640 case 7: return { 8, 32, 128, 256, &self::deflate_slow};
641 case 8: return { 32, 128, 258, 1024, &self::deflate_slow};
643 case 9: return { 32, 258, 258, 4096, &self::deflate_slow}; // max compression
// Declarations of the out-of-line template members. The `template<class
// = void>` idiom keeps this header-only: bodies are instantiated lazily.
654 template<class Unsigned>
657 bi_reverse(Unsigned code, unsigned len);
659 template<class = void>
662 gen_codes(ct_data *tree, int max_code, std::uint16_t *bl_count);
664 template<class = void>
// Public-facing operations (reset/tune/params/write/dictionary/...).
669 template<class = void> void doReset (int level, int windowBits, int memLevel, Strategy strategy);
670 template<class = void> void doReset ();
671 template<class = void> void doClear ();
672 template<class = void> std::size_t doUpperBound (std::size_t sourceLen) const;
673 template<class = void> void doTune (int good_length, int max_lazy, int nice_length, int max_chain);
674 template<class = void> void doParams (z_params& zs, int level, Strategy strategy, error_code& ec);
675 template<class = void> void doWrite (z_params& zs, boost::optional<Flush> flush, error_code& ec);
676 template<class = void> void doDictionary (Byte const* dict, uInt dictLength, error_code& ec);
677 template<class = void> void doPrime (int bits, int value, error_code& ec);
678 template<class = void> void doPending (unsigned* value, int* bits);
// Internal helpers: setup, Huffman tree construction, bit output.
680 template<class = void> void init ();
681 template<class = void> void lm_init ();
682 template<class = void> void init_block ();
683 template<class = void> void pqdownheap (ct_data const* tree, int k);
684 template<class = void> void pqremove (ct_data const* tree, int& top);
685 template<class = void> void gen_bitlen (tree_desc *desc);
686 template<class = void> void build_tree (tree_desc *desc);
687 template<class = void> void scan_tree (ct_data *tree, int max_code);
688 template<class = void> void send_tree (ct_data *tree, int max_code);
689 template<class = void> int build_bl_tree ();
690 template<class = void> void send_all_trees (int lcodes, int dcodes, int blcodes);
691 template<class = void> void compress_block (ct_data const* ltree, ct_data const* dtree);
692 template<class = void> int detect_data_type ();
693 template<class = void> void bi_windup ();
694 template<class = void> void bi_flush ();
695 template<class = void> void copy_block (char *buf, unsigned len, int header);
// Block-level emission and LZ77 window management.
697 template<class = void> void tr_init ();
698 template<class = void> void tr_align ();
699 template<class = void> void tr_flush_bits ();
700 template<class = void> void tr_stored_block (char *bu, std::uint32_t stored_len, int last);
701 template<class = void> void tr_tally_dist (std::uint16_t dist, std::uint8_t len, bool& flush);
702 template<class = void> void tr_tally_lit (std::uint8_t c, bool& flush);
704 template<class = void> void tr_flush_block (z_params& zs, char *buf, std::uint32_t stored_len, int last);
705 template<class = void> void fill_window (z_params& zs);
706 template<class = void> void flush_pending (z_params& zs);
707 template<class = void> void flush_block (z_params& zs, bool last);
708 template<class = void> int read_buf (z_params& zs, Byte *buf, unsigned size);
709 template<class = void> uInt longest_match (IPos cur_match);
// Per-strategy compression routines (templates) ...
711 template<class = void> block_state f_stored (z_params& zs, Flush flush);
712 template<class = void> block_state f_fast (z_params& zs, Flush flush);
713 template<class = void> block_state f_slow (z_params& zs, Flush flush);
714 template<class = void> block_state f_rle (z_params& zs, Flush flush);
715 template<class = void> block_state f_huff (z_params& zs, Flush flush);
// ... and the non-template forwarders whose addresses fit compress_func.
718 deflate_stored(z_params& zs, Flush flush)
720 return f_stored(zs, flush);
724 deflate_fast(z_params& zs, Flush flush)
726 return f_fast(zs, flush);
730 deflate_slow(z_params& zs, Flush flush)
732 return f_slow(zs, flush);
736 deflate_rle(z_params& zs, Flush flush)
738 return f_rle(zs, flush);
742 deflate_huff(z_params& zs, Flush flush)
744 return f_huff(zs, flush);
748 //--------------------------------------------------------------------------
750 // Reverse the first len bits of a code
751 template<class Unsigned>
755 bi_reverse(Unsigned code, unsigned len)
// Precondition checks; the reversal loop itself is elided in this excerpt.
757 BOOST_STATIC_ASSERT(std::is_unsigned<Unsigned>::value);
758 BOOST_ASSERT(len <= 8 * sizeof(unsigned));
770 /* Generate the codes for a given tree and bit counts (which need not be optimal).
771 IN assertion: the array bl_count contains the bit length statistics for
772 the given tree and the field len is set for all tree elements.
773 OUT assertion: the field code is set for all tree elements of non
zero code length. */
779 gen_codes(ct_data *tree, int max_code, std::uint16_t *bl_count)
781 std::uint16_t next_code[maxBits+1]; /* next code value for each bit length */
782 std::uint16_t code = 0; /* running code value */
783 int bits; /* bit index */
784 int n; /* code index */
786 // The distribution counts are first used to
787 // generate the code values without bit reversal.
788 for(bits = 1; bits <= maxBits; bits++)
790 code = (code + bl_count[bits-1]) << 1;
791 next_code[bits] = code;
793 // Check that the bit counts in bl_count are consistent.
794 // The last code must be all ones.
795 BOOST_ASSERT(code + bl_count[maxBits]-1 == (1<<maxBits)-1);
// Assign each symbol its bit-reversed canonical code.
796 for(n = 0; n <= max_code; n++)
798 int len = tree[n].dl;
801 tree[n].fc = bi_reverse(next_code[len]++, len);
// Build the static lookup tables once (function-local static `data`).
807 deflate_stream::get_lut() ->
816 // number of codes at each bit length for an optimal tree
817 //std::uint16_t bl_count[maxBits+1];
819 // Initialize the mapping length (0..255) -> length code (0..28)
820 std::uint8_t length = 0;
821 for(std::uint8_t code = 0; code < lengthCodes-1; ++code)
823 tables.base_length[code] = length;
824 auto const run = 1U << tables.extra_lbits[code];
825 for(unsigned n = 0; n < run; ++n)
826 tables.length_code[length++] = code;
// length is std::uint8_t, so 256 increments wrap it back to zero.
828 BOOST_ASSERT(length == 0);
829 // Note that the length 255 (match length 258) can be represented
830 // in two different ways: code 284 + 5 bits or code 285, so we
831 // overwrite length_code[255] to use the best encoding:
832 tables.length_code[255] = lengthCodes-1;
834 // Initialize the mapping dist (0..32K) -> dist code (0..29)
837 std::uint16_t dist = 0;
838 for(code = 0; code < 16; code++)
840 tables.base_dist[code] = dist;
841 auto const run = 1U << tables.extra_dbits[code];
842 for(unsigned n = 0; n < run; ++n)
843 tables.dist_code[dist++] = code;
845 BOOST_ASSERT(dist == 256);
846 // from now on, all distances are divided by 128
// NOTE(review): the `dist >>= 7;` rescaling statement appears to be
// elided between this comment and the loop below — confirm in the
// full source; both asserts of 256 rely on it.
848 for(; code < dCodes; ++code)
850 tables.base_dist[code] = dist << 7;
851 auto const run = 1U << (tables.extra_dbits[code]-7);
852 for(std::size_t n = 0; n < run; ++n)
853 tables.dist_code[256 + dist++] = code;
855 BOOST_ASSERT(dist == 256);
858 // Construct the codes of the static literal tree
859 std::uint16_t bl_count[maxBits+1];
860 std::memset(bl_count, 0, sizeof(bl_count));
// Assign the fixed code lengths of RFC 1951 section 3.2.6
// (the loop bounds and bl_count updates are elided in this excerpt).
863 tables.ltree[n++].dl = 8;
866 tables.ltree[n++].dl = 9;
869 tables.ltree[n++].dl = 7;
872 tables.ltree[n++].dl = 8;
874 // Codes 286 and 287 do not exist, but we must include them in the tree
875 // construction to get a canonical Huffman tree (longest code all ones)
876 gen_codes(tables.ltree, lCodes+1, bl_count);
// The static distance tree is trivial: all codes are 5 bits.
878 for(n = 0; n < dCodes; ++n)
880 tables.dtree[n].dl = 5;
882 static_cast<std::uint16_t>(bi_reverse(n, 5));
886 static init const data;
// doReset(level, windowBits, memLevel, strategy): validate arguments
// and record the derived window/hash/buffer sizes. Throws
// std::invalid_argument on out-of-range parameters.
899 if(level == default_size)
902 // VFALCO What do we do about this?
903 // until 256-byte window bug fixed
907 if(level < 0 || level > 9)
908 BOOST_THROW_EXCEPTION(std::invalid_argument{
911 if(windowBits < 8 || windowBits > 15)
912 BOOST_THROW_EXCEPTION(std::invalid_argument{
913 "invalid windowBits"});
915 if(memLevel < 1 || memLevel > max_mem_level)
916 BOOST_THROW_EXCEPTION(std::invalid_argument{
917 "invalid memLevel"});
919 w_bits_ = windowBits;
921 hash_bits_ = memLevel + 7;
923 // 16K elements by default
924 lit_bufsize_ = 1 << (memLevel + 6);
927 strategy_ = strategy;
// Return an upper bound on the compressed size of sourceLen bytes
// (same formulas as zlib's deflateBound).
951 doUpperBound(std::size_t sourceLen) const
956 /* conservative upper bound for compressed data */
957 complen = sourceLen +
958 ((sourceLen + 7) >> 3) + ((sourceLen + 63) >> 6) + 5;
960 /* compute wrapper length */
963 /* if not default parameters, return conservative bound */
964 if(w_bits_ != 15 || hash_bits_ != 8 + 7)
965 return complen + wraplen;
967 /* default settings: return tight bound for that case */
968 return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) +
969 (sourceLen >> 25) + 13 - 6 + wraplen;
// doTune body (signature elided in this excerpt): install the four
// search-tuning parameters directly.
981 good_match_ = good_length;
982 nice_match_ = nice_length;
983 max_lazy_match_ = max_lazy;
984 max_chain_length_ = max_chain;
// Change compression level/strategy mid-stream. If the compression
// function would change and data is buffered, flush the current block
// first so already-compressed data stays consistent.
990 doParams(z_params& zs, int level, Strategy strategy, error_code& ec)
994 if(level == default_size)
996 if(level < 0 || level > 9)
998 ec = error::stream_error;
1001 func = get_config(level_).func;
1003 if((strategy != strategy_ || func != get_config(level).func) &&
1006 // Flush the last buffer:
1007 doWrite(zs, Flush::block, ec);
// need_buffers with nothing pending is benign here; clear the error.
1008 if(ec == error::need_buffers && pending_ == 0)
1009 ec.assign(0, ec.category());
// Adopt the new level's tuning parameters.
1014 max_lazy_match_ = get_config(level).max_lazy;
1015 good_match_ = get_config(level).good_length;
1016 nice_match_ = get_config(level).nice_length;
1017 max_chain_length_ = get_config(level).max_chain;
1019 strategy_ = strategy;
1022 // VFALCO boost::optional param is a workaround for
1023 // gcc "maybe uninitialized" warning
1024 // https://github.com/boostorg/beast/issues/532
// Main deflate entry point: validate stream state, drain pending
// output, run the strategy-selected compression routine, and emit
// flush markers / end-of-stream as requested.
1029 doWrite(z_params& zs, boost::optional<Flush> flush, error_code& ec)
1033 if(zs.next_out == 0 || (zs.next_in == 0 && zs.avail_in != 0) ||
1034 (status_ == FINISH_STATE && flush != Flush::finish))
1036 ec = error::stream_error;
1039 if(zs.avail_out == 0)
1041 ec = error::need_buffers;
1045 // value of flush param for previous deflate call
1046 auto old_flush = boost::make_optional<Flush>(
1047 last_flush_.is_initialized(),
1048 last_flush_ ? *last_flush_ : Flush::none);
1050 last_flush_ = flush;
1052 // Flush as much pending output as possible
1056 if(zs.avail_out == 0)
1058 /* Since avail_out is 0, deflate will be called again with
1059 * more output space, but possibly with both pending and
1060 * avail_in equal to zero. There won't be anything to do,
1061 * but this is not an error situation so make sure we
1062 * return OK instead of BUF_ERROR at next call of deflate:
*/
1064 last_flush_ = boost::none;
1068 else if(zs.avail_in == 0 && (
1069 old_flush && flush <= *old_flush
1070 ) && flush != Flush::finish)
1072 /* Make sure there is something to do and avoid duplicate consecutive
1073 * flushes. For repeated and useless calls with Flush::finish, we keep
1074 * returning Z_STREAM_END instead of Z_BUF_ERROR.
*/
1076 ec = error::need_buffers;
1080 // User must not provide more input after the first FINISH:
1081 if(status_ == FINISH_STATE && zs.avail_in != 0)
1083 ec = error::need_buffers;
1087 /* Start a new block or continue the current one.
*/
1089 if(zs.avail_in != 0 || lookahead_ != 0 ||
1090 (flush != Flush::none && status_ != FINISH_STATE))
// Dispatch on strategy; the default case uses the level's function.
1096 case Strategy::huffman:
1097 bstate = deflate_huff(zs, flush.get());
1100 bstate = deflate_rle(zs, flush.get());
1104 bstate = (this->*(get_config(level_).func))(zs, flush.get());
1109 if(bstate == finish_started || bstate == finish_done)
1111 status_ = FINISH_STATE;
1113 if(bstate == need_more || bstate == finish_started)
1115 if(zs.avail_out == 0)
1117 last_flush_ = boost::none; /* avoid BUF_ERROR next call, see above */
1120 /* If flush != Flush::none && avail_out == 0, the next call
1121 of deflate should use the same flush parameter to make sure
1122 that the flush is complete. So we don't have to output an
1123 empty block here, this will be done at next call. This also
1124 ensures that for a very small output buffer, we emit at most
one empty block. */
1128 if(bstate == block_done)
1130 if(flush == Flush::partial)
1134 else if(flush != Flush::block)
1136 /* FULL_FLUSH or SYNC_FLUSH */
1137 tr_stored_block((char*)0, 0L, 0);
1138 /* For a full flush, this empty block will be recognized
1139 * as a special marker by inflate_sync().
*/
1141 if(flush == Flush::full)
1143 clear_hash(); // forget history
1153 if(zs.avail_out == 0)
1155 last_flush_ = boost::none; /* avoid BUF_ERROR at next call, see above */
1161 if(flush == Flush::finish)
1163 ec = error::end_of_stream;
// [review] Fragment of deflate_stream::doDictionary(): preloads a preset
// dictionary into the sliding window and hash chains so later matches can
// reference it. NOTE(review): this listing elides interior source lines
// (content numbering has gaps), so braces and statements are missing.
1168 // VFALCO Warning: untested
1172 doDictionary(Byte const* dict, uInt dictLength, error_code& ec)
// Rejected when the stream is in a state that cannot accept a dictionary
// (condition elided from this listing).
1176 ec = error::stream_error;
1182 /* if dict would fill window, just replace the history */
1183 if(dictLength >= w_size_)
1189 dict += dictLength - w_size_; /* use the tail */
1190 dictLength = w_size_;
1193 /* insert dict into window and hash */
// Temporarily feeds the dictionary through the normal input path
// (zs.avail_in / zs.next_in) so fill_window() machinery can consume it.
1195 zs.avail_in = dictLength;
1196 zs.next_in = (const Byte *)dict;
// Index every minMatch-length substring of the dictionary into the
// hash chains (head_/prev_), exactly as live input would be indexed.
1200 while(lookahead_ >= minMatch)
1202 uInt str = strstart_;
1203 uInt n = lookahead_ - (minMatch-1);
1206 update_hash(ins_h_, window_[str + minMatch-1]);
1207 prev_[str & w_mask_] = head_[ins_h_];
1208 head_[ins_h_] = (std::uint16_t)str;
1213 lookahead_ = minMatch-1;
// Leave the match state as if the dictionary were already-processed input.
1216 strstart_ += lookahead_;
1217 block_start_ = (long)strstart_;
1218 insert_ = lookahead_;
1220 match_length_ = prev_length_ = minMatch-1;
1221 match_available_ = 0;
// [review] Fragment of deflate_stream::doPrime(): inserts `bits` bits of
// `value` into the pending bit buffer ahead of normal output (zlib
// deflatePrime equivalent). Interior lines are elided in this listing.
1227 doPrime(int bits, int value, error_code& ec)
// Guard: there must be room between pending output and the d_buf_ overlay
// for a full flushed bit buffer, else report need_buffers.
1231 if((Byte *)(d_buf_) < pending_out_ + ((Buf_size + 7) >> 3))
1233 ec = error::need_buffers;
1239 int put = Buf_size - bi_valid_;
// OR the low `put` bits of value into bi_buf_ above the bits already held.
1242 bi_buf_ |= (std::uint16_t)((value & ((1 << put) - 1)) << bi_valid_);
// [review] Signature only — body elided from this listing. Presumably
// reports the number of pending output bytes/bits (zlib deflatePending
// equivalent) — TODO confirm against the full source.
1254 doPending(unsigned* value, int* bits)
1262 //--------------------------------------------------------------------------
// [review] Fragment of deflate_stream::init(): lazily allocates one buffer
// holding the window, prev/head hash tables, and the overlaid
// pending/d_buf/l_buf region, then resets stream state. Interior lines
// (allocation guards, some assignments) are elided in this listing.
1264 // Do lazy initialization
1270 // Caller must set these:
// Derived sizes: window, its wrap mask, hash table size/mask/shift.
1277 w_size_ = 1 << w_bits_;
1278 w_mask_ = w_size_ - 1;
1280 hash_size_ = 1 << hash_bits_;
1281 hash_mask_ = hash_size_ - 1;
1282 hash_shift_ = ((hash_bits_+minMatch-1)/minMatch);
// One allocation carved into four consecutive regions.
1284 auto const nwindow = w_size_ * 2*sizeof(Byte);
1285 auto const nprev = w_size_ * sizeof(std::uint16_t);
1286 auto const nhead = hash_size_ * sizeof(std::uint16_t);
1287 auto const noverlay = lit_bufsize_ * (sizeof(std::uint16_t)+2);
1288 auto const needed = nwindow + nprev + nhead + noverlay;
// Reallocate only when the required size actually changed.
1290 if(! buf_ || buf_size_ != needed)
1292 buf_ = boost::make_unique_noinit<
1293 std::uint8_t[]>(needed);
// Carve the regions out of the single allocation.
1297 window_ = reinterpret_cast<Byte*>(buf_.get());
1298 prev_ = reinterpret_cast<std::uint16_t*>(buf_.get() + nwindow);
1299 head_ = reinterpret_cast<std::uint16_t*>(buf_.get() + nwindow + nprev);
1301 /* We overlay pending_buf_ and d_buf_ + l_buf_. This works
1302 since the average output size for(length, distance)
1303 codes is <= 24 bits.
1305 auto overlay = reinterpret_cast<std::uint16_t*>(
1306 buf_.get() + nwindow + nprev + nhead);
1308 // nothing written to window_ yet
1312 reinterpret_cast<std::uint8_t*>(overlay);
1314 static_cast<std::uint32_t>(lit_bufsize_) *
1315 (sizeof(std::uint16_t) + 2L);
1317 d_buf_ = overlay + lit_bufsize_ / sizeof(std::uint16_t);
1318 l_buf_ = pending_buf_ + (1 + sizeof(std::uint16_t)) * lit_bufsize_;
1321 pending_out_ = pending_buf_;
1323 status_ = BUSY_STATE;
1324 last_flush_ = Flush::none;
// [review] Fragment of lm_init(): resets the longest-match machinery for a
// new stream; interior lines elided in this listing.
1332 /* Initialize the "longest match" routines for a new zlib stream
1339 window_size_ = (std::uint32_t)2L*w_size_;
1343 /* Set the default configuration parameters:
1345 // VFALCO TODO just copy the config struct
// Per-level tuning knobs from the static configuration table.
1346 max_lazy_match_ = get_config(level_).max_lazy;
1347 good_match_ = get_config(level_).good_length;
1348 nice_match_ = get_config(level_).nice_length;
1349 max_chain_length_ = get_config(level_).max_chain;
// minMatch-1 == "no match yet" sentinel.
1355 match_length_ = prev_length_ = minMatch-1;
1356 match_available_ = 0;
// [review] Fragment of init_block(): zeroes all dynamic tree frequencies
// and seeds END_BLOCK with frequency 1. Interior lines elided.
1360 // Initialize a new block.
1367 for(int n = 0; n < lCodes; n++)
1368 dyn_ltree_[n].fc = 0;
1369 for(int n = 0; n < dCodes; n++)
1370 dyn_dtree_[n].fc = 0;
1371 for(int n = 0; n < blCodes; n++)
// Every block ends with END_BLOCK, so its frequency starts at 1.
1373 dyn_ltree_[END_BLOCK].fc = 1;
1380 /* Restore the heap property by moving down the tree starting at node k,
1381 exchanging a node with the smallest of its two sons if necessary,
1382 stopping when the heap property is re-established (each father smaller
// [review] Fragment of pqdownheap(); some lines (v's declaration, the
// final heap_[k] = v, closing brace) are elided in this listing.
1389 ct_data const* tree, // the tree to restore
1390 int k) // node to move down
1393 int j = k << 1; // left son of k
1394 while(j <= heap_len_)
1396 // Set j to the smallest of the two sons:
1398 smaller(tree, heap_[j+1], heap_[j]))
1400 // Exit if v is smaller than both sons
1401 if(smaller(tree, v, heap_[j]))
1404 // Exchange v with the smallest son
1405 heap_[k] = heap_[j];
1408 // And continue down the tree,
1409 // setting j to the left son of k
1415 /* Remove the smallest element from the heap and recreate the heap
1416 with one less element. Updates heap and heap_len.
// [review] pqremove(): pops the minimum into `top`, moves the last heap
// element to the root, and sifts it down. Braces elided in this listing.
1422 pqremove(ct_data const* tree, int& top)
1424 top = heap_[kSmallest];
1425 heap_[kSmallest] = heap_[heap_len_--];
1426 pqdownheap(tree, kSmallest);
1429 /* Compute the optimal bit lengths for a tree and update the total bit length
1430 for the current block.
1431 IN assertion: the fields freq and dad are set, heap[heap_max] and
1432 above are the tree nodes sorted by increasing frequency.
1433 OUT assertions: the field len is set to the optimal bit length, the
1434 array bl_count contains the frequencies for each bit length.
1435 The length opt_len is updated; static_len is also updated if stree is
// [review] gen_bitlen() fragment — interior lines (e.g. n/f loads, the
// bl_count_ increment, loop braces) are elided from this listing.
1441 gen_bitlen(tree_desc *desc)
1443 ct_data *tree = desc->dyn_tree;
1444 int max_code = desc->max_code;
1445 ct_data const* stree = desc->stat_desc->static_tree;
1446 std::uint8_t const *extra = desc->stat_desc->extra_bits;
1447 int base = desc->stat_desc->extra_base;
1448 int max_length = desc->stat_desc->max_length;
1449 int h; // heap index
1450 int n, m; // iterate over the tree elements
1451 int bits; // bit length
1452 int xbits; // extra bits
1453 std::uint16_t f; // frequency
1454 int overflow = 0; // number of elements with bit length too large
1456 std::fill(&bl_count_[0], &bl_count_[maxBits+1], std::uint16_t{0});
1458 /* In a first pass, compute the optimal bit lengths (which may
1459 * overflow in the case of the bit length tree).
1461 tree[heap_[heap_max_]].dl = 0; // root of the heap
1463 for(h = heap_max_+1; h < HEAP_SIZE; h++) {
// Depth of a node = depth of its father (stored in dl) + 1.
1465 bits = tree[tree[n].dl].dl + 1;
1466 if(bits > max_length) bits = max_length, overflow++;
1467 // We overwrite tree[n].dl which is no longer needed
1468 tree[n].dl = (std::uint16_t)bits;
1471 continue; // not a leaf node
1476 xbits = extra[n-base];
1478 opt_len_ += (std::uint32_t)f * (bits + xbits);
1480 static_len_ += (std::uint32_t)f * (stree[n].dl + xbits);
// Overflow repair: shorten one deep leaf, lengthen shallower ones,
// until no code exceeds max_length.
1485 // Find the first bit length which could increase:
1488 bits = max_length-1;
1489 while(bl_count_[bits] == 0)
1491 bl_count_[bits]--; // move one leaf down the tree
1492 bl_count_[bits+1] += 2; // move one overflow item as its brother
1493 bl_count_[max_length]--;
1494 /* The brother of the overflow item also moves one step up,
1495 * but this does not affect bl_count[max_length]
1499 while(overflow > 0);
1501 /* Now recompute all bit lengths, scanning in increasing frequency.
1502 * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all
1503 * lengths instead of fixing only the wrong ones. This idea is taken
1504 * from 'ar' written by Haruhiko Okumura.)
1506 for(bits = max_length; bits != 0; bits--)
1508 n = bl_count_[bits];
1514 if((unsigned) tree[m].dl != (unsigned) bits)
// Adjust opt_len_ for the difference between old and new length.
1516 opt_len_ += ((long)bits - (long)tree[m].dl) *(long)tree[m].fc;
1517 tree[m].dl = (std::uint16_t)bits;
1524 /* Construct one Huffman tree and assigns the code bit strings and lengths.
1525 Update the total bit length for the current block.
1526 IN assertion: the field freq is set for all tree elements.
1527 OUT assertions: the fields len and code are set to the optimal bit length
1528 and corresponding code. The length opt_len is updated; static_len is
1529 also updated if stree is not null. The field max_code is set.
// [review] build_tree() fragment — interior lines (freq checks, depth_
// seeding, braces) are elided from this listing.
1534 build_tree(tree_desc *desc)
1536 ct_data *tree = desc->dyn_tree;
1537 ct_data const* stree = desc->stat_desc->static_tree;
1538 int elems = desc->stat_desc->elems;
1539 int n, m; // iterate over heap elements
1540 int max_code = -1; // largest code with non zero frequency
1541 int node; // new node being created
1543 /* Construct the initial heap, with least frequent element in
1544 * heap[kSmallest]. The sons of heap[n] are heap[2*n] and heap[2*n+1].
1545 * heap[0] is not used.
1548 heap_max_ = HEAP_SIZE;
1550 for(n = 0; n < elems; n++)
// Only symbols with non-zero frequency enter the heap (guard elided).
1554 heap_[++(heap_len_)] = max_code = n;
1563 /* The pkzip format requires that at least one distance code exists,
1564 * and that at least one bit should be sent even if there is only one
1565 * possible code. So to avoid special checks later on we force at least
1566 * two codes of non zero frequency.
1568 while(heap_len_ < 2)
1570 node = heap_[++(heap_len_)] = (max_code < 2 ? ++max_code : 0);
1575 static_len_ -= stree[node].dl;
1576 // node is 0 or 1 so it does not have extra bits
1578 desc->max_code = max_code;
1580 /* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree,
1581 * establish sub-heaps of increasing lengths:
1583 for(n = heap_len_/2; n >= 1; n--)
1584 pqdownheap(tree, n);
1586 /* Construct the Huffman tree by repeatedly combining the least two
1589 node = elems; /* next internal node of the tree */
1592 pqremove(tree, n); /* n = node of least frequency */
1593 m = heap_[kSmallest]; /* m = node of next least frequency */
1595 heap_[--(heap_max_)] = n; /* keep the nodes sorted by frequency */
1596 heap_[--(heap_max_)] = m;
1598 /* Create a new node father of n and m */
1599 tree[node].fc = tree[n].fc + tree[m].fc;
1600 depth_[node] = (std::uint8_t)((depth_[n] >= depth_[m] ?
1601 depth_[n] : depth_[m]) + 1);
1602 tree[n].dl = tree[m].dl = (std::uint16_t)node;
1603 /* and insert the new node in the heap */
1604 heap_[kSmallest] = node++;
1605 pqdownheap(tree, kSmallest);
1608 while(heap_len_ >= 2);
1610 heap_[--(heap_max_)] = heap_[kSmallest];
1612 /* At this point, the fields freq and dad are set. We can now
1613 * generate the bit lengths.
1615 gen_bitlen((tree_desc *)desc);
1617 /* The field len is now set, we can generate the bit codes */
1618 gen_codes(tree, max_code, bl_count_);
1621 /* Scan a literal or distance tree to determine the frequencies
1622 of the codes in the bit length tree.
// [review] scan_tree() fragment — run-length encodes code lengths into
// bl_tree_ frequencies (REP_3_6 / REPZ_3_10 / REPZ_11_138). Interior
// lines (count resets, max_count/min_count updates) are elided.
1628 ct_data *tree, // the tree to be scanned
1629 int max_code) // and its largest code of non zero frequency
1631 int n; // iterates over all tree elements
1632 int prevlen = -1; // last emitted length
1633 int curlen; // length of current code
1634 int nextlen = tree[0].dl; // length of next code
1635 std::uint16_t count = 0; // repeat count of the current code
1636 int max_count = 7; // max repeat count
1637 int min_count = 4; // min repeat count
1644 tree[max_code+1].dl = (std::uint16_t)0xffff; // guard
1646 for(n = 0; n <= max_code; n++)
1648 curlen = nextlen; nextlen = tree[n+1].dl;
// Extend the current run while the length repeats and fits the limit.
1649 if(++count < max_count && curlen == nextlen)
1653 else if(count < min_count)
// Run too short to encode as a repeat: count each length directly.
1655 bl_tree_[curlen].fc += count;
1657 else if(curlen != 0)
1659 if(curlen != prevlen) bl_tree_[curlen].fc++;
1660 bl_tree_[REP_3_6].fc++;
1662 else if(count <= 10)
1664 bl_tree_[REPZ_3_10].fc++;
1668 bl_tree_[REPZ_11_138].fc++;
1677 else if(curlen == nextlen)
1690 /* Send a literal or distance tree in compressed form,
1691 using the codes in bl_tree.
// [review] send_tree() fragment — emits the run-length encoding that
// scan_tree() counted. Interior lines (count resets, curlen assignment,
// max_count/min_count updates) are elided from this listing.
1697 ct_data *tree, // the tree to be scanned
1698 int max_code) // and its largest code of non zero frequency
1700 int n; // iterates over all tree elements
1701 int prevlen = -1; // last emitted length
1702 int curlen; // length of current code
1703 int nextlen = tree[0].dl; // length of next code
1704 int count = 0; // repeat count of the current code
1705 int max_count = 7; // max repeat count
1706 int min_count = 4; // min repeat count
1708 // tree[max_code+1].dl = -1; // guard already set
1715 for(n = 0; n <= max_code; n++)
1718 nextlen = tree[n+1].dl;
1719 if(++count < max_count && curlen == nextlen)
1723 else if(count < min_count)
// Short run: send each length code individually.
1727 send_code(curlen, bl_tree_);
1729 while (--count != 0);
1731 else if(curlen != 0)
1733 if(curlen != prevlen)
1735 send_code(curlen, bl_tree_);
1738 BOOST_ASSERT(count >= 3 && count <= 6);
1739 send_code(REP_3_6, bl_tree_);
1740 send_bits(count-3, 2);
1742 else if(count <= 10)
1744 send_code(REPZ_3_10, bl_tree_);
1745 send_bits(count-3, 3);
1749 send_code(REPZ_11_138, bl_tree_);
1750 send_bits(count-11, 7);
1759 else if(curlen == nextlen)
1772 /* Construct the Huffman tree for the bit lengths and return
1773 the index in bl_order of the last bit length code to send.
// [review] build_bl_tree() fragment — interior lines (return statement,
// braces) elided from this listing.
1780 int max_blindex; // index of last bit length code of non zero freq
1782 // Determine the bit length frequencies for literal and distance trees
1783 scan_tree((ct_data *)dyn_ltree_, l_desc_.max_code);
1784 scan_tree((ct_data *)dyn_dtree_, d_desc_.max_code);
1786 // Build the bit length tree:
1787 build_tree((tree_desc *)(&(bl_desc_)));
1788 /* opt_len now includes the length of the tree representations, except
1789 * the lengths of the bit lengths codes and the 5+5+4 bits for the counts.
1792 /* Determine the number of bit length codes to send. The pkzip format
1793 * requires that at least 4 bit length codes be sent. (appnote.txt says
1794 * 3 but the actual value used is 4.)
1796 for(max_blindex = blCodes-1; max_blindex >= 3; max_blindex--)
1798 if(bl_tree_[lut_.bl_order[max_blindex]].dl != 0)
1801 // Update opt_len to include the bit length tree and counts
1802 opt_len_ += 3*(max_blindex+1) + 5+5+4;
1806 /* Send the header for a block using dynamic Huffman trees: the counts,
1807 the lengths of the bit length codes, the literal tree and the distance
1809 IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4.
// [review] send_all_trees() fragment — lcodes/dcodes parameter lines and
// braces are elided from this listing.
1817 int blcodes) // number of codes for each tree
1819 int rank; // index in bl_order
1821 BOOST_ASSERT(lcodes >= 257 && dcodes >= 1 && blcodes >= 4);
1822 BOOST_ASSERT(lcodes <= lCodes && dcodes <= dCodes && blcodes <= blCodes);
1823 send_bits(lcodes-257, 5); // not +255 as stated in appnote.txt
1824 send_bits(dcodes-1, 5);
1825 send_bits(blcodes-4, 4); // not -3 as stated in appnote.txt
// Bit-length code lengths go out in the fixed bl_order permutation.
1826 for(rank = 0; rank < blcodes; rank++)
1827 send_bits(bl_tree_[lut_.bl_order[rank]].dl, 3);
1828 send_tree((ct_data *)dyn_ltree_, lcodes-1); // literal tree
1829 send_tree((ct_data *)dyn_dtree_, dcodes-1); // distance tree
1832 /* Send the block data compressed using the given Huffman trees
// [review] compress_block() fragment — the d_buf_/l_buf_ reads, the
// last_lit_ == 0 guard, and several braces are elided from this listing.
1838 ct_data const* ltree, // literal tree
1839 ct_data const* dtree) // distance tree
1841 unsigned dist; /* distance of matched string */
1842 int lc; /* match length or unmatched char (if dist == 0) */
1843 unsigned lx = 0; /* running index in l_buf */
1844 unsigned code; /* the code to send */
1845 int extra; /* number of extra bits to send */
1855 send_code(lc, ltree); /* send a literal byte */
1859 /* Here, lc is the match length - minMatch */
1860 code = lut_.length_code[lc];
1861 send_code(code+literals+1, ltree); /* send the length code */
1862 extra = lut_.extra_lbits[code];
1865 lc -= lut_.base_length[code];
1866 send_bits(lc, extra); /* send the extra length bits */
1868 dist--; /* dist is now the match distance - 1 */
1869 code = d_code(dist);
1870 BOOST_ASSERT(code < dCodes);
1872 send_code(code, dtree); /* send the distance code */
1873 extra = lut_.extra_dbits[code];
1876 dist -= lut_.base_dist[code];
1877 send_bits(dist, extra); /* send the extra distance bits */
1879 } /* literal or match pair ? */
1881 /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */
1882 BOOST_ASSERT((uInt)(pending_) < lit_bufsize_ + 2*lx);
1884 while(lx < last_lit_);
1887 send_code(END_BLOCK, ltree);
1890 /* Check if the data type is TEXT or BINARY, using the following algorithm:
1891 - TEXT if the two conditions below are satisfied:
1892 a) There are no non-portable control characters belonging to the
1893 "black list" (0..6, 14..25, 28..31).
1894 b) There is at least one printable character belonging to the
1895 "white list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255).
1897 - The following partially-portable control characters form a
1898 "gray list" that is ignored in this detection algorithm:
1899 (7 {BEL}, 8 {BS}, 11 {VT}, 12 {FF}, 26 {SUB}, 27 {ESC}).
1900 IN assertion: the fields fc of dyn_ltree are set.
// [review] detect_data_type() fragment — the return statements and the
// declaration of n are elided from this listing.
1907 /* black_mask is the bit mask of black-listed bytes
1908 * set bits 0..6, 14..25, and 28..31
1909 * 0xf3ffc07f = binary 11110011111111111100000001111111
1911 unsigned long black_mask = 0xf3ffc07fUL;
1914 // Check for non-textual ("black-listed") bytes.
1915 for(n = 0; n <= 31; n++, black_mask >>= 1)
1916 if((black_mask & 1) && (dyn_ltree_[n].fc != 0))
1919 // Check for textual ("white-listed") bytes. */
1920 if(dyn_ltree_[9].fc != 0 || dyn_ltree_[10].fc != 0
1921 || dyn_ltree_[13].fc != 0)
1923 for(n = 32; n < literals; n++)
1924 if(dyn_ltree_[n].fc != 0)
1927 /* There are no "black-listed" or "white-listed" bytes:
1928 * this stream either is empty or has tolerated ("gray-listed") bytes only.
1933 /* Flush the bit buffer and align the output on a byte boundary
// [review] bi_windup() fragment — the >8-bit branch and the buffer reset
// lines are elided from this listing.
1942 else if(bi_valid_ > 0)
1943 put_byte((Byte)bi_buf_);
1948 /* Flush the bit buffer, keeping at most 7 bits in it.
// [review] bi_flush() fragment — the 16-bit branch and shift/bookkeeping
// lines are elided from this listing.
1961 else if(bi_valid_ >= 8)
1963 put_byte((Byte)bi_buf_);
1969 /* Copy a stored block, storing first the length and its
1970 one's complement if requested.
// [review] copy_block() fragment — the header guard and the byte-copy
// loop are elided from this listing.
1976 char *buf, // the input data
1977 unsigned len, // its length
1978 int header) // true if block header must be written
1980 bi_windup(); // align on byte boundary
// LEN / one's-complement NLEN header per RFC 1951 stored blocks.
1984 put_short((std::uint16_t)len);
1985 put_short((std::uint16_t)~len);
1987 // VFALCO Use memcpy?
1992 //------------------------------------------------------------------------------
1994 /* Initialize the tree data structures for a new zlib stream.
// [review] tr_init() fragment — wires each tree descriptor to its dynamic
// tree and static metadata; bit-buffer resets elided from this listing.
2001 l_desc_.dyn_tree = dyn_ltree_;
2002 l_desc_.stat_desc = &lut_.l_desc;
2004 d_desc_.dyn_tree = dyn_dtree_;
2005 d_desc_.stat_desc = &lut_.d_desc;
2007 bl_desc_.dyn_tree = bl_tree_;
2008 bl_desc_.stat_desc = &lut_.bl_desc;
2013 // Initialize the first block of the first file:
2017 /* Send one empty static block to give enough lookahead for inflate.
2018 This takes 10 bits, of which 7 may remain in the bit buffer.
// [review] tr_align() fragment — the trailing bi_flush() is elided from
// this listing.
2025 send_bits(STATIC_TREES<<1, 3);
2026 send_code(END_BLOCK, lut_.ltree);
// NOTE(review): the next comment documents tr_flush_bits(), whose body is
// entirely elided from this listing.
2030 /* Flush the bits in the bit buffer to pending output (leaves at most 7 bits)
2040 /* Send a stored block
// [review] tr_stored_block(): emits a STORED block header then copies the
// raw bytes; braces elided from this listing.
2046 char *buf, // input block
2047 std::uint32_t stored_len, // length of input block
2048 int last) // one if this is the last block for a file
2050 send_bits((STORED_BLOCK<<1)+last, 3); // send block type
2051 copy_block(buf, (unsigned)stored_len, 1); // with header
// [review] tr_tally_dist(): records one (distance, length) match pair in
// the overlay buffers and bumps the corresponding tree frequencies;
// `flush` becomes true when the literal buffer is full. Braces elided.
2058 tr_tally_dist(std::uint16_t dist, std::uint8_t len, bool& flush)
2060 d_buf_[last_lit_] = dist;
2061 l_buf_[last_lit_++] = len;
// length_code maps len to its length-code symbol; +literals+1 skips the
// literal and END_BLOCK entries in the literal/length tree.
2063 dyn_ltree_[lut_.length_code[len]+literals+1].fc++;
2064 dyn_dtree_[d_code(dist)].fc++;
2065 flush = (last_lit_ == lit_bufsize_-1);
// [review] tr_tally_lit(): records one literal byte (dist == 0 marks a
// literal, not a match); interior line bumping dyn_ltree_[c].fc is elided
// from this listing.
2072 tr_tally_lit(std::uint8_t c, bool& flush)
2074 d_buf_[last_lit_] = 0;
2075 l_buf_[last_lit_++] = c;
2077 flush = (last_lit_ == lit_bufsize_-1);
2080 //------------------------------------------------------------------------------
2082 /* Determine the best encoding for the current block: dynamic trees,
2083 static trees or store, and output the encoded block to the zip file.
// [review] tr_flush_block() fragment — significant interior lines
// (level==0 branch, init_block() call, bi_windup on last, braces) are
// elided from this listing; the visible control flow is incomplete.
2090 char *buf, // input block, or NULL if too old
2091 std::uint32_t stored_len, // length of input block
2092 int last) // one if this is the last block for a file
2094 std::uint32_t opt_lenb;
2095 std::uint32_t static_lenb; // opt_len and static_len in bytes
2096 int max_blindex = 0; // index of last bit length code of non zero freq
2098 // Build the Huffman trees unless a stored block is forced
2101 // Check if the file is binary or text
2102 if(zs.data_type == unknown)
2103 zs.data_type = detect_data_type();
2105 // Construct the literal and distance trees
2106 build_tree((tree_desc *)(&(l_desc_)));
2108 build_tree((tree_desc *)(&(d_desc_)));
2109 /* At this point, opt_len and static_len are the total bit lengths of
2110 * the compressed block data, excluding the tree representations.
2113 /* Build the bit length tree for the above two trees, and get the index
2114 * in bl_order of the last bit length code to send.
2116 max_blindex = build_bl_tree();
2118 /* Determine the best encoding. Compute the block lengths in bytes. */
// +3 for the block-type bits, +7 to round up to whole bytes.
2119 opt_lenb = (opt_len_+3+7)>>3;
2120 static_lenb = (static_len_+3+7)>>3;
2122 if(static_lenb <= opt_lenb)
2123 opt_lenb = static_lenb;
2127 // VFALCO This assertion fails even in the original ZLib,
2128 // happens with strategy == Z_HUFFMAN_ONLY, see:
2129 // https://github.com/madler/zlib/issues/172
2134 opt_lenb = static_lenb = stored_len + 5; // force a stored block
2138 if(buf != (char*)0) { /* force stored block */
2140 if(stored_len+4 <= opt_lenb && buf != (char*)0) {
2141 /* 4: two words for the lengths */
2143 /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE.
2144 * Otherwise we can't have processed more than WSIZE input bytes since
2145 * the last block flush, because compression would have been
2146 * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to
2147 * transform a block into a stored block.
2149 tr_stored_block(buf, stored_len, last);
2153 else if(static_lenb >= 0)
2155 // force static trees
2158 else if(strategy_ == Strategy::fixed || static_lenb == opt_lenb)
2161 send_bits((STATIC_TREES<<1)+last, 3);
2162 compress_block(lut_.ltree, lut_.dtree);
2166 send_bits((DYN_TREES<<1)+last, 3);
2167 send_all_trees(l_desc_.max_code+1, d_desc_.max_code+1,
2169 compress_block((const ct_data *)dyn_ltree_,
2170 (const ct_data *)dyn_dtree_);
2172 /* The above check is made mod 2^32, for files larger than 512 MB
2173 * and std::size_t implemented on 32 bits.
// [review] fill_window() fragment: keeps at least kMinLookahead bytes of
// lookahead in the sliding window, sliding the window and hash tables
// when the upper half fills. Interior lines (n/p declarations, slide
// loops' headers, braces) are elided from this listing.
2184 fill_window(z_params& zs)
2187 unsigned more; // Amount of free space at the end of the window.
2189 uInt wsize = w_size_;
2193 more = (unsigned)(window_size_ -
2194 (std::uint32_t)lookahead_ -(std::uint32_t)strstart_);
2196 // VFALCO We don't support systems below 32-bit
2198 // Deal with !@#$% 64K limit:
2199 if(sizeof(int) <= 2)
2201 if(more == 0 && strstart_ == 0 && lookahead_ == 0)
2205 else if(more == (unsigned)(-1))
2207 /* Very unlikely, but possible on 16 bit machine if
2208 * strstart == 0 && lookahead == 1 (input done a byte at time)
2215 /* If the window is almost full and there is insufficient lookahead,
2216 move the upper half to the lower one to make room in the upper half.
2218 if(strstart_ >= wsize+max_dist())
2220 std::memcpy(window_, window_+wsize, (unsigned)wsize);
2221 match_start_ -= wsize;
2222 strstart_ -= wsize; // we now have strstart >= max_dist
2223 block_start_ -= (long) wsize;
2225 /* Slide the hash table (could be avoided with 32 bit values
2226 at the expense of memory usage). We slide even when level == 0
2227 to keep the hash table consistent if we switch back to level > 0
2228 later. (Using level 0 permanently is not an optimal usage of
2229 zlib, so we don't care about this pathological case.)
// Rebase head_ and prev_ entries by wsize; entries that would go
// negative are cleared to 0 ("no position").
2236 *p = (std::uint16_t)(m >= wsize ? m-wsize : 0);
2245 *p = (std::uint16_t)(m >= wsize ? m-wsize : 0);
2246 /* If n is not on any hash chain, prev[n] is garbage but
2247 its value will never be used.
2253 if(zs.avail_in == 0)
2256 /* If there was no sliding:
2257 strstart <= WSIZE+max_dist-1 && lookahead <= kMinLookahead - 1 &&
2258 more == window_size - lookahead - strstart
2259 => more >= window_size - (kMinLookahead-1 + WSIZE + max_dist-1)
2260 => more >= window_size - 2*WSIZE + 2
2261 In the BIG_MEM or MMAP case (not yet supported),
2262 window_size == input_size + kMinLookahead &&
2263 strstart + lookahead_ <= input_size => more >= kMinLookahead.
2264 Otherwise, window_size == 2*WSIZE so more >= 2.
2265 If there was sliding, more >= WSIZE. So in all cases, more >= 2.
2267 n = read_buf(zs, window_ + strstart_ + lookahead_, more);
2270 // Initialize the hash value now that we have some input:
2271 if(lookahead_ + insert_ >= minMatch)
2273 uInt str = strstart_ - insert_;
2274 ins_h_ = window_[str];
2275 update_hash(ins_h_, window_[str + 1]);
2278 update_hash(ins_h_, window_[str + minMatch-1]);
2279 prev_[str & w_mask_] = head_[ins_h_];
2280 head_[ins_h_] = (std::uint16_t)str;
2283 if(lookahead_ + insert_ < minMatch)
2287 /* If the whole input has less than minMatch bytes, ins_h is garbage,
2288 but this is not important since only literal bytes will be emitted.
2291 while(lookahead_ < kMinLookahead && zs.avail_in != 0);
2293 /* If the kWinInit bytes after the end of the current data have never been
2294 written, then zero those bytes in order to avoid memory check reports of
2295 the use of uninitialized (or uninitialised as Julian writes) bytes by
2296 the longest match routines. Update the high water mark for the next
2297 time through here. kWinInit is set to maxMatch since the longest match
2298 routines allow scanning to strstart + maxMatch, ignoring lookahead.
2300 if(high_water_ < window_size_)
2302 std::uint32_t curr = strstart_ + (std::uint32_t)(lookahead_);
2303 std::uint32_t winit;
2305 if(high_water_ < curr)
2307 /* Previous high water mark below current data -- zero kWinInit
2308 bytes or up to end of window, whichever is less.
2310 winit = window_size_ - curr;
2311 if(winit > kWinInit)
2313 std::memset(window_ + curr, 0, (unsigned)winit);
2314 high_water_ = curr + winit;
2316 else if(high_water_ < (std::uint32_t)curr + kWinInit)
2318 /* High water mark at or above current data, but below current data
2319 plus kWinInit -- zero out to current data plus kWinInit, or up
2320 to end of window, whichever is less.
2322 winit = (std::uint32_t)curr + kWinInit - high_water_;
2323 if(winit > window_size_ - high_water_)
2324 winit = window_size_ - high_water_;
2325 std::memset(window_ + high_water_, 0, (unsigned)winit);
2326 high_water_ += winit;
2331 /* Flush as much pending output as possible. All write() output goes
2332 through this function so some applications may wish to modify it
2333 to avoid allocating a large strm->next_out buffer and copying into it.
2334 (See also read_buf()).
// [review] flush_pending() fragment — the len==0 early return, the
// pending_ decrement, and the pending_==0 guard are elided.
2339 flush_pending(z_params& zs)
// Copy at most avail_out bytes of pending output to the caller's buffer.
2342 auto len = clamp(pending_, zs.avail_out);
2346 std::memcpy(zs.next_out, pending_out_, len);
2348 static_cast<std::uint8_t*>(zs.next_out) + len;
2349 pending_out_ += len;
2350 zs.total_out += len;
2351 zs.avail_out -= len;
// When the pending buffer drains completely, rewind the read cursor.
2354 pending_out_ = pending_buf_;
2357 /* Flush the current block, with given end-of-file flag.
2358 IN assertion: strstart is set to the end of the current match.
// [review] flush_block() fragment — the tr_flush_block(...) call wrapping
// these arguments and the flush_pending() call are partially elided.
2364 flush_block(z_params& zs, bool last)
// Pass the window slice for the current block, or NULL when the data is
// no longer available (block_start_ < 0).
2367 (block_start_ >= 0L ?
2368 (char *)&window_[(unsigned)block_start_] :
2370 (std::uint32_t)((long)strstart_ - block_start_),
2372 block_start_ = strstart_;
2376 /* Read a new buffer from the current input stream, update the adler32
2377 and total number of bytes read. All write() input goes through
2378 this function so some applications may wish to modify it to avoid
2379 allocating a large strm->next_in buffer and copying from it.
2380 (See also flush_pending()).
// [review] read_buf() fragment — the len==0 early return, avail_in/
// total_in updates, and the return of len are elided from this listing.
2385 read_buf(z_params& zs, Byte *buf, unsigned size)
2387 auto len = clamp(zs.avail_in, size);
2393 std::memcpy(buf, zs.next_in, len);
2394 zs.next_in = static_cast<
2395 std::uint8_t const*>(zs.next_in) + len;
2400 /* Set match_start to the longest match starting at the given string and
2401 return its length. Matches shorter or equal to prev_length are discarded,
2402 in which case the result is equal to prev_length and match_start is
2404 IN assertions: cur_match is the head of the hash chain for the current
2405 string (strstart) and its distance is <= max_dist, and prev_length >= 1
2406 OUT assertion: the match length is not greater than s->lookahead_.
2408 For 80x86 and 680x0, an optimized version will be provided in match.asm or
2409 match.S. The code will be functionally equivalent.
// [review] longest_match() fragment — the do/while headers, the
// chain_length reduction for good matches, the `continue` path, and the
// final return are elided from this listing.
2414 longest_match(IPos cur_match)
2416 unsigned chain_length = max_chain_length_;/* max hash chain length */
2417 Byte *scan = window_ + strstart_; /* current string */
2418 Byte *match; /* matched string */
2419 int len; /* length of current match */
2420 int best_len = prev_length_; /* best match length so far */
2421 int nice_match = nice_match_; /* stop if match long enough */
2422 IPos limit = strstart_ > (IPos)max_dist() ?
2423 strstart_ - (IPos)max_dist() : 0;
2424 /* Stop when cur_match becomes <= limit. To simplify the code,
2425 * we prevent matches with the string of window index 0.
2427 std::uint16_t *prev = prev_;
2428 uInt wmask = w_mask_;
2430 Byte *strend = window_ + strstart_ + maxMatch;
// Fast-rejection bytes: last and second-to-last byte of the current best.
2431 Byte scan_end1 = scan[best_len-1];
2432 Byte scan_end = scan[best_len];
2434 /* The code is optimized for HASH_BITS >= 8 and maxMatch-2 multiple of 16.
2435 * It is easy to get rid of this optimization if necessary.
2437 BOOST_ASSERT(hash_bits_ >= 8 && maxMatch == 258);
2439 /* Do not waste too much time if we already have a good match: */
2440 if(prev_length_ >= good_match_) {
2443 /* Do not look for matches beyond the end of the input. This is necessary
2444 * to make deflate deterministic.
2446 if((uInt)nice_match > lookahead_)
2447 nice_match = lookahead_;
2449 BOOST_ASSERT((std::uint32_t)strstart_ <= window_size_-kMinLookahead);
2452 BOOST_ASSERT(cur_match < strstart_);
2453 match = window_ + cur_match;
2455 /* Skip to next match if the match length cannot increase
2456 * or if the match length is less than 2. Note that the checks below
2457 * for insufficient lookahead only occur occasionally for performance
2458 * reasons. Therefore uninitialized memory will be accessed, and
2459 * conditional jumps will be made that depend on those values.
2460 * However the length of the match is limited to the lookahead, so
2461 * the output of deflate is not affected by the uninitialized values.
2463 if( match[best_len] != scan_end ||
2464 match[best_len-1] != scan_end1 ||
2466 *++match != scan[1])
2469 /* The check at best_len-1 can be removed because it will be made
2470 * again later. (This heuristic is not always a win.)
2471 * It is not necessary to compare scan[2] and match[2] since they
2472 * are always equal when the other bytes match, given that
2473 * the hash keys are equal and that HASH_BITS >= 8.
2476 BOOST_ASSERT(*scan == *match);
2478 /* We check for insufficient lookahead only every 8th comparison;
2479 * the 256th check will be made at strstart+258.
// Unrolled 8-way byte comparison loop (tail condition elided).
2484 while( *++scan == *++match && *++scan == *++match &&
2485 *++scan == *++match && *++scan == *++match &&
2486 *++scan == *++match && *++scan == *++match &&
2487 *++scan == *++match && *++scan == *++match &&
2490 BOOST_ASSERT(scan <= window_+(unsigned)(window_size_-1));
2492 len = maxMatch - (int)(strend - scan);
2493 scan = strend - maxMatch;
2495 if(len > best_len) {
2496 match_start_ = cur_match;
2498 if(len >= nice_match) break;
2499 scan_end1 = scan[best_len-1];
2500 scan_end = scan[best_len];
// Walk the hash chain until we run off the window or out of budget.
2503 while((cur_match = prev[cur_match & wmask]) > limit
2504 && --chain_length != 0);
2506 if((uInt)best_len <= lookahead_)
2507 return (uInt)best_len;
2511 //------------------------------------------------------------------------------
2513 /* Copy without compression as much as possible from the input stream, return
2514 the current block state.
2515 This function does not insert new strings in the dictionary since
2516 uncompressible data is probably not useful. This function is used
2517 only for the level=0 compression option.
2518 NOTE: this function should be optimized to avoid extra copying from
2519 window to pending_buf.
// [review] f_stored() fragment — the fill_window() call, several return
// statements, and loop braces are elided from this listing.
2525 f_stored(z_params& zs, Flush flush) ->
2528 /* Stored blocks are limited to 0xffff bytes, pending_buf is limited
2529 * to pending_buf_size, and each stored block has a 5 byte header:
2531 std::uint32_t max_block_size = 0xffff;
2532 std::uint32_t max_start;
2534 if(max_block_size > pending_buf_size_ - 5) {
2535 max_block_size = pending_buf_size_ - 5;
2538 /* Copy as much as possible from input to output: */
2540 /* Fill the window as much as possible: */
2541 if(lookahead_ <= 1) {
2543 BOOST_ASSERT(strstart_ < w_size_+max_dist() ||
2544 block_start_ >= (long)w_size_);
2547 if(lookahead_ == 0 && flush == Flush::none)
2550 if(lookahead_ == 0) break; /* flush the current block */
2552 BOOST_ASSERT(block_start_ >= 0L);
2554 strstart_ += lookahead_;
2557 /* Emit a stored block if pending_buf will be full: */
2558 max_start = block_start_ + max_block_size;
2559 if(strstart_ == 0 || (std::uint32_t)strstart_ >= max_start) {
2560 /* strstart == 0 is possible when wraparound on 16-bit machine */
2561 lookahead_ = (uInt)(strstart_ - max_start);
2562 strstart_ = (uInt)max_start;
2563 flush_block(zs, false);
2564 if(zs.avail_out == 0)
2567 /* Flush if we may have to slide, otherwise block_start may become
2568 * negative and the data will be gone:
2570 if(strstart_ - (uInt)block_start_ >= max_dist()) {
2571 flush_block(zs, false);
2572 if(zs.avail_out == 0)
2577 if(flush == Flush::finish)
2579 flush_block(zs, true);
2580 if(zs.avail_out == 0)
2581 return finish_started;
2584 if((long)strstart_ > block_start_)
2586 flush_block(zs, false);
2587 if(zs.avail_out == 0)
2593 /* Compress as much as possible from the input stream, return the current
2595 This function does not perform lazy evaluation of matches and inserts
2596 new strings in the dictionary only for unmatched strings or for short
2597 matches. It is used only for the fast compression options.
2603 f_fast(z_params& zs, Flush flush) ->
2606 IPos hash_head; /* head of the hash chain */
2607 bool bflush; /* set if current block must be flushed */
2611 /* Make sure that we always have enough lookahead, except
2612 * at the end of the input file. We need maxMatch bytes
2613 * for the next match, plus minMatch bytes to insert the
2614 * string following the next match.
2616 if(lookahead_ < kMinLookahead)
2619 if(lookahead_ < kMinLookahead && flush == Flush::none)
2622 break; /* flush the current block */
2625 /* Insert the string window[strstart .. strstart+2] in the
2626 * dictionary, and set hash_head to the head of the hash chain:
2629 if(lookahead_ >= minMatch) {
2630 insert_string(hash_head);
2633 /* Find the longest match, discarding those <= prev_length.
2634 * At this point we have always match_length < minMatch
2636 if(hash_head != 0 && strstart_ - hash_head <= max_dist()) {
2637 /* To simplify the code, we prevent matches with the string
2638 * of window index 0 (in particular we have to avoid a match
2639 * of the string with itself at the start of the input file).
2641 match_length_ = longest_match (hash_head);
2642 /* longest_match() sets match_start */
2644 if(match_length_ >= minMatch)
2646 tr_tally_dist(static_cast<std::uint16_t>(strstart_ - match_start_),
2647 static_cast<std::uint8_t>(match_length_ - minMatch), bflush);
2649 lookahead_ -= match_length_;
2651 /* Insert new strings in the hash table only if the match length
2652 * is not too large. This saves time but degrades compression.
2654 if(match_length_ <= max_lazy_match_ &&
2655 lookahead_ >= minMatch) {
2656 match_length_--; /* string at strstart already in table */
2660 insert_string(hash_head);
2661 /* strstart never exceeds WSIZE-maxMatch, so there are
2662 * always minMatch bytes ahead.
2665 while(--match_length_ != 0);
2670 strstart_ += match_length_;
2672 ins_h_ = window_[strstart_];
2673 update_hash(ins_h_, window_[strstart_+1]);
2674 /* If lookahead < minMatch, ins_h is garbage, but it does not
2675 * matter since it will be recomputed at next deflate call.
2681 /* No match, output a literal byte */
2682 tr_tally_lit(window_[strstart_], bflush);
2688 flush_block(zs, false);
2689 if(zs.avail_out == 0)
2693 insert_ = strstart_ < minMatch-1 ? strstart_ : minMatch-1;
2694 if(flush == Flush::finish)
2696 flush_block(zs, true);
2697 if(zs.avail_out == 0)
2698 return finish_started;
2703 flush_block(zs, false);
2704 if(zs.avail_out == 0)
2710 /* Same as above, but achieves better compression. We use a lazy
2711 evaluation for matches: a match is finally adopted only if there is
2712 no better match at the next window position.
2718 f_slow(z_params& zs, Flush flush) ->
2721 IPos hash_head; /* head of hash chain */
2722 bool bflush; /* set if current block must be flushed */
2724 /* Process the input block. */
2727 /* Make sure that we always have enough lookahead, except
2728 * at the end of the input file. We need maxMatch bytes
2729 * for the next match, plus minMatch bytes to insert the
2730 * string following the next match.
2732 if(lookahead_ < kMinLookahead)
2735 if(lookahead_ < kMinLookahead && flush == Flush::none)
2738 break; /* flush the current block */
2741 /* Insert the string window[strstart .. strstart+2] in the
2742 * dictionary, and set hash_head to the head of the hash chain:
2745 if(lookahead_ >= minMatch)
2746 insert_string(hash_head);
2748 /* Find the longest match, discarding those <= prev_length.
2750 prev_length_ = match_length_, prev_match_ = match_start_;
2751 match_length_ = minMatch-1;
2753 if(hash_head != 0 && prev_length_ < max_lazy_match_ &&
2754 strstart_ - hash_head <= max_dist())
2756 /* To simplify the code, we prevent matches with the string
2757 * of window index 0 (in particular we have to avoid a match
2758 * of the string with itself at the start of the input file).
2760 match_length_ = longest_match(hash_head);
2761 /* longest_match() sets match_start */
2763 if(match_length_ <= 5 && (strategy_ == Strategy::filtered
2764 || (match_length_ == minMatch &&
2765 strstart_ - match_start_ > kTooFar)
2768 /* If prev_match is also minMatch, match_start is garbage
2769 * but we will ignore the current match anyway.
2771 match_length_ = minMatch-1;
2774 /* If there was a match at the previous step and the current
2775 * match is not better, output the previous match:
2777 if(prev_length_ >= minMatch && match_length_ <= prev_length_)
2779 /* Do not insert strings in hash table beyond this. */
2780 uInt max_insert = strstart_ + lookahead_ - minMatch;
2783 static_cast<std::uint16_t>(strstart_ -1 - prev_match_),
2784 static_cast<std::uint8_t>(prev_length_ - minMatch), bflush);
2786 /* Insert in hash table all strings up to the end of the match.
2787 * strstart-1 and strstart are already inserted. If there is not
2788 * enough lookahead, the last two strings are not inserted in
2791 lookahead_ -= prev_length_-1;
2794 if(++strstart_ <= max_insert)
2795 insert_string(hash_head);
2797 while(--prev_length_ != 0);
2798 match_available_ = 0;
2799 match_length_ = minMatch-1;
2804 flush_block(zs, false);
2805 if(zs.avail_out == 0)
2810 else if(match_available_)
2812 /* If there was no match at the previous position, output a
2813 * single literal. If there was a match but the current match
2814 * is longer, truncate the previous match to a single literal.
2816 tr_tally_lit(window_[strstart_-1], bflush);
2818 flush_block(zs, false);
2821 if(zs.avail_out == 0)
2826 /* There is no previous match to compare with, wait for
2827 * the next step to decide.
2829 match_available_ = 1;
2834 BOOST_ASSERT(flush != Flush::none);
2835 if(match_available_)
2837 tr_tally_lit(window_[strstart_-1], bflush);
2838 match_available_ = 0;
2840 insert_ = strstart_ < minMatch-1 ? strstart_ : minMatch-1;
2841 if(flush == Flush::finish)
2843 flush_block(zs, true);
2844 if(zs.avail_out == 0)
2845 return finish_started;
2850 flush_block(zs, false);
2851 if(zs.avail_out == 0)
2857 /* For Strategy::rle, simply look for runs of bytes, generate matches only of distance
2858 one. Do not maintain a hash table. (It will be regenerated if this run of
2859 deflate switches away from Strategy::rle.)
2865 f_rle(z_params& zs, Flush flush) ->
2868 bool bflush; // set if current block must be flushed
2869 uInt prev; // byte at distance one to match
2870 Byte *scan, *strend; // scan goes up to strend for length of run
2874 /* Make sure that we always have enough lookahead, except
2875 * at the end of the input file. We need maxMatch bytes
2876 * for the longest run, plus one for the unrolled loop.
2878 if(lookahead_ <= maxMatch) {
2880 if(lookahead_ <= maxMatch && flush == Flush::none) {
2883 if(lookahead_ == 0) break; /* flush the current block */
2886 /* See how many times the previous byte repeats */
2888 if(lookahead_ >= minMatch && strstart_ > 0) {
2889 scan = window_ + strstart_ - 1;
2891 if(prev == *++scan && prev == *++scan && prev == *++scan) {
2892 strend = window_ + strstart_ + maxMatch;
2894 } while(prev == *++scan && prev == *++scan &&
2895 prev == *++scan && prev == *++scan &&
2896 prev == *++scan && prev == *++scan &&
2897 prev == *++scan && prev == *++scan &&
2899 match_length_ = maxMatch - (int)(strend - scan);
2900 if(match_length_ > lookahead_)
2901 match_length_ = lookahead_;
2903 BOOST_ASSERT(scan <= window_+(uInt)(window_size_-1));
2906 /* Emit match if have run of minMatch or longer, else emit literal */
2907 if(match_length_ >= minMatch) {
2908 tr_tally_dist(std::uint16_t{1},
2909 static_cast<std::uint8_t>(match_length_ - minMatch),
2912 lookahead_ -= match_length_;
2913 strstart_ += match_length_;
2916 /* No match, output a literal byte */
2917 tr_tally_lit(window_[strstart_], bflush);
2923 flush_block(zs, false);
2924 if(zs.avail_out == 0)
2929 if(flush == Flush::finish)
2931 flush_block(zs, true);
2932 if(zs.avail_out == 0)
2933 return finish_started;
2938 flush_block(zs, false);
2939 if(zs.avail_out == 0)
2945 /* ===========================================================================
2946 * For Strategy::huffman, do not look for matches. Do not maintain a hash table.
2947 * (It will be regenerated if this run of deflate switches away from Huffman.)
2953 f_huff(z_params& zs, Flush flush) ->
2956 bool bflush; // set if current block must be flushed
2960 // Make sure that we have a literal to write.
2966 if(flush == Flush::none)
2968 break; // flush the current block
2972 // Output a literal byte
2974 tr_tally_lit(window_[strstart_], bflush);
2979 flush_block(zs, false);
2980 if(zs.avail_out == 0)
2985 if(flush == Flush::finish)
2987 flush_block(zs, true);
2988 if(zs.avail_out == 0)
2989 return finish_started;
2994 flush_block(zs, false);
2995 if(zs.avail_out == 0)