//
// Copyright (c) 2016-2017 Vinnie Falco (vinnie dot falco at gmail dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
// Official repository: https://github.com/boostorg/beast
//
// This is a derivative work based on Zlib, copyright below:
/*
    Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler

    This software is provided 'as-is', without any express or implied
    warranty. In no event will the authors be held liable for any damages
    arising from the use of this software.

    Permission is granted to anyone to use this software for any purpose,
    including commercial applications, and to alter it and redistribute it
    freely, subject to the following restrictions:

    1. The origin of this software must not be misrepresented; you must not
       claim that you wrote the original software. If you use this software
       in a product, an acknowledgment in the product documentation would be
       appreciated but is not required.
    2. Altered source versions must be plainly marked as such, and must not be
       misrepresented as being the original software.
    3. This notice may not be removed or altered from any source distribution.

    Jean-loup Gailly        Mark Adler
    jloup@gzip.org          madler@alumni.caltech.edu

    The data format used by the zlib library is described by RFCs (Request for
    Comments) 1950 to 1952 in the files http://tools.ietf.org/html/rfc1950
    (zlib format), rfc1951 (deflate format) and rfc1952 (gzip format).
*/

#ifndef BOOST_BEAST_ZLIB_DETAIL_DEFLATE_STREAM_HPP
#define BOOST_BEAST_ZLIB_DETAIL_DEFLATE_STREAM_HPP

#include <boost/beast/zlib/zlib.hpp>
#include <boost/beast/zlib/detail/ranges.hpp>
#include <boost/beast/core/detail/type_traits.hpp>
#include <boost/assert.hpp>
#include <boost/config.hpp>
#include <boost/make_unique.hpp>
#include <boost/optional.hpp>
#include <boost/throw_exception.hpp>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <stdexcept>
#include <type_traits>

namespace boost {
namespace beast {
namespace zlib {
namespace detail {

/*
 *  ALGORITHM
 *
 *      The "deflation" process depends on being able to identify portions
 *      of the input text which are identical to earlier input (within a
 *      sliding window trailing behind the input currently being processed).
 *
 *      Each code tree is stored in a compressed form which is itself
 *      a Huffman encoding of the lengths of all the code strings (in
 *      ascending order by source values). The actual code strings are
 *      reconstructed from the lengths in the inflate process, as described
 *      in the deflate specification.
 *
 *      The most straightforward technique turns out to be the fastest for
 *      most input files: try all possible matches and select the longest.
 *      The key feature of this algorithm is that insertions into the string
 *      dictionary are very simple and thus fast, and deletions are avoided
 *      completely. Insertions are performed at each input character, whereas
 *      string matches are performed only when the previous match ends. So it
 *      is preferable to spend more time in matches to allow very fast string
 *      insertions and avoid deletions. The matching algorithm for small
 *      strings is inspired by that of Rabin & Karp. A brute force approach
 *      is used to find longer strings when a small match has been found.
 *      A similar algorithm is used in comic (by Jan-Mark Wams) and freeze
 *      (by Leonid Broukhis).
 *      A previous version of this file used a more sophisticated algorithm
 *      (by Fiala and Greene) which is guaranteed to run in linear amortized
 *      time, but has a larger average cost, uses more memory and is patented.
 *      However the F&G algorithm may be faster for some highly redundant
 *      files if the parameter max_chain_length (described below) is too large.
 *
 *  ACKNOWLEDGEMENTS
 *
 *      The idea of lazy evaluation of matches is due to Jan-Mark Wams, and
 *      I found it in 'freeze' written by Leonid Broukhis.
 *      Thanks to many people for bug reports and testing.
 *
 *  REFERENCES
 *
 *      Deutsch, L.P., "DEFLATE Compressed Data Format Specification".
 *      Available in http://tools.ietf.org/html/rfc1951
 *
 *      A description of the Rabin and Karp algorithm is given in the book
 *      "Algorithms" by R. Sedgewick, Addison-Wesley, p252.
 *
 *      Fiala, E.R., and Greene, D.H.
 *      Data Compression with Finite Windows, Comm. ACM, 32, 4 (1989) 490-595
 *
 */

class deflate_stream
{
protected:
    // Upper limit on code length
    static std::uint8_t constexpr maxBits = 15;

    // Number of length codes, not counting the special END_BLOCK code
    static std::uint16_t constexpr lengthCodes = 29;

    // Number of literal bytes 0..255
    static std::uint16_t constexpr literals = 256;

    // Number of Literal or Length codes, including the END_BLOCK code
    static std::uint16_t constexpr lCodes = literals + 1 + lengthCodes;

    // Number of distance codes
    static std::uint16_t constexpr dCodes = 30;

    // Number of codes used to transfer the bit lengths
    static std::uint16_t constexpr blCodes = 19;

    // Size of the dist_code lookup table
    static std::uint16_t constexpr distCodeLen = 512;

    // Size limit on bit length codes
    static std::uint8_t constexpr maxBlBits = 7;

    static std::uint16_t constexpr minMatch = 3;
    static std::uint16_t constexpr maxMatch = 258;

    // Can't change minMatch without also changing code, see original zlib
    BOOST_STATIC_ASSERT(minMatch == 3);

    // end of block literal code
    static std::uint16_t constexpr END_BLOCK = 256;

    // repeat previous bit length 3-6 times (2 bits of repeat count)
    static std::uint8_t constexpr REP_3_6 = 16;

    // repeat a zero length 3-10 times (3 bits of repeat count)
    static std::uint8_t constexpr REPZ_3_10 = 17;

    // repeat a zero length 11-138 times (7 bits of repeat count)
    static std::uint8_t constexpr REPZ_11_138 = 18;

    // The three kinds of block type
    static std::uint8_t constexpr STORED_BLOCK = 0;
    static std::uint8_t constexpr STATIC_TREES = 1;
    static std::uint8_t constexpr DYN_TREES = 2;

    // Maximum value for memLevel in deflateInit2
    static std::uint8_t constexpr max_mem_level = 9;

    // Default memLevel
    static std::uint8_t constexpr DEF_MEM_LEVEL = max_mem_level;

    /*  Note: the deflate() code requires max_lazy >= minMatch and max_chain >= 4
        For deflate_fast() (levels <= 3) good is ignored and lazy has a different
        meaning.
    */

    // maximum heap size
    static std::uint16_t constexpr HEAP_SIZE = 2 * lCodes + 1;

    // size of bit buffer in bi_buf
    static std::uint8_t constexpr Buf_size = 16;

    // Matches of length 3 are discarded if their distance exceeds kTooFar
    static std::size_t constexpr kTooFar = 4096;

    /*  Minimum amount of lookahead, except at the end of the input file.
        See deflate.c for comments about the minMatch+1.
    */
    static std::size_t constexpr kMinLookahead = maxMatch + minMatch + 1;
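    // With maxMatch = 258 and minMatch = 3, kMinLookahead = 258 + 3 + 1 = 262.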

    /*  Number of bytes after end of data in window to initialize in order
        to avoid memory checker errors from longest match routines
    */
    static std::size_t constexpr kWinInit = maxMatch;

    // Describes a single value and its code string.
    struct ct_data
    {
        std::uint16_t fc;   // frequency count or bit string
        std::uint16_t dl;   // parent node in tree or length of bit string

        bool
        operator==(ct_data const& rhs) const
        {
            return fc == rhs.fc && dl == rhs.dl;
        }
    };

    struct static_desc
    {
        ct_data const* static_tree;     // static tree or NULL
        std::uint8_t const* extra_bits; // extra bits for each code or NULL
        std::uint16_t extra_base;       // base index for extra_bits
        std::uint16_t elems;            // max number of elements in the tree
        std::uint8_t max_length;        // max bit length for the codes
    };

    struct lut_type
    {
        // Number of extra bits for each length code
        std::uint8_t const extra_lbits[lengthCodes] = {
            0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0
        };

        // Number of extra bits for each distance code
        std::uint8_t const extra_dbits[dCodes] = {
            0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13
        };

        // Number of extra bits for each bit length code
        std::uint8_t const extra_blbits[blCodes] = {
            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7
        };

        // The lengths of the bit length codes are sent in order
        // of decreasing probability, to avoid transmitting the
        // lengths for unused bit length codes.
        std::uint8_t const bl_order[blCodes] = {
            16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15
        };

        ct_data ltree[lCodes + 2];

        ct_data dtree[dCodes];

        // Distance codes. The first 256 values correspond to the distances
        // 3 .. 258, the last 256 values correspond to the top 8 bits of
        // the 15 bit distances.
        std::uint8_t dist_code[distCodeLen];

        std::uint8_t length_code[maxMatch-minMatch+1];

        std::uint8_t base_length[lengthCodes];

        std::uint16_t base_dist[dCodes];

        static_desc l_desc = {
            ltree, extra_lbits, literals+1, lCodes, maxBits
        };

        static_desc d_desc = {
            dtree, extra_dbits, 0, dCodes, maxBits
        };

        static_desc bl_desc =
        {
            nullptr, extra_blbits, 0, blCodes, maxBlBits
        };
    };

    struct tree_desc
    {
        ct_data* dyn_tree;              /* the dynamic tree */
        int max_code;                   /* largest code with non zero frequency */
        static_desc const* stat_desc;   /* the corresponding static tree */
    };

    enum block_state
    {
        need_more,      /* block not completed, need more input or more output */
        block_done,     /* block flush performed */
        finish_started, /* finish started, need only more output at next deflate */
        finish_done     /* finish done, accept no more input or output */
    };

    // VFALCO This might not be needed, e.g. for zip/gzip
    enum StreamStatus
    {
        EXTRA_STATE = 69,
        NAME_STATE = 73,
        COMMENT_STATE = 91,
        HCRC_STATE = 103,
        BUSY_STATE = 113,
        FINISH_STATE = 666
    };

    /*  A std::uint16_t is an index in the character window. We use short
        instead of int to save space in the various tables. IPos is used
        only for parameter passing.
    */
    using IPos = unsigned;

    using self = deflate_stream;
    typedef block_state(self::*compress_func)(z_params& zs, Flush flush);

    //--------------------------------------------------------------------------

    lut_type const& lut_;

    bool inited_ = false;
    std::size_t buf_size_;
    std::unique_ptr<std::uint8_t[]> buf_;

    int status_;                        // as the name implies
    Byte* pending_buf_;                 // output still pending
    std::uint32_t pending_buf_size_;    // size of pending_buf
    Byte* pending_out_;                 // next pending byte to output to the stream
    uInt pending_;                      // number of bytes in the pending buffer
    boost::optional<Flush> last_flush_; // value of flush param for previous deflate call

    uInt w_size_;   // LZ77 window size (32K by default)
    uInt w_bits_;   // log2(w_size) (8..16)
    uInt w_mask_;   // w_size - 1

    /*  Sliding window. Input bytes are read into the second half of the window,
        and move to the first half later to keep a dictionary of at least wSize
        bytes. With this organization, matches are limited to a distance of
        wSize-maxMatch bytes, but this ensures that IO is always
        performed with a length multiple of the block size. Also, it limits
        the window size to 64K.
        To do: use the user input buffer as sliding window.
    */
    Byte *window_ = nullptr;

    /*  Actual size of window: 2*wSize, except when the user input buffer
        is directly used as sliding window.
    */
    std::uint32_t window_size_;

    /*  Link to older string with same hash index. To limit the size of this
        array to 64K, this link is maintained only for the last 32K strings.
        An index in this array is thus a window index modulo 32K.
    */
    std::uint16_t* prev_;

    std::uint16_t* head_;   // Heads of the hash chains or 0

    uInt ins_h_;            // hash index of string to be inserted
    uInt hash_size_;        // number of elements in hash table
    uInt hash_bits_;        // log2(hash_size)
    uInt hash_mask_;        // hash_size-1

    /*  Number of bits by which ins_h must be shifted at each input
        step. It must be such that after minMatch steps,
        the oldest byte no longer takes part in the hash key, that is:
        hash_shift * minMatch >= hash_bits
    */
    uInt hash_shift_;

    /*  Window position at the beginning of the current output block.
        Gets negative when the window is moved backwards.
    */
    long block_start_;

    uInt match_length_;     // length of best match
    IPos prev_match_;       // previous match
    int match_available_;   // set if previous match exists
    uInt strstart_;         // start of string to insert
    uInt match_start_;      // start of matching string
    uInt lookahead_;        // number of valid bytes ahead in window

    /*  Length of the best match at previous step. Matches not greater
        than this are discarded. This is used in the lazy match evaluation.
    */
    uInt prev_length_;

    /*  To speed up deflation, hash chains are never searched beyond
        this length. A higher limit improves compression ratio but
        degrades the speed.
    */
    uInt max_chain_length_;

    /*  Attempt to find a better match only when the current match is strictly
        smaller than this value. This mechanism is used only for compression
        levels >= 4.

        OR Insert new strings in the hash table only if the match length is not
        greater than this length. This saves time but degrades compression.
        Used only for compression levels <= 3.
    */
    uInt max_lazy_match_;

    int level_;             // compression level (1..9)
    Strategy strategy_;     // favor or force Huffman coding

    // Use a faster search when the previous match is longer than this
    uInt good_match_;

    int nice_match_;        // Stop searching when current match exceeds this

    ct_data dyn_ltree_[HEAP_SIZE];  // literal and length tree
    ct_data dyn_dtree_[2*dCodes+1]; // distance tree
    ct_data bl_tree_[2*blCodes+1];  // Huffman tree for bit lengths

    tree_desc l_desc_;      // desc. for literal tree
    tree_desc d_desc_;      // desc. for distance tree
    tree_desc bl_desc_;     // desc. for bit length tree

    // number of codes at each bit length for an optimal tree
    std::uint16_t bl_count_[maxBits+1];

    // Index within the heap array of least frequent node in the Huffman tree
    static std::size_t constexpr kSmallest = 1;

    /*  The sons of heap[n] are heap[2*n] and heap[2*n+1].
        heap[0] is not used. The same heap array is used to build all trees.
    */

    int heap_[2*lCodes+1];  // heap used to build the Huffman trees
    int heap_len_;          // number of elements in the heap
    int heap_max_;          // element of largest frequency

    // Depth of each subtree used as tie breaker for trees of equal frequency
    std::uint8_t depth_[2*lCodes+1];

    std::uint8_t *l_buf_;   // buffer for literals or lengths

    /*  Size of match buffer for literals/lengths.
        There are 4 reasons for limiting lit_bufsize to 64K:
          - frequencies can be kept in 16 bit counters
          - if compression is not successful for the first block, all input
            data is still in the window so we can still emit a stored block even
            when input comes from standard input. (This can also be done for
            all blocks if lit_bufsize is not greater than 32K.)
          - if compression is not successful for a file smaller than 64K, we can
            even emit a stored file instead of a stored block (saving 5 bytes).
            This is applicable only for zip (not gzip or zlib).
          - creating new Huffman trees less frequently may not provide fast
            adaptation to changes in the input data statistics. (Take for
            example a binary file with poorly compressible code followed by
            a highly compressible string table.) Smaller buffer sizes give
            fast adaptation but have of course the overhead of transmitting
            trees more frequently.
          - I can't count above 4
    */
    uInt lit_bufsize_;
    uInt last_lit_;         // running index in l_buf_

    /*  Buffer for distances. To simplify the code, d_buf_ and l_buf_
        have the same number of elements. To use different lengths, an
        extra flag array would be necessary.
    */
    std::uint16_t* d_buf_;

    std::uint32_t opt_len_;     // bit length of current block with optimal trees
    std::uint32_t static_len_;  // bit length of current block with static trees
    uInt matches_;              // number of string matches in current block
    uInt insert_;               // bytes at end of window left to insert

    /*  Output buffer.
        Bits are inserted starting at the bottom (least significant bits).
    */
    std::uint16_t bi_buf_;

    /*  Number of valid bits in bi_buf_. All bits above the last valid
        bit are always zero.
    */
    int bi_valid_;

    /*  High water mark offset in window for initialized bytes -- bytes
        above this are set to zero in order to avoid memory check warnings
        when longest match routines access bytes past the input. This is
        then updated to the new high water mark.
    */
    std::uint32_t high_water_;

    //--------------------------------------------------------------------------

    deflate_stream()
        : lut_(get_lut())
    {
    }

    /*  In order to simplify the code, particularly on 16 bit machines, match
        distances are limited to MAX_DIST instead of WSIZE.
    */
    std::size_t
    max_dist() const
    {
        return w_size_ - kMinLookahead;
    }
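
    // For example, with the default windowBits of 15, w_size_ = 32768 and
    // max_dist() = 32768 - 262 = 32506: no match may reach further back
    // than that, leaving kMinLookahead bytes of slack in the window.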

    void
    put_byte(std::uint8_t c)
    {
        pending_buf_[pending_++] = c;
    }

    void
    put_short(std::uint16_t w)
    {
        put_byte(w & 0xff);
        put_byte(w >> 8);
    }

    /*  Send a value on a given number of bits.
        IN assertion: length <= 16 and value fits in length bits.
    */
    void
    send_bits(int value, int length)
    {
        if(bi_valid_ > (int)Buf_size - length)
        {
            bi_buf_ |= (std::uint16_t)value << bi_valid_;
            put_short(bi_buf_);
            bi_buf_ = (std::uint16_t)value >> (Buf_size - bi_valid_);
            bi_valid_ += length - Buf_size;
        }
        else
        {
            bi_buf_ |= (std::uint16_t)(value) << bi_valid_;
            bi_valid_ += length;
        }
    }
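
    /*  Worked example (illustrative): suppose bi_valid_ == 14 and we call
        send_bits(0b101, 3). Since 14 > 16 - 3, the low 2 bits of the value
        fill bi_buf_ to 16 bits, which put_short() flushes; the remaining
        high bit (0b101 >> 2) becomes the new bi_buf_, and bi_valid_ ends
        up at 14 + 3 - 16 == 1.
    */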

    // Send a code of the given tree
    void
    send_code(int value, ct_data const* tree)
    {
        send_bits(tree[value].fc, tree[value].dl);
    }

    /*  Mapping from a distance to a distance code. dist is the
        distance - 1 and must not have side effects. _dist_code[256]
        and _dist_code[257] are never used.
    */
    std::uint8_t
    d_code(unsigned dist)
    {
        if(dist < 256)
            return lut_.dist_code[dist];
        return lut_.dist_code[256+(dist>>7)];
    }
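
    /*  For example, a match distance of 1000 gives dist == 999; since
        999 >= 256, the code is read from dist_code[256 + (999 >> 7)],
        i.e. dist_code[263], which per RFC 1951 is distance code 19
        (base distance 769).
    */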

    /*  Update a hash value with the given input byte
        IN assertion: all calls to update_hash are made with
        consecutive input characters, so that a running hash
        key can be computed from the previous key instead of
        complete recalculation each time.
    */
    void
    update_hash(uInt& h, std::uint8_t c)
    {
        h = ((h << hash_shift_) ^ c) & hash_mask_;
    }
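
    /*  For example, with memLevel 8 (hash_bits_ == 15, hash_shift_ == 5):
        after minMatch == 3 consecutive updates the first byte has been
        shifted left by 15 bits and is removed by hash_mask_, so ins_h_
        always depends on exactly the last three input bytes.
    */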

    /*  Initialize the hash table (avoiding 64K overflow for 16
        bit systems). prev[] will be initialized on the fly.
    */
    void
    clear_hash()
    {
        head_[hash_size_-1] = 0;
        std::memset((Byte *)head_, 0,
            (unsigned)(hash_size_-1)*sizeof(*head_));
    }

    /*  Compares two subtrees, using the tree depth as tie breaker
        when the subtrees have equal frequency. This minimizes the
        worst case length.
    */
    bool
    smaller(ct_data const* tree, int n, int m)
    {
        return tree[n].fc < tree[m].fc ||
            (tree[n].fc == tree[m].fc &&
                depth_[n] <= depth_[m]);
    }

    /*  Insert string str in the dictionary and set match_head to the
        previous head of the hash chain (the most recent string with
        same hash key). Return the previous length of the hash chain.
        If this file is compiled with -DFASTEST, the compression level
        is forced to 1, and no hash chains are maintained.
        IN assertion: all calls to insert_string are made with
        consecutive input characters and the first minMatch
        bytes of str are valid (except for the last minMatch-1
        bytes of the input file).
    */
    void
    insert_string(IPos& hash_head)
    {
        update_hash(ins_h_, window_[strstart_ + (minMatch-1)]);
        hash_head = prev_[strstart_ & w_mask_] = head_[ins_h_];
        head_[ins_h_] = (std::uint16_t)strstart_;
    }
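
    /*  head_[h] thus always holds the most recent window position whose
        minMatch-byte prefix hashed to h, and prev_[pos & w_mask_] links
        each position to the previous one with the same hash. Following
        head_ and then prev_ visits candidate match positions from newest
        to oldest, which is how longest_match() walks a chain.
    */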

    //--------------------------------------------------------------------------

    /*  Values for max_lazy_match, good_match and max_chain_length, depending on
        the desired pack level (0..9). The values given below have been tuned to
        exclude worst case performance for pathological files. Better values may be
        found for specific files.
    */
    struct config
    {
        std::uint16_t good_length;  /* reduce lazy search above this match length */
        std::uint16_t max_lazy;     /* do not perform lazy search above this match length */
        std::uint16_t nice_length;  /* quit search above this match length */
        std::uint16_t max_chain;
        compress_func func;

        config(
            std::uint16_t good_length_,
            std::uint16_t max_lazy_,
            std::uint16_t nice_length_,
            std::uint16_t max_chain_,
            compress_func func_)
            : good_length(good_length_)
            , max_lazy(max_lazy_)
            , nice_length(nice_length_)
            , max_chain(max_chain_)
            , func(func_)
        {
        }
    };

    static
    config
    get_config(std::size_t level)
    {
        switch(level)
        {
        //              good  lazy  nice  chain
        case 0: return {  0,    0,    0,    0, &self::deflate_stored}; // store only
        case 1: return {  4,    4,    8,    4, &self::deflate_fast};   // max speed, no lazy matches
        case 2: return {  4,    5,   16,    8, &self::deflate_fast};
        case 3: return {  4,    6,   32,   32, &self::deflate_fast};
        case 4: return {  4,    4,   16,   16, &self::deflate_slow};   // lazy matches
        case 5: return {  8,   16,   32,   32, &self::deflate_slow};
        case 6: return {  8,   16,  128,  128, &self::deflate_slow};
        case 7: return {  8,   32,  128,  256, &self::deflate_slow};
        case 8: return { 32,  128,  258, 1024, &self::deflate_slow};
        default:
        case 9: return { 32,  258,  258, 4096, &self::deflate_slow};   // max compression
        }
    }
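
    /*  For example, at the default level 6 the slow (lazy) deflator is
        used: a match shorter than max_lazy (16) triggers a search for a
        better match starting at the next input byte, at most 128 chain
        links are followed, and a match of nice_length (128) or more
        stops the search immediately.
    */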

    void
    maybe_init()
    {
        if(! inited_)
            init();
    }

    template<class Unsigned>
    static
    Unsigned
    bi_reverse(Unsigned code, unsigned len);

    template<class = void>
    static
    void
    gen_codes(ct_data *tree, int max_code, std::uint16_t *bl_count);

    template<class = void>
    static
    lut_type const&
    get_lut();

    template<class = void> void doReset (int level, int windowBits, int memLevel, Strategy strategy);
    template<class = void> void doReset ();
    template<class = void> void doClear ();
    template<class = void> std::size_t doUpperBound (std::size_t sourceLen) const;
    template<class = void> void doTune (int good_length, int max_lazy, int nice_length, int max_chain);
    template<class = void> void doParams (z_params& zs, int level, Strategy strategy, error_code& ec);
    template<class = void> void doWrite (z_params& zs, boost::optional<Flush> flush, error_code& ec);
    template<class = void> void doDictionary (Byte const* dict, uInt dictLength, error_code& ec);
    template<class = void> void doPrime (int bits, int value, error_code& ec);
    template<class = void> void doPending (unsigned* value, int* bits);

    template<class = void> void init ();
    template<class = void> void lm_init ();
    template<class = void> void init_block ();
    template<class = void> void pqdownheap (ct_data const* tree, int k);
    template<class = void> void pqremove (ct_data const* tree, int& top);
    template<class = void> void gen_bitlen (tree_desc *desc);
    template<class = void> void build_tree (tree_desc *desc);
    template<class = void> void scan_tree (ct_data *tree, int max_code);
    template<class = void> void send_tree (ct_data *tree, int max_code);
    template<class = void> int build_bl_tree ();
    template<class = void> void send_all_trees (int lcodes, int dcodes, int blcodes);
    template<class = void> void compress_block (ct_data const* ltree, ct_data const* dtree);
    template<class = void> int detect_data_type ();
    template<class = void> void bi_windup ();
    template<class = void> void bi_flush ();
    template<class = void> void copy_block (char *buf, unsigned len, int header);

    template<class = void> void tr_init ();
    template<class = void> void tr_align ();
    template<class = void> void tr_flush_bits ();
    template<class = void> void tr_stored_block (char *bu, std::uint32_t stored_len, int last);
    template<class = void> void tr_tally_dist (std::uint16_t dist, std::uint8_t len, bool& flush);
    template<class = void> void tr_tally_lit (std::uint8_t c, bool& flush);

    template<class = void> void tr_flush_block (z_params& zs, char *buf, std::uint32_t stored_len, int last);
    template<class = void> void fill_window (z_params& zs);
    template<class = void> void flush_pending (z_params& zs);
    template<class = void> void flush_block (z_params& zs, bool last);
    template<class = void> int read_buf (z_params& zs, Byte *buf, unsigned size);
    template<class = void> uInt longest_match (IPos cur_match);

    template<class = void> block_state f_stored (z_params& zs, Flush flush);
    template<class = void> block_state f_fast (z_params& zs, Flush flush);
    template<class = void> block_state f_slow (z_params& zs, Flush flush);
    template<class = void> block_state f_rle (z_params& zs, Flush flush);
    template<class = void> block_state f_huff (z_params& zs, Flush flush);

    block_state
    deflate_stored(z_params& zs, Flush flush)
    {
        return f_stored(zs, flush);
    }

    block_state
    deflate_fast(z_params& zs, Flush flush)
    {
        return f_fast(zs, flush);
    }

    block_state
    deflate_slow(z_params& zs, Flush flush)
    {
        return f_slow(zs, flush);
    }

    block_state
    deflate_rle(z_params& zs, Flush flush)
    {
        return f_rle(zs, flush);
    }

    block_state
    deflate_huff(z_params& zs, Flush flush)
    {
        return f_huff(zs, flush);
    }
};

//--------------------------------------------------------------------------

// Reverse the first len bits of a code
template<class Unsigned>
inline
Unsigned
deflate_stream::
bi_reverse(Unsigned code, unsigned len)
{
    BOOST_STATIC_ASSERT(std::is_unsigned<Unsigned>::value);
    BOOST_ASSERT(len <= 8 * sizeof(unsigned));
    Unsigned res = 0;
    do
    {
        res |= code & 1;
        code >>= 1;
        res <<= 1;
    }
    while(--len > 0);
    return res >> 1;
}
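
/*  For example, bi_reverse(0b10110, 5) returns 0b01101: the code is
    consumed from its low end and rebuilt in reverse, matching the
    LSB-first bit order used by the deflate bit stream.
*/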

/*  Generate the codes for a given tree and bit counts (which need not be optimal).
    IN assertion: the array bl_count contains the bit length statistics for
    the given tree and the field len is set for all tree elements.
    OUT assertion: the field code is set for all tree elements of non
    zero code length.
*/
template<class>
void
deflate_stream::
gen_codes(ct_data *tree, int max_code, std::uint16_t *bl_count)
{
    std::uint16_t next_code[maxBits+1]; /* next code value for each bit length */
    std::uint16_t code = 0;             /* running code value */
    int bits;                           /* bit index */
    int n;                              /* code index */

    // The distribution counts are first used to
    // generate the code values without bit reversal.
    for(bits = 1; bits <= maxBits; bits++)
    {
        code = (code + bl_count[bits-1]) << 1;
        next_code[bits] = code;
    }
    // Check that the bit counts in bl_count are consistent.
    // The last code must be all ones.
    BOOST_ASSERT(code + bl_count[maxBits]-1 == (1<<maxBits)-1);
    for(n = 0; n <= max_code; n++)
    {
        int len = tree[n].dl;
        if(len == 0)
            continue;
        tree[n].fc = bi_reverse(next_code[len]++, len);
    }
}
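
/*  Worked example (illustrative): for four symbols with code lengths
    {1, 2, 3, 3}, bl_count is {0, 1, 1, 2, 0, ...} and the first loop
    yields next_code[1] = 0, next_code[2] = 2, next_code[3] = 6. The
    symbols receive the canonical codes 0, 10, 110 and 111, each stored
    bit-reversed in fc for LSB-first output.
*/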

template<class>
auto
deflate_stream::get_lut() ->
    lut_type const&
{
    struct init
    {
        lut_type tables;

        init()
        {
            // number of codes at each bit length for an optimal tree
            //std::uint16_t bl_count[maxBits+1];

            // Initialize the mapping length (0..255) -> length code (0..28)
            std::uint8_t length = 0;
            for(std::uint8_t code = 0; code < lengthCodes-1; ++code)
            {
                tables.base_length[code] = length;
                auto const run = 1U << tables.extra_lbits[code];
                for(unsigned n = 0; n < run; ++n)
                    tables.length_code[length++] = code;
            }
            BOOST_ASSERT(length == 0);
            // Note that the length 255 (match length 258) can be represented
            // in two different ways: code 284 + 5 bits or code 285, so we
            // overwrite length_code[255] to use the best encoding:
            tables.length_code[255] = lengthCodes-1;

            // Initialize the mapping dist (0..32K) -> dist code (0..29)
            {
                std::uint8_t code;
                std::uint16_t dist = 0;
                for(code = 0; code < 16; code++)
                {
                    tables.base_dist[code] = dist;
                    auto const run = 1U << tables.extra_dbits[code];
                    for(unsigned n = 0; n < run; ++n)
                        tables.dist_code[dist++] = code;
                }
                BOOST_ASSERT(dist == 256);
                // from now on, all distances are divided by 128
                dist >>= 7;
                for(; code < dCodes; ++code)
                {
                    tables.base_dist[code] = dist << 7;
                    auto const run = 1U << (tables.extra_dbits[code]-7);
                    for(std::size_t n = 0; n < run; ++n)
                        tables.dist_code[256 + dist++] = code;
                }
                BOOST_ASSERT(dist == 256);
            }

            // Construct the codes of the static literal tree
            std::uint16_t bl_count[maxBits+1];
            std::memset(bl_count, 0, sizeof(bl_count));
            unsigned n = 0;
            while (n <= 143)
                tables.ltree[n++].dl = 8;
            bl_count[8] += 144;
            while (n <= 255)
                tables.ltree[n++].dl = 9;
            bl_count[9] += 112;
            while (n <= 279)
                tables.ltree[n++].dl = 7;
            bl_count[7] += 24;
            while (n <= 287)
                tables.ltree[n++].dl = 8;
            bl_count[8] += 8;
            // Codes 286 and 287 do not exist, but we must include them in the tree
            // construction to get a canonical Huffman tree (longest code all ones)
            gen_codes(tables.ltree, lCodes+1, bl_count);

            for(n = 0; n < dCodes; ++n)
            {
                tables.dtree[n].dl = 5;
                tables.dtree[n].fc =
                    static_cast<std::uint16_t>(bi_reverse(n, 5));
            }
        }
    };
    static init const data;
    return data.tables;
}

template<class>
void
deflate_stream::
doReset(
    int level,
    int windowBits,
    int memLevel,
    Strategy strategy)
{
    if(level == default_size)
        level = 6;

    // VFALCO What do we do about this?
    // until 256-byte window bug fixed
    if(windowBits == 8)
        windowBits = 9;

    if(level < 0 || level > 9)
        BOOST_THROW_EXCEPTION(std::invalid_argument{
            "invalid level"});

    if(windowBits < 8 || windowBits > 15)
        BOOST_THROW_EXCEPTION(std::invalid_argument{
            "invalid windowBits"});

    if(memLevel < 1 || memLevel > max_mem_level)
        BOOST_THROW_EXCEPTION(std::invalid_argument{
            "invalid memLevel"});

    w_bits_ = windowBits;

    hash_bits_ = memLevel + 7;

    // 16K elements by default
    lit_bufsize_ = 1 << (memLevel + 6);

    level_ = level;
    strategy_ = strategy;
    inited_ = false;
}
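
/*  A minimal sketch of how a wrapper might configure and drive the
    stream (illustrative only: these members are protected, so this
    would run in a derived class, and Strategy::normal is assumed to be
    the default strategy from <boost/beast/zlib/zlib.hpp>):

        error_code ec;
        doReset(6, 15, 8, Strategy::normal); // level 6, 32K window,
                                             // default memLevel
        // fill a z_params zs with next_in/avail_in/next_out/avail_out,
        // then call doWrite(zs, Flush::none, ec) repeatedly and finish
        // with doWrite(zs, Flush::finish, ec).
*/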

template<class>
void
deflate_stream::
doReset()
{
    inited_ = false;
}

template<class>
void
deflate_stream::
doClear()
{
    inited_ = false;
    buf_.reset();
}

template<class>
std::size_t
deflate_stream::
doUpperBound(std::size_t sourceLen) const
{
    std::size_t complen;
    std::size_t wraplen;

    /* conservative upper bound for compressed data */
    complen = sourceLen +
        ((sourceLen + 7) >> 3) + ((sourceLen + 63) >> 6) + 5;

    /* compute wrapper length */
    wraplen = 0;

    /* if not default parameters, return conservative bound */
    if(w_bits_ != 15 || hash_bits_ != 8 + 7)
        return complen + wraplen;

    /* default settings: return tight bound for that case */
    return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) +
           (sourceLen >> 25) + 13 - 6 + wraplen;
}
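
/*  For example, with default parameters sourceLen == 100000 yields
    100000 + 24 + 6 + 0 + 13 - 6 == 100037, while the conservative
    branch would give 100000 + 12500 + 1563 + 5 == 114068.
*/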

template<class>
void
deflate_stream::
doTune(
    int good_length,
    int max_lazy,
    int nice_length,
    int max_chain)
{
    good_match_ = good_length;
    nice_match_ = nice_length;
    max_lazy_match_ = max_lazy;
    max_chain_length_ = max_chain;
}

template<class>
void
deflate_stream::
doParams(z_params& zs, int level, Strategy strategy, error_code& ec)
{
    compress_func func;

    if(level == default_size)
        level = 6;
    if(level < 0 || level > 9)
    {
        ec = error::stream_error;
        return;
    }
    func = get_config(level_).func;

    if((strategy != strategy_ || func != get_config(level).func) &&
        zs.total_in != 0)
    {
        // Flush the last buffer:
        doWrite(zs, Flush::block, ec);
        if(ec == error::need_buffers && pending_ == 0)
            ec.assign(0, ec.category());
    }
    if(level_ != level)
    {
        level_ = level;
        max_lazy_match_ = get_config(level).max_lazy;
        good_match_ = get_config(level).good_length;
        nice_match_ = get_config(level).nice_length;
        max_chain_length_ = get_config(level).max_chain;
    }
    strategy_ = strategy;
}

// VFALCO boost::optional param is a workaround for
//        gcc "maybe uninitialized" warning
//        https://github.com/boostorg/beast/issues/532
//
template<class>
void
deflate_stream::
doWrite(z_params& zs, boost::optional<Flush> flush, error_code& ec)
{
    maybe_init();

    if(zs.next_out == 0 || (zs.next_in == 0 && zs.avail_in != 0) ||
        (status_ == FINISH_STATE && flush != Flush::finish))
    {
        ec = error::stream_error;
        return;
    }
    if(zs.avail_out == 0)
    {
        ec = error::need_buffers;
        return;
    }

    // value of flush param for previous deflate call
    auto old_flush = boost::make_optional<Flush>(
        last_flush_.is_initialized(),
        last_flush_ ? *last_flush_ : Flush::none);

    last_flush_ = flush;

    // Flush as much pending output as possible
    if(pending_ != 0)
    {
        flush_pending(zs);
        if(zs.avail_out == 0)
        {
            /* Since avail_out is 0, deflate will be called again with
               more output space, but possibly with both pending and
               avail_in equal to zero. There won't be anything to do,
               but this is not an error situation so make sure we
               return OK instead of BUF_ERROR at next call of deflate:
            */
            last_flush_ = boost::none;
            return;
        }
    }
    else if(zs.avail_in == 0 && (
            old_flush && flush <= *old_flush
        ) && flush != Flush::finish)
    {
        /* Make sure there is something to do and avoid duplicate consecutive
           flushes. For repeated and useless calls with Flush::finish, we keep
           returning Z_STREAM_END instead of Z_BUF_ERROR.
        */
        ec = error::need_buffers;
        return;
    }

    // User must not provide more input after the first FINISH:
    if(status_ == FINISH_STATE && zs.avail_in != 0)
    {
        ec = error::need_buffers;
        return;
    }

    /* Start a new block or continue the current one.
    */
    if(zs.avail_in != 0 || lookahead_ != 0 ||
        (flush != Flush::none && status_ != FINISH_STATE))
    {
        block_state bstate;

        switch(strategy_)
        {
        case Strategy::huffman:
            bstate = deflate_huff(zs, flush.get());
            break;
        case Strategy::rle:
            bstate = deflate_rle(zs, flush.get());
            break;
        default:
        {
            bstate = (this->*(get_config(level_).func))(zs, flush.get());
            break;
        }
        }

        if(bstate == finish_started || bstate == finish_done)
        {
            status_ = FINISH_STATE;
        }
        if(bstate == need_more || bstate == finish_started)
        {
            if(zs.avail_out == 0)
            {
                last_flush_ = boost::none; /* avoid BUF_ERROR next call, see above */
            }
            return;
            /* If flush != Flush::none && avail_out == 0, the next call
               of deflate should use the same flush parameter to make sure
               that the flush is complete. So we don't have to output an
               empty block here, this will be done at next call. This also
               ensures that for a very small output buffer, we emit at most
               one empty block.
            */
        }
        if(bstate == block_done)
        {
            if(flush == Flush::partial)
            {
                tr_align();
            }
            else if(flush != Flush::block)
            {
                /* FULL_FLUSH or SYNC_FLUSH */
                tr_stored_block((char*)0, 0L, 0);
                /* For a full flush, this empty block will be recognized
                   as a special marker by inflate_sync().
                */
                if(flush == Flush::full)
                {
                    clear_hash(); // forget history
                    if(lookahead_ == 0)
                    {
                        strstart_ = 0;
                        block_start_ = 0L;
                        insert_ = 0;
                    }
                }
            }
            flush_pending(zs);
            if(zs.avail_out == 0)
            {
                last_flush_ = boost::none; /* avoid BUF_ERROR at next call, see above */
                return;
            }
        }
    }

    if(flush == Flush::finish)
    {
        ec = error::end_of_stream;
        return;
    }
}

// VFALCO Warning: untested
template<class>
void
deflate_stream::
doDictionary(Byte const* dict, uInt dictLength, error_code& ec)
{
    if(lookahead_)
    {
        ec = error::stream_error;
        return;
    }

    maybe_init();

    /* if dict would fill window, just replace the history */
    if(dictLength >= w_size_)
    {
        clear_hash();
        strstart_ = 0;
        block_start_ = 0L;
        insert_ = 0;
        dict += dictLength - w_size_;   /* use the tail */
        dictLength = w_size_;
    }

    /* insert dict into window and hash */
    z_params zs;
    zs.avail_in = dictLength;
    zs.next_in = (const Byte *)dict;
    zs.avail_out = 0;
    zs.next_out = 0;
    fill_window(zs);
    while(lookahead_ >= minMatch)
    {
        uInt str = strstart_;
        uInt n = lookahead_ - (minMatch-1);
        do
        {
            update_hash(ins_h_, window_[str + minMatch-1]);
            prev_[str & w_mask_] = head_[ins_h_];
            head_[ins_h_] = (std::uint16_t)str;
            str++;
        }
        while(--n);
        strstart_ = str;
        lookahead_ = minMatch-1;
        fill_window(zs);
    }
    strstart_ += lookahead_;
    block_start_ = (long)strstart_;
    insert_ = lookahead_;
    lookahead_ = 0;
    match_length_ = prev_length_ = minMatch-1;
    match_available_ = 0;
}

template<class>
void
deflate_stream::
doPrime(int bits, int value, error_code& ec)
{
    maybe_init();

    if((Byte *)(d_buf_) < pending_out_ + ((Buf_size + 7) >> 3))
    {
        ec = error::need_buffers;
        return;
    }

    do
    {
        int put = Buf_size - bi_valid_;
        if(put > bits)
            put = bits;
        bi_buf_ |= (std::uint16_t)((value & ((1 << put) - 1)) << bi_valid_);
        bi_valid_ += put;
        tr_flush_bits();
        value >>= put;
        bits -= put;
    }
    while(bits);
}

template<class>
void
deflate_stream::
doPending(unsigned* value, int* bits)
{
    if(value != 0)
        *value = pending_;
    if(bits != 0)
        *bits = bi_valid_;
}

//--------------------------------------------------------------------------

// Do lazy initialization
template<class>
void
deflate_stream::
init()
{
    //  Caller must set these:
    //      w_bits_
    //      hash_bits_
    //      lit_bufsize_
    //      level_
    //      strategy_

    w_size_ = 1 << w_bits_;
    w_mask_ = w_size_ - 1;

    hash_size_ = 1 << hash_bits_;
    hash_mask_ = hash_size_ - 1;
    hash_shift_ = ((hash_bits_+minMatch-1)/minMatch);

    auto const nwindow  = w_size_ * 2*sizeof(Byte);
    auto const nprev    = w_size_ * sizeof(std::uint16_t);
    auto const nhead    = hash_size_ * sizeof(std::uint16_t);
    auto const noverlay = lit_bufsize_ * (sizeof(std::uint16_t)+2);
    auto const needed   = nwindow + nprev + nhead + noverlay;

    if(! buf_ || buf_size_ != needed)
    {
        buf_ = boost::make_unique_noinit<
            std::uint8_t[]>(needed);
        buf_size_ = needed;
    }

    window_ = reinterpret_cast<Byte*>(buf_.get());
    prev_ = reinterpret_cast<std::uint16_t*>(buf_.get() + nwindow);
    head_ = reinterpret_cast<std::uint16_t*>(buf_.get() + nwindow + nprev);

    /*  We overlay pending_buf_ and d_buf_ + l_buf_. This works
        since the average output size for (length, distance)
        codes is <= 24 bits.
    */
    auto overlay = reinterpret_cast<std::uint16_t*>(
        buf_.get() + nwindow + nprev + nhead);

    // nothing written to window_ yet
    high_water_ = 0;

    pending_buf_ =
        reinterpret_cast<std::uint8_t*>(overlay);
    pending_buf_size_ =
        static_cast<std::uint32_t>(lit_bufsize_) *
            (sizeof(std::uint16_t) + 2L);

    d_buf_ = overlay + lit_bufsize_ / sizeof(std::uint16_t);
    l_buf_ = pending_buf_ + (1 + sizeof(std::uint16_t)) * lit_bufsize_;

    pending_ = 0;
    pending_out_ = pending_buf_;

    status_ = BUSY_STATE;
    last_flush_ = Flush::none;

    tr_init();
    lm_init();

    inited_ = true;
}
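
/*  With the default memLevel of 8, lit_bufsize_ == 16384 and the overlay
    occupies 16384 * 4 == 65536 bytes: pending_buf_ spans the whole
    overlay, d_buf_ starts 16384 bytes in (8192 std::uint16_t distances)
    and l_buf_ starts 49152 bytes in, so pending output written at the
    front cannot overtake the (distance, length) pairs still unread
    behind it.
*/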

/*  Initialize the "longest match" routines for a new zlib stream
*/
template<class>
void
deflate_stream::
lm_init()
{
    window_size_ = (std::uint32_t)2L*w_size_;

    clear_hash();

    /* Set the default configuration parameters:
    */
    // VFALCO TODO just copy the config struct
    max_lazy_match_ = get_config(level_).max_lazy;
    good_match_ = get_config(level_).good_length;
    nice_match_ = get_config(level_).nice_length;
    max_chain_length_ = get_config(level_).max_chain;

    strstart_ = 0;
    block_start_ = 0L;
    lookahead_ = 0;
    insert_ = 0;
    match_length_ = prev_length_ = minMatch-1;
    match_available_ = 0;
    ins_h_ = 0;
}

// Initialize a new block.
//
template<class>
void
deflate_stream::
init_block()
{
    for(int n = 0; n < lCodes; n++)
        dyn_ltree_[n].fc = 0;
    for(int n = 0; n < dCodes; n++)
        dyn_dtree_[n].fc = 0;
    for(int n = 0; n < blCodes; n++)
        bl_tree_[n].fc = 0;
    dyn_ltree_[END_BLOCK].fc = 1;
    opt_len_ = 0L;
    static_len_ = 0L;
    last_lit_ = 0;
    matches_ = 0;
}

/*  Restore the heap property by moving down the tree starting at node k,
    exchanging a node with the smallest of its two sons if necessary,
    stopping when the heap property is re-established (each father smaller
    than its two sons).
*/
template<class>
void
deflate_stream::
pqdownheap(
    ct_data const* tree,    // the tree to restore
    int k)                  // node to move down
{
    int v = heap_[k];
    int j = k << 1; // left son of k
    while(j <= heap_len_)
    {
        // Set j to the smallest of the two sons:
        if(j < heap_len_ &&
                smaller(tree, heap_[j+1], heap_[j]))
            j++;
        // Exit if v is smaller than both sons
        if(smaller(tree, v, heap_[j]))
            break;

        // Exchange v with the smallest son
        heap_[k] = heap_[j];
        k = j;

        // And continue down the tree,
        // setting j to the left son of k
        j <<= 1;
    }
    heap_[k] = v;
}

/*  Remove the smallest element from the heap and recreate the heap
    with one less element. Updates heap and heap_len.
*/
template<class>
inline
void
deflate_stream::
pqremove(ct_data const* tree, int& top)
{
    top = heap_[kSmallest];
    heap_[kSmallest] = heap_[heap_len_--];
    pqdownheap(tree, kSmallest);
}

/*  Compute the optimal bit lengths for a tree and update the total bit length
    for the current block.
    IN assertion: the fields freq and dad are set, heap[heap_max] and
       above are the tree nodes sorted by increasing frequency.
    OUT assertions: the field len is set to the optimal bit length, the
        array bl_count contains the frequencies for each bit length.
        The length opt_len is updated; static_len is also updated if stree is
        not null.
*/
template<class>
void
deflate_stream::
gen_bitlen(tree_desc *desc)
{
    ct_data *tree = desc->dyn_tree;
    int max_code = desc->max_code;
    ct_data const* stree = desc->stat_desc->static_tree;
    std::uint8_t const *extra = desc->stat_desc->extra_bits;
    int base = desc->stat_desc->extra_base;
    int max_length = desc->stat_desc->max_length;
    int h;              // heap index
    int n, m;           // iterate over the tree elements
    int bits;           // bit length
    int xbits;          // extra bits
    std::uint16_t f;    // frequency
    int overflow = 0;   // number of elements with bit length too large

    std::fill(&bl_count_[0], &bl_count_[maxBits+1], std::uint16_t{0});

    /* In a first pass, compute the optimal bit lengths (which may
     * overflow in the case of the bit length tree).
     */
    tree[heap_[heap_max_]].dl = 0; // root of the heap

    for(h = heap_max_+1; h < HEAP_SIZE; h++)
    {
        n = heap_[h];
        bits = tree[tree[n].dl].dl + 1;
        if(bits > max_length)
        {
            bits = max_length;
            overflow++;
        }
        // We overwrite tree[n].dl which is no longer needed
        tree[n].dl = (std::uint16_t)bits;

        if(n > max_code)
            continue; // not a leaf node

        bl_count_[bits]++;
        xbits = 0;
        if(n >= base)
            xbits = extra[n-base];
        f = tree[n].fc;
        opt_len_ += (std::uint32_t)f * (bits + xbits);
        if(stree)
            static_len_ += (std::uint32_t)f * (stree[n].dl + xbits);
    }
    if(overflow == 0)
        return;

    // Find the first bit length which could increase:
    do
    {
        bits = max_length-1;
        while(bl_count_[bits] == 0)
            bits--;
        bl_count_[bits]--;      // move one leaf down the tree
        bl_count_[bits+1] += 2; // move one overflow item as its brother
        bl_count_[max_length]--;
        /* The brother of the overflow item also moves one step up,
         * but this does not affect bl_count[max_length]
         */
        overflow -= 2;
    }
    while(overflow > 0);

    /* Now recompute all bit lengths, scanning in increasing frequency.
     * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all
     * lengths instead of fixing only the wrong ones. This idea is taken
     * from 'ar' written by Haruhiko Okumura.)
     */
    for(bits = max_length; bits != 0; bits--)
    {
        n = bl_count_[bits];
        while(n != 0)
        {
            m = heap_[--h];
            if(m > max_code)
                continue;
            if((unsigned) tree[m].dl != (unsigned) bits)
            {
                opt_len_ += ((long)bits - (long)tree[m].dl) * (long)tree[m].fc;
                tree[m].dl = (std::uint16_t)bits;
            }
            n--;
        }
    }
}

/*  Construct one Huffman tree and assign the code bit strings and lengths.
    Update the total bit length for the current block.
    IN assertion: the field freq is set for all tree elements.
    OUT assertions: the fields len and code are set to the optimal bit length
        and corresponding code. The length opt_len is updated; static_len is
        also updated if stree is not null. The field max_code is set.
*/
template<class>
void
deflate_stream::
build_tree(tree_desc *desc)
{
    ct_data *tree = desc->dyn_tree;
    ct_data const* stree = desc->stat_desc->static_tree;
    int elems = desc->stat_desc->elems;
    int n, m;           // iterate over heap elements
    int max_code = -1;  // largest code with non zero frequency
    int node;           // new node being created

    /* Construct the initial heap, with least frequent element in
     * heap[kSmallest]. The sons of heap[n] are heap[2*n] and heap[2*n+1].
     * heap[0] is not used.
     */
    heap_len_ = 0;
    heap_max_ = HEAP_SIZE;

    for(n = 0; n < elems; n++)
    {
        if(tree[n].fc != 0)
        {
            heap_[++(heap_len_)] = max_code = n;
            depth_[n] = 0;
        }
        else
        {
            tree[n].dl = 0;
        }
    }

    /* The pkzip format requires that at least one distance code exists,
     * and that at least one bit should be sent even if there is only one
     * possible code. So to avoid special checks later on we force at least
     * two codes of non zero frequency.
     */
    while(heap_len_ < 2)
    {
        node = heap_[++(heap_len_)] = (max_code < 2 ? ++max_code : 0);
        tree[node].fc = 1;
        depth_[node] = 0;
        opt_len_--;
        if(stree)
            static_len_ -= stree[node].dl;
        // node is 0 or 1 so it does not have extra bits
    }
    desc->max_code = max_code;

    /* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree,
     * establish sub-heaps of increasing lengths:
     */
    for(n = heap_len_/2; n >= 1; n--)
        pqdownheap(tree, n);

    /* Construct the Huffman tree by repeatedly combining the least two
     * frequent nodes.
     */
    node = elems; /* next internal node of the tree */
    do
    {
        pqremove(tree, n);      /* n = node of least frequency */
        m = heap_[kSmallest];   /* m = node of next least frequency */

        heap_[--(heap_max_)] = n; /* keep the nodes sorted by frequency */
        heap_[--(heap_max_)] = m;

        /* Create a new node father of n and m */
        tree[node].fc = tree[n].fc + tree[m].fc;
        depth_[node] = (std::uint8_t)((depth_[n] >= depth_[m] ?
                depth_[n] : depth_[m]) + 1);
        tree[n].dl = tree[m].dl = (std::uint16_t)node;
        /* and insert the new node in the heap */
        heap_[kSmallest] = node++;
        pqdownheap(tree, kSmallest);
    }
    while(heap_len_ >= 2);

    heap_[--(heap_max_)] = heap_[kSmallest];

    /* At this point, the fields freq and dad are set. We can now
     * generate the bit lengths.
     */
    gen_bitlen((tree_desc *)desc);

    /* The field len is now set, we can generate the bit codes */
    gen_codes(tree, max_code, bl_count_);
}

/*  Scan a literal or distance tree to determine the frequencies
    of the codes in the bit length tree.
*/
template<class>
void
deflate_stream::
scan_tree(
    ct_data *tree,  // the tree to be scanned
    int max_code)   // and its largest code of non zero frequency
{
    int n;                      // iterates over all tree elements
    int prevlen = -1;           // last emitted length
    int curlen;                 // length of current code
    int nextlen = tree[0].dl;   // length of next code
    std::uint16_t count = 0;    // repeat count of the current code
    int max_count = 7;          // max repeat count
    int min_count = 4;          // min repeat count

    if(nextlen == 0)
    {
        max_count = 138;
        min_count = 3;
    }
    tree[max_code+1].dl = (std::uint16_t)0xffff; // guard

    for(n = 0; n <= max_code; n++)
    {
        curlen = nextlen;
        nextlen = tree[n+1].dl;
        if(++count < max_count && curlen == nextlen)
        {
            continue;
        }
        else if(count < min_count)
        {
            bl_tree_[curlen].fc += count;
        }
        else if(curlen != 0)
        {
            if(curlen != prevlen)
                bl_tree_[curlen].fc++;
            bl_tree_[REP_3_6].fc++;
        }
        else if(count <= 10)
        {
            bl_tree_[REPZ_3_10].fc++;
        }
        else
        {
            bl_tree_[REPZ_11_138].fc++;
        }
        count = 0;
        prevlen = curlen;
        if(nextlen == 0)
        {
            max_count = 138;
            min_count = 3;
        }
        else if(curlen == nextlen)
        {
            max_count = 6;
            min_count = 3;
        }
        else
        {
            max_count = 7;
            min_count = 4;
        }
    }
}
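
/*  For example, a run of five consecutive code lengths of 5 is counted
    as one bl_tree_[5] increment plus one REP_3_6 increment; send_tree()
    below then emits the length 5 once, followed by REP_3_6 with a 2-bit
    repeat count of 4 - 3 == 1 (meaning "repeat 4 times").
*/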
1689
1690 /* Send a literal or distance tree in compressed form,
1691 using the codes in bl_tree.
1692 */
1693 template<class>
1694 void
1695 deflate_stream::
1696 send_tree(
1697 ct_data *tree, // the tree to be scanned
1698 int max_code) // and its largest code of non zero frequency
1699 {
1700 int n; // iterates over all tree elements
1701 int prevlen = -1; // last emitted length
1702 int curlen; // length of current code
1703 int nextlen = tree[0].dl; // length of next code
1704 int count = 0; // repeat count of the current code
1705 int max_count = 7; // max repeat count
1706 int min_count = 4; // min repeat count
1707
1708 // tree[max_code+1].dl = -1; // guard already set
1709 if(nextlen == 0)
1710 {
1711 max_count = 138;
1712 min_count = 3;
1713 }
1714
1715 for(n = 0; n <= max_code; n++)
1716 {
1717 curlen = nextlen;
1718 nextlen = tree[n+1].dl;
1719 if(++count < max_count && curlen == nextlen)
1720 {
1721 continue;
1722 }
1723 else if(count < min_count)
1724 {
1725 do
1726 {
1727 send_code(curlen, bl_tree_);
1728 }
1729 while (--count != 0);
1730 }
1731 else if(curlen != 0)
1732 {
1733 if(curlen != prevlen)
1734 {
1735 send_code(curlen, bl_tree_);
1736 count--;
1737 }
1738 BOOST_ASSERT(count >= 3 && count <= 6);
1739 send_code(REP_3_6, bl_tree_);
1740 send_bits(count-3, 2);
1741 }
1742 else if(count <= 10)
1743 {
1744 send_code(REPZ_3_10, bl_tree_);
1745 send_bits(count-3, 3);
1746 }
1747 else
1748 {
1749 send_code(REPZ_11_138, bl_tree_);
1750 send_bits(count-11, 7);
1751 }
1752 count = 0;
1753 prevlen = curlen;
1754 if(nextlen == 0)
1755 {
1756 max_count = 138;
1757 min_count = 3;
1758 }
1759 else if(curlen == nextlen)
1760 {
1761 max_count = 6;
1762 min_count = 3;
1763 }
1764 else
1765 {
1766 max_count = 7;
1767 min_count = 4;
1768 }
1769 }
1770 }
1771
1772 /* Construct the Huffman tree for the bit lengths and return
1773 the index in bl_order of the last bit length code to send.
1774 */
1775 template<class>
1776 int
1777 deflate_stream::
1778 build_bl_tree()
1779 {
1780 int max_blindex; // index of last bit length code of non zero freq
1781
1782 // Determine the bit length frequencies for literal and distance trees
1783 scan_tree((ct_data *)dyn_ltree_, l_desc_.max_code);
1784 scan_tree((ct_data *)dyn_dtree_, d_desc_.max_code);
1785
1786 // Build the bit length tree:
1787 build_tree((tree_desc *)(&(bl_desc_)));
1788 /* opt_len now includes the length of the tree representations, except
1789 * the lengths of the bit lengths codes and the 5+5+4 bits for the counts.
1790 */
1791
1792 /* Determine the number of bit length codes to send. The pkzip format
1793 * requires that at least 4 bit length codes be sent. (appnote.txt says
1794 * 3 but the actual value used is 4.)
1795 */
1796 for(max_blindex = blCodes-1; max_blindex >= 3; max_blindex--)
1797 {
1798 if(bl_tree_[lut_.bl_order[max_blindex]].dl != 0)
1799 break;
1800 }
1801 // Update opt_len to include the bit length tree and counts
1802 opt_len_ += 3*(max_blindex+1) + 5+5+4;
1803 return max_blindex;
1804 }
1805
1806 /* Send the header for a block using dynamic Huffman trees: the counts,
1807 the lengths of the bit length codes, the literal tree and the distance
1808 tree.
1809 IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4.
1810 */
1811 template<class>
1812 void
1813 deflate_stream::
1814 send_all_trees(
1815 int lcodes,
1816 int dcodes,
1817 int blcodes) // number of codes for each tree
1818 {
1819 int rank; // index in bl_order
1820
1821 BOOST_ASSERT(lcodes >= 257 && dcodes >= 1 && blcodes >= 4);
1822 BOOST_ASSERT(lcodes <= lCodes && dcodes <= dCodes && blcodes <= blCodes);
1823 send_bits(lcodes-257, 5); // not +255 as stated in appnote.txt
1824 send_bits(dcodes-1, 5);
1825 send_bits(blcodes-4, 4); // not -3 as stated in appnote.txt
1826 for(rank = 0; rank < blcodes; rank++)
1827 send_bits(bl_tree_[lut_.bl_order[rank]].dl, 3);
1828 send_tree((ct_data *)dyn_ltree_, lcodes-1); // literal tree
1829 send_tree((ct_data *)dyn_dtree_, dcodes-1); // distance tree
1830 }
1831
1832 /* Send the block data compressed using the given Huffman trees
1833 */
1834 template<class>
1835 void
1836 deflate_stream::
1837 compress_block(
1838 ct_data const* ltree, // literal tree
1839 ct_data const* dtree) // distance tree
1840 {
1841 unsigned dist; /* distance of matched string */
1842 int lc; /* match length or unmatched char (if dist == 0) */
1843 unsigned lx = 0; /* running index in l_buf */
1844 unsigned code; /* the code to send */
1845 int extra; /* number of extra bits to send */
1846
1847 if(last_lit_ != 0)
1848 {
1849 do
1850 {
1851 dist = d_buf_[lx];
1852 lc = l_buf_[lx++];
1853 if(dist == 0)
1854 {
1855 send_code(lc, ltree); /* send a literal byte */
1856 }
1857 else
1858 {
1859 /* Here, lc is the match length - minMatch */
1860 code = lut_.length_code[lc];
1861 send_code(code+literals+1, ltree); /* send the length code */
1862 extra = lut_.extra_lbits[code];
1863 if(extra != 0)
1864 {
1865 lc -= lut_.base_length[code];
1866 send_bits(lc, extra); /* send the extra length bits */
1867 }
1868 dist--; /* dist is now the match distance - 1 */
1869 code = d_code(dist);
1870 BOOST_ASSERT(code < dCodes);
1871
1872 send_code(code, dtree); /* send the distance code */
1873 extra = lut_.extra_dbits[code];
1874 if(extra != 0)
1875 {
1876 dist -= lut_.base_dist[code];
1877 send_bits(dist, extra); /* send the extra distance bits */
1878 }
1879 } /* literal or match pair ? */
1880
1881 /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */
1882 BOOST_ASSERT((uInt)(pending_) < lit_bufsize_ + 2*lx);
1883 }
1884 while(lx < last_lit_);
1885 }
1886
1887 send_code(END_BLOCK, ltree);
1888 }
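
/* Worked example (editorial note, values per RFC 1951): a match of
 * length 10 at distance 100 arrives here as lc == 10 - minMatch == 7
 * and dist == 100. lc maps to length code 264, which has no extra
 * bits, while dist-1 == 99 falls in distance code 13 (distances
 * 97..128), so six extra bits carry the offset 3 within that range.
 */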
1889
1890 /* Check if the data type is TEXT or BINARY, using the following algorithm:
1891 - TEXT if the two conditions below are satisfied:
1892 a) There are no non-portable control characters belonging to the
1893 "black list" (0..6, 14..25, 28..31).
1894 b) There is at least one printable character belonging to the
1895 "white list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255).
1896 - BINARY otherwise.
1897 - The following partially-portable control characters form a
1898 "gray list" that is ignored in this detection algorithm:
1899 (7 {BEL}, 8 {BS}, 11 {VT}, 12 {FF}, 26 {SUB}, 27 {ESC}).
1900 IN assertion: the fields fc of dyn_ltree are set.
1901 */
1902 template<class>
1903 int
1904 deflate_stream::
1905 detect_data_type()
1906 {
1907 /* black_mask is the bit mask of black-listed bytes
1908 * set bits 0..6, 14..25, and 28..31
1909 * 0xf3ffc07f = binary 11110011111111111100000001111111
1910 */
1911 unsigned long black_mask = 0xf3ffc07fUL;
1912 int n;
1913
1914 // Check for non-textual ("black-listed") bytes.
1915 for(n = 0; n <= 31; n++, black_mask >>= 1)
1916 if((black_mask & 1) && (dyn_ltree_[n].fc != 0))
1917 return binary;
1918
1919 // Check for textual ("white-listed") bytes.
1920 if(dyn_ltree_[9].fc != 0 || dyn_ltree_[10].fc != 0
1921 || dyn_ltree_[13].fc != 0)
1922 return text;
1923 for(n = 32; n < literals; n++)
1924 if(dyn_ltree_[n].fc != 0)
1925 return text;
1926
1927 /* There are no "black-listed" or "white-listed" bytes:
1928 * this stream either is empty or has tolerated ("gray-listed") bytes only.
1929 */
1930 return binary;
1931 }
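
/* Editorial sanity sketch (exposition only): the mask constant used
 * above can be rebuilt from the listed "black list" ranges.
 */
#if 0
inline void check_black_mask()
{
    unsigned long m = 0;
    for(int i = 0; i <= 6; ++i)
        m |= 1UL << i;  // control characters 0..6
    for(int i = 14; i <= 25; ++i)
        m |= 1UL << i;  // control characters 14..25
    for(int i = 28; i <= 31; ++i)
        m |= 1UL << i;  // control characters 28..31
    BOOST_ASSERT(m == 0xf3ffc07fUL);
}
#endif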
1932
1933 /* Flush the bit buffer and align the output on a byte boundary
1934 */
1935 template<class>
1936 void
1937 deflate_stream::
1938 bi_windup()
1939 {
1940 if(bi_valid_ > 8)
1941 put_short(bi_buf_);
1942 else if(bi_valid_ > 0)
1943 put_byte((Byte)bi_buf_);
1944 bi_buf_ = 0;
1945 bi_valid_ = 0;
1946 }
1947
1948 /* Flush the bit buffer, keeping at most 7 bits in it.
1949 */
1950 template<class>
1951 void
1952 deflate_stream::
1953 bi_flush()
1954 {
1955 if(bi_valid_ == 16)
1956 {
1957 put_short(bi_buf_);
1958 bi_buf_ = 0;
1959 bi_valid_ = 0;
1960 }
1961 else if(bi_valid_ >= 8)
1962 {
1963 put_byte((Byte)bi_buf_);
1964 bi_buf_ >>= 8;
1965 bi_valid_ -= 8;
1966 }
1967 }
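
/* Worked example (editorial note): with bi_valid_ == 13, bi_windup()
 * writes both buffer bytes and clears the buffer, whereas bi_flush()
 * writes only the low byte and keeps the remaining 13 - 8 == 5 bits
 * for the next call.
 */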
1968
1969 /* Copy a stored block, storing first the length and its
1970 one's complement if requested.
1971 */
1972 template<class>
1973 void
1974 deflate_stream::
1975 copy_block(
1976 char *buf, // the input data
1977 unsigned len, // its length
1978 int header) // true if block header must be written
1979 {
1980 bi_windup(); // align on byte boundary
1981
1982 if(header)
1983 {
1984 put_short((std::uint16_t)len);
1985 put_short((std::uint16_t)~len);
1986 }
1987 // VFALCO Use memcpy?
1988 while (len--)
1989 put_byte(*buf++);
1990 }
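
/* Worked example (editorial note, assuming put_short() emits the low
 * byte first as in zlib): for len == 300 the stored-block header is
 * LEN == 0x012C -> bytes 2C 01, NLEN == ~LEN == 0xFED3 -> bytes D3 FE,
 * followed by the 300 literal bytes.
 */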
1991
1992 //------------------------------------------------------------------------------
1993
1994 /* Initialize the tree data structures for a new zlib stream.
1995 */
1996 template<class>
1997 void
1998 deflate_stream::
1999 tr_init()
2000 {
2001 l_desc_.dyn_tree = dyn_ltree_;
2002 l_desc_.stat_desc = &lut_.l_desc;
2003
2004 d_desc_.dyn_tree = dyn_dtree_;
2005 d_desc_.stat_desc = &lut_.d_desc;
2006
2007 bl_desc_.dyn_tree = bl_tree_;
2008 bl_desc_.stat_desc = &lut_.bl_desc;
2009
2010 bi_buf_ = 0;
2011 bi_valid_ = 0;
2012
2013 // Initialize the first block of the first file:
2014 init_block();
2015 }
2016
2017 /* Send one empty static block to give enough lookahead for inflate.
2018 This takes 10 bits, of which 7 may remain in the bit buffer.
2019 */
2020 template<class>
2021 void
2022 deflate_stream::
2023 tr_align()
2024 {
2025 send_bits(STATIC_TREES<<1, 3);
2026 send_code(END_BLOCK, lut_.ltree);
2027 bi_flush();
2028 }
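
/* Editorial note: the 10 bits are the 3-bit block header with the
 * last-block flag clear, plus the 7-bit fixed code for END_BLOCK
 * (symbol 256 encodes as seven zero bits in the static literal/length
 * tree, RFC 1951 section 3.2.6).
 */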
2029
2030 /* Flush the bits in the bit buffer to pending output (leaves at most 7 bits)
2031 */
2032 template<class>
2033 void
2034 deflate_stream::
2035 tr_flush_bits()
2036 {
2037 bi_flush();
2038 }
2039
2040 /* Send a stored block
2041 */
2042 template<class>
2043 void
2044 deflate_stream::
2045 tr_stored_block(
2046 char *buf, // input block
2047 std::uint32_t stored_len, // length of input block
2048 int last) // one if this is the last block for a file
2049 {
2050 send_bits((STORED_BLOCK<<1)+last, 3); // send block type
2051 copy_block(buf, (unsigned)stored_len, 1); // with header
2052 }
2053
2054 template<class>
2055 inline
2056 void
2057 deflate_stream::
2058 tr_tally_dist(std::uint16_t dist, std::uint8_t len, bool& flush)
2059 {
2060 d_buf_[last_lit_] = dist;
2061 l_buf_[last_lit_++] = len;
2062 dist--;
2063 dyn_ltree_[lut_.length_code[len]+literals+1].fc++;
2064 dyn_dtree_[d_code(dist)].fc++;
2065 flush = (last_lit_ == lit_bufsize_-1);
2066 }
2067
2068 template<class>
2069 inline
2070 void
2071 deflate_stream::
2072 tr_tally_lit(std::uint8_t c, bool& flush)
2073 {
2074 d_buf_[last_lit_] = 0;
2075 l_buf_[last_lit_++] = c;
2076 dyn_ltree_[c].fc++;
2077 flush = (last_lit_ == lit_bufsize_-1);
2078 }
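
/* Editorial usage sketch (exposition only; zs, match_length and
 * match_start are illustrative names mirroring the f_* passes below):
 */
#if 0
bool bflush;
if(match_length >= minMatch)
    tr_tally_dist(
        static_cast<std::uint16_t>(strstart_ - match_start),
        static_cast<std::uint8_t>(match_length - minMatch), bflush);
else
    tr_tally_lit(window_[strstart_], bflush);
if(bflush)
    flush_block(zs, false); // literal buffer filled, emit the block
#endif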
2079
2080 //------------------------------------------------------------------------------
2081
2082 /* Determine the best encoding for the current block: dynamic trees,
2083 static trees or store, and output the encoded block to the zip file.
2084 */
2085 template<class>
2086 void
2087 deflate_stream::
2088 tr_flush_block(
2089 z_params& zs,
2090 char *buf, // input block, or NULL if too old
2091 std::uint32_t stored_len, // length of input block
2092 int last) // one if this is the last block for a file
2093 {
2094 std::uint32_t opt_lenb;
2095 std::uint32_t static_lenb; // opt_len and static_len in bytes
2096 int max_blindex = 0; // index of last bit length code of non zero freq
2097
2098 // Build the Huffman trees unless a stored block is forced
2099 if(level_ > 0)
2100 {
2101 // Check if the file is binary or text
2102 if(zs.data_type == unknown)
2103 zs.data_type = detect_data_type();
2104
2105 // Construct the literal and distance trees
2106 build_tree((tree_desc *)(&(l_desc_)));
2107
2108 build_tree((tree_desc *)(&(d_desc_)));
2109 /* At this point, opt_len and static_len are the total bit lengths of
2110 * the compressed block data, excluding the tree representations.
2111 */
2112
2113 /* Build the bit length tree for the above two trees, and get the index
2114 * in bl_order of the last bit length code to send.
2115 */
2116 max_blindex = build_bl_tree();
2117
2118 /* Determine the best encoding. Compute the block lengths in bytes. */
2119 opt_lenb = (opt_len_+3+7)>>3;
2120 static_lenb = (static_len_+3+7)>>3;
2121
2122 if(static_lenb <= opt_lenb)
2123 opt_lenb = static_lenb;
2124 }
2125 else
2126 {
2127 // VFALCO This assertion fails even in the original ZLib,
2128 // happens with strategy == Z_HUFFMAN_ONLY, see:
2129 // https://github.com/madler/zlib/issues/172
2130
2131 #if 0
2132 BOOST_ASSERT(buf);
2133 #endif
2134 opt_lenb = static_lenb = stored_len + 5; // force a stored block
2135 }
2136
2137 #ifdef FORCE_STORED
2138 if(buf != (char*)0) { /* force stored block */
2139 #else
2140 if(stored_len+4 <= opt_lenb && buf != (char*)0) {
2141 /* 4: two words for the lengths */
2142 #endif
2143 /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE.
2144 * Otherwise we can't have processed more than WSIZE input bytes since
2145 * the last block flush, because compression would have been
2146 * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to
2147 * transform a block into a stored block.
2148 */
2149 tr_stored_block(buf, stored_len, last);
2150
2151 #ifdef FORCE_STATIC
2152 }
2153 else if(static_lenb >= 0)
2154 {
2155 // force static trees
2156 #else
2157 }
2158 else if(strategy_ == Strategy::fixed || static_lenb == opt_lenb)
2159 {
2160 #endif
2161 send_bits((STATIC_TREES<<1)+last, 3);
2162 compress_block(lut_.ltree, lut_.dtree);
2163 }
2164 else
2165 {
2166 send_bits((DYN_TREES<<1)+last, 3);
2167 send_all_trees(l_desc_.max_code+1, d_desc_.max_code+1,
2168 max_blindex+1);
2169 compress_block((const ct_data *)dyn_ltree_,
2170 (const ct_data *)dyn_dtree_);
2171 }
2172 /* The above check is made mod 2^32, for files larger than 512 MB
2173 * and std::size_t implemented on 32 bits.
2174 */
2175 init_block();
2176
2177 if(last)
2178 bi_windup();
2179 }
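
/* Worked example (editorial note): for an incompressible 1000-byte
 * block, opt_lenb and static_lenb will normally exceed 1004, so
 * stored_len + 4 <= opt_lenb holds (the 4 bytes being the LEN/NLEN
 * words) and the data is emitted verbatim as a stored block.
 */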
2180
2181 template<class>
2182 void
2183 deflate_stream::
2184 fill_window(z_params& zs)
2185 {
2186 unsigned n, m;
2187 unsigned more; // Amount of free space at the end of the window.
2188 std::uint16_t *p;
2189 uInt wsize = w_size_;
2190
2191 do
2192 {
2193 more = (unsigned)(window_size_ -
2194 (std::uint32_t)lookahead_ -(std::uint32_t)strstart_);
2195
2196 // VFALCO We don't support systems below 32-bit
2197 #if 0
2198 // Deal with !@#$% 64K limit:
2199 if(sizeof(int) <= 2)
2200 {
2201 if(more == 0 && strstart_ == 0 && lookahead_ == 0)
2202 {
2203 more = wsize;
2204 }
2205 else if(more == (unsigned)(-1))
2206 {
2207 /* Very unlikely, but possible on 16 bit machine if
2208 * strstart == 0 && lookahead == 1 (input done a byte at time)
2209 */
2210 more--;
2211 }
2212 }
2213 #endif
2214
2215 /* If the window is almost full and there is insufficient lookahead,
2216 move the upper half to the lower one to make room in the upper half.
2217 */
2218 if(strstart_ >= wsize+max_dist())
2219 {
2220 std::memcpy(window_, window_+wsize, (unsigned)wsize);
2221 match_start_ -= wsize;
2222 strstart_ -= wsize; // we now have strstart >= max_dist
2223 block_start_ -= (long) wsize;
2224
2225 /* Slide the hash table (could be avoided with 32 bit values
2226 at the expense of memory usage). We slide even when level == 0
2227 to keep the hash table consistent if we switch back to level > 0
2228 later. (Using level 0 permanently is not an optimal usage of
2229 zlib, so we don't care about this pathological case.)
2230 */
2231 n = hash_size_;
2232 p = &head_[n];
2233 do
2234 {
2235 m = *--p;
2236 *p = (std::uint16_t)(m >= wsize ? m-wsize : 0);
2237 }
2238 while(--n);
2239
2240 n = wsize;
2241 p = &prev_[n];
2242 do
2243 {
2244 m = *--p;
2245 *p = (std::uint16_t)(m >= wsize ? m-wsize : 0);
2246 /* If n is not on any hash chain, prev[n] is garbage but
2247 its value will never be used.
2248 */
2249 }
2250 while(--n);
2251 more += wsize;
2252 }
2253 if(zs.avail_in == 0)
2254 break;
2255
2256 /* If there was no sliding:
2257 strstart <= WSIZE+max_dist-1 && lookahead <= kMinLookahead - 1 &&
2258 more == window_size - lookahead - strstart
2259 => more >= window_size - (kMinLookahead-1 + WSIZE + max_dist-1)
2260 => more >= window_size - 2*WSIZE + 2
2261 In the BIG_MEM or MMAP case (not yet supported),
2262 window_size == input_size + kMinLookahead &&
2263 strstart + lookahead_ <= input_size => more >= kMinLookahead.
2264 Otherwise, window_size == 2*WSIZE so more >= 2.
2265 If there was sliding, more >= WSIZE. So in all cases, more >= 2.
2266 */
2267 n = read_buf(zs, window_ + strstart_ + lookahead_, more);
2268 lookahead_ += n;
2269
2270 // Initialize the hash value now that we have some input:
2271 if(lookahead_ + insert_ >= minMatch)
2272 {
2273 uInt str = strstart_ - insert_;
2274 ins_h_ = window_[str];
2275 update_hash(ins_h_, window_[str + 1]);
2276 while(insert_)
2277 {
2278 update_hash(ins_h_, window_[str + minMatch-1]);
2279 prev_[str & w_mask_] = head_[ins_h_];
2280 head_[ins_h_] = (std::uint16_t)str;
2281 str++;
2282 insert_--;
2283 if(lookahead_ + insert_ < minMatch)
2284 break;
2285 }
2286 }
2287 /* If the whole input has less than minMatch bytes, ins_h is garbage,
2288 but this is not important since only literal bytes will be emitted.
2289 */
2290 }
2291 while(lookahead_ < kMinLookahead && zs.avail_in != 0);
2292
2293 /* If the kWinInit bytes after the end of the current data have never been
2294 written, then zero those bytes in order to avoid memory check reports of
2295 the use of uninitialized (or uninitialised as Julian writes) bytes by
2296 the longest match routines. Update the high water mark for the next
2297 time through here. kWinInit is set to maxMatch since the longest match
2298 routines allow scanning to strstart + maxMatch, ignoring lookahead.
2299 */
2300 if(high_water_ < window_size_)
2301 {
2302 std::uint32_t curr = strstart_ + (std::uint32_t)(lookahead_);
2303 std::uint32_t winit;
2304
2305 if(high_water_ < curr)
2306 {
2307 /* Previous high water mark below current data -- zero kWinInit
2308 bytes or up to end of window, whichever is less.
2309 */
2310 winit = window_size_ - curr;
2311 if(winit > kWinInit)
2312 winit = kWinInit;
2313 std::memset(window_ + curr, 0, (unsigned)winit);
2314 high_water_ = curr + winit;
2315 }
2316 else if(high_water_ < (std::uint32_t)curr + kWinInit)
2317 {
2318 /* High water mark at or above current data, but below current data
2319 plus kWinInit -- zero out to current data plus kWinInit, or up
2320 to end of window, whichever is less.
2321 */
2322 winit = (std::uint32_t)curr + kWinInit - high_water_;
2323 if(winit > window_size_ - high_water_)
2324 winit = window_size_ - high_water_;
2325 std::memset(window_ + high_water_, 0, (unsigned)winit);
2326 high_water_ += winit;
2327 }
2328 }
2329 }
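
/* Worked example (editorial note): with w_size_ == 32768, once
 * strstart_ reaches wsize + max_dist() the upper half of the window
 * is copied down, strstart_, match_start_ and block_start_ drop by
 * 32768, and each head_/prev_ entry is rebased by -wsize or zeroed
 * when it would otherwise point before the new window start.
 */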
2330
2331 /* Flush as much pending output as possible. All write() output goes
2332 through this function so some applications may wish to modify it
2333 to avoid allocating a large strm->next_out buffer and copying into it.
2334 (See also read_buf()).
2335 */
2336 template<class>
2337 void
2338 deflate_stream::
2339 flush_pending(z_params& zs)
2340 {
2341 tr_flush_bits();
2342 auto len = clamp(pending_, zs.avail_out);
2343 if(len == 0)
2344 return;
2345
2346 std::memcpy(zs.next_out, pending_out_, len);
2347 zs.next_out =
2348 static_cast<std::uint8_t*>(zs.next_out) + len;
2349 pending_out_ += len;
2350 zs.total_out += len;
2351 zs.avail_out -= len;
2352 pending_ -= len;
2353 if(pending_ == 0)
2354 pending_out_ = pending_buf_;
2355 }
2356
2357 /* Flush the current block, with given end-of-file flag.
2358 IN assertion: strstart is set to the end of the current match.
2359 */
2360 template<class>
2361 inline
2362 void
2363 deflate_stream::
2364 flush_block(z_params& zs, bool last)
2365 {
2366 tr_flush_block(zs,
2367 (block_start_ >= 0L ?
2368 (char *)&window_[(unsigned)block_start_] :
2369 (char *)0),
2370 (std::uint32_t)((long)strstart_ - block_start_),
2371 last);
2372 block_start_ = strstart_;
2373 flush_pending(zs);
2374 }
2375
2376 /* Read a new buffer from the current input stream, update the adler32
2377 and total number of bytes read. All write() input goes through
2378 this function so some applications may wish to modify it to avoid
2379 allocating a large strm->next_in buffer and copying from it.
2380 (See also flush_pending()).
2381 */
2382 template<class>
2383 int
2384 deflate_stream::
2385 read_buf(z_params& zs, Byte *buf, unsigned size)
2386 {
2387 auto len = clamp(zs.avail_in, size);
2388 if(len == 0)
2389 return 0;
2390
2391 zs.avail_in -= len;
2392
2393 std::memcpy(buf, zs.next_in, len);
2394 zs.next_in = static_cast<
2395 std::uint8_t const*>(zs.next_in) + len;
2396 zs.total_in += len;
2397 return (int)len;
2398 }
2399
2400 /* Set match_start to the longest match starting at the given string and
2401 return its length. Matches shorter than or equal to prev_length are discarded,
2402 in which case the result is equal to prev_length and match_start is
2403 garbage.
2404 IN assertions: cur_match is the head of the hash chain for the current
2405 string (strstart) and its distance is <= max_dist, and prev_length >= 1
2406 OUT assertion: the match length is not greater than s->lookahead_.
2407
2408 For 80x86 and 680x0, an optimized version will be provided in match.asm or
2409 match.S. The code will be functionally equivalent.
2410 */
2411 template<class>
2412 uInt
2413 deflate_stream::
2414 longest_match(IPos cur_match)
2415 {
2416 unsigned chain_length = max_chain_length_;/* max hash chain length */
2417 Byte *scan = window_ + strstart_; /* current string */
2418 Byte *match; /* matched string */
2419 int len; /* length of current match */
2420 int best_len = prev_length_; /* best match length so far */
2421 int nice_match = nice_match_; /* stop if match long enough */
2422 IPos limit = strstart_ > (IPos)max_dist() ?
2423 strstart_ - (IPos)max_dist() : 0;
2424 /* Stop when cur_match becomes <= limit. To simplify the code,
2425 * we prevent matches with the string of window index 0.
2426 */
2427 std::uint16_t *prev = prev_;
2428 uInt wmask = w_mask_;
2429
2430 Byte *strend = window_ + strstart_ + maxMatch;
2431 Byte scan_end1 = scan[best_len-1];
2432 Byte scan_end = scan[best_len];
2433
2434 /* The code is optimized for HASH_BITS >= 8 and maxMatch-2 multiple of 16.
2435 * It is easy to get rid of this optimization if necessary.
2436 */
2437 BOOST_ASSERT(hash_bits_ >= 8 && maxMatch == 258);
2438
2439 /* Do not waste too much time if we already have a good match: */
2440 if(prev_length_ >= good_match_) {
2441 chain_length >>= 2;
2442 }
2443 /* Do not look for matches beyond the end of the input. This is necessary
2444 * to make deflate deterministic.
2445 */
2446 if((uInt)nice_match > lookahead_)
2447 nice_match = lookahead_;
2448
2449 BOOST_ASSERT((std::uint32_t)strstart_ <= window_size_-kMinLookahead);
2450
2451 do {
2452 BOOST_ASSERT(cur_match < strstart_);
2453 match = window_ + cur_match;
2454
2455 /* Skip to next match if the match length cannot increase
2456 * or if the match length is less than 2. Note that the checks below
2457 * for insufficient lookahead only occur occasionally for performance
2458 * reasons. Therefore uninitialized memory will be accessed, and
2459 * conditional jumps will be made that depend on those values.
2460 * However the length of the match is limited to the lookahead, so
2461 * the output of deflate is not affected by the uninitialized values.
2462 */
2463 if( match[best_len] != scan_end ||
2464 match[best_len-1] != scan_end1 ||
2465 *match != *scan ||
2466 *++match != scan[1])
2467 continue;
2468
2469 /* The check at best_len-1 can be removed because it will be made
2470 * again later. (This heuristic is not always a win.)
2471 * It is not necessary to compare scan[2] and match[2] since they
2472 * are always equal when the other bytes match, given that
2473 * the hash keys are equal and that HASH_BITS >= 8.
2474 */
2475 scan += 2, match++;
2476 BOOST_ASSERT(*scan == *match);
2477
2478 /* We check for insufficient lookahead only every 8th comparison;
2479 * the 256th check will be made at strstart+258.
2480 */
2481 do
2482 {
2483 }
2484 while( *++scan == *++match && *++scan == *++match &&
2485 *++scan == *++match && *++scan == *++match &&
2486 *++scan == *++match && *++scan == *++match &&
2487 *++scan == *++match && *++scan == *++match &&
2488 scan < strend);
2489
2490 BOOST_ASSERT(scan <= window_+(unsigned)(window_size_-1));
2491
2492 len = maxMatch - (int)(strend - scan);
2493 scan = strend - maxMatch;
2494
2495 if(len > best_len) {
2496 match_start_ = cur_match;
2497 best_len = len;
2498 if(len >= nice_match) break;
2499 scan_end1 = scan[best_len-1];
2500 scan_end = scan[best_len];
2501 }
2502 }
2503 while((cur_match = prev[cur_match & wmask]) > limit
2504 && --chain_length != 0);
2505
2506 if((uInt)best_len <= lookahead_)
2507 return (uInt)best_len;
2508 return lookahead_;
2509 }
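
/* Editorial note on the loop above: the empty do/while body advances
 * scan and match up to eight bytes per iteration; strend is placed so
 * the comparison can run to strstart_ + maxMatch (258 bytes) before
 * the scan < strend guard stops it, and the match length is recovered
 * afterwards as maxMatch - (strend - scan).
 */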
2510
2511 //------------------------------------------------------------------------------
2512
2513 /* Copy without compression as much as possible from the input stream, return
2514 the current block state.
2515 This function does not insert new strings in the dictionary since
2516 incompressible data is probably not useful. This function is used
2517 only for the level=0 compression option.
2518 NOTE: this function should be optimized to avoid extra copying from
2519 window to pending_buf.
2520 */
2521 template<class>
2522 inline
2523 auto
2524 deflate_stream::
2525 f_stored(z_params& zs, Flush flush) ->
2526 block_state
2527 {
2528 /* Stored blocks are limited to 0xffff bytes, pending_buf is limited
2529 * to pending_buf_size, and each stored block has a 5 byte header:
2530 */
2531 std::uint32_t max_block_size = 0xffff;
2532 std::uint32_t max_start;
2533
2534 if(max_block_size > pending_buf_size_ - 5) {
2535 max_block_size = pending_buf_size_ - 5;
2536 }
2537
2538 /* Copy as much as possible from input to output: */
2539 for(;;) {
2540 /* Fill the window as much as possible: */
2541 if(lookahead_ <= 1) {
2542
2543 BOOST_ASSERT(strstart_ < w_size_+max_dist() ||
2544 block_start_ >= (long)w_size_);
2545
2546 fill_window(zs);
2547 if(lookahead_ == 0 && flush == Flush::none)
2548 return need_more;
2549
2550 if(lookahead_ == 0) break; /* flush the current block */
2551 }
2552 BOOST_ASSERT(block_start_ >= 0L);
2553
2554 strstart_ += lookahead_;
2555 lookahead_ = 0;
2556
2557 /* Emit a stored block if pending_buf will be full: */
2558 max_start = block_start_ + max_block_size;
2559 if(strstart_ == 0 || (std::uint32_t)strstart_ >= max_start) {
2560 /* strstart == 0 is possible when wraparound on 16-bit machine */
2561 lookahead_ = (uInt)(strstart_ - max_start);
2562 strstart_ = (uInt)max_start;
2563 flush_block(zs, false);
2564 if(zs.avail_out == 0)
2565 return need_more;
2566 }
2567 /* Flush if we may have to slide, otherwise block_start may become
2568 * negative and the data will be gone:
2569 */
2570 if(strstart_ - (uInt)block_start_ >= max_dist()) {
2571 flush_block(zs, false);
2572 if(zs.avail_out == 0)
2573 return need_more;
2574 }
2575 }
2576 insert_ = 0;
2577 if(flush == Flush::finish)
2578 {
2579 flush_block(zs, true);
2580 if(zs.avail_out == 0)
2581 return finish_started;
2582 return finish_done;
2583 }
2584 if((long)strstart_ > block_start_)
2585 {
2586 flush_block(zs, false);
2587 if(zs.avail_out == 0)
2588 return need_more;
2589 }
2590 return block_done;
2591 }
2592
2593 /* Compress as much as possible from the input stream, return the current
2594 block state.
2595 This function does not perform lazy evaluation of matches and inserts
2596 new strings in the dictionary only for unmatched strings or for short
2597 matches. It is used only for the fast compression options.
2598 */
2599 template<class>
2600 inline
2601 auto
2602 deflate_stream::
2603 f_fast(z_params& zs, Flush flush) ->
2604 block_state
2605 {
2606 IPos hash_head; /* head of the hash chain */
2607 bool bflush; /* set if current block must be flushed */
2608
2609 for(;;)
2610 {
2611 /* Make sure that we always have enough lookahead, except
2612 * at the end of the input file. We need maxMatch bytes
2613 * for the next match, plus minMatch bytes to insert the
2614 * string following the next match.
2615 */
2616 if(lookahead_ < kMinLookahead)
2617 {
2618 fill_window(zs);
2619 if(lookahead_ < kMinLookahead && flush == Flush::none)
2620 return need_more;
2621 if(lookahead_ == 0)
2622 break; /* flush the current block */
2623 }
2624
2625 /* Insert the string window[strstart .. strstart+2] in the
2626 * dictionary, and set hash_head to the head of the hash chain:
2627 */
2628 hash_head = 0;
2629 if(lookahead_ >= minMatch) {
2630 insert_string(hash_head);
2631 }
2632
2633 /* Find the longest match, discarding those <= prev_length.
2634 * At this point we have always match_length < minMatch
2635 */
2636 if(hash_head != 0 && strstart_ - hash_head <= max_dist()) {
2637 /* To simplify the code, we prevent matches with the string
2638 * of window index 0 (in particular we have to avoid a match
2639 * of the string with itself at the start of the input file).
2640 */
2641 match_length_ = longest_match (hash_head);
2642 /* longest_match() sets match_start */
2643 }
2644 if(match_length_ >= minMatch)
2645 {
2646 tr_tally_dist(static_cast<std::uint16_t>(strstart_ - match_start_),
2647 static_cast<std::uint8_t>(match_length_ - minMatch), bflush);
2648
2649 lookahead_ -= match_length_;
2650
2651 /* Insert new strings in the hash table only if the match length
2652 * is not too large. This saves time but degrades compression.
2653 */
2654 if(match_length_ <= max_lazy_match_ &&
2655 lookahead_ >= minMatch) {
2656 match_length_--; /* string at strstart already in table */
2657 do
2658 {
2659 strstart_++;
2660 insert_string(hash_head);
2661 /* strstart never exceeds WSIZE-maxMatch, so there are
2662 * always minMatch bytes ahead.
2663 */
2664 }
2665 while(--match_length_ != 0);
2666 strstart_++;
2667 }
2668 else
2669 {
2670 strstart_ += match_length_;
2671 match_length_ = 0;
2672 ins_h_ = window_[strstart_];
2673 update_hash(ins_h_, window_[strstart_+1]);
2674 /* If lookahead < minMatch, ins_h is garbage, but it does not
2675 * matter since it will be recomputed at next deflate call.
2676 */
2677 }
2678 }
2679 else
2680 {
2681 /* No match, output a literal byte */
2682 tr_tally_lit(window_[strstart_], bflush);
2683 lookahead_--;
2684 strstart_++;
2685 }
2686 if(bflush)
2687 {
2688 flush_block(zs, false);
2689 if(zs.avail_out == 0)
2690 return need_more;
2691 }
2692 }
2693 insert_ = strstart_ < minMatch-1 ? strstart_ : minMatch-1;
2694 if(flush == Flush::finish)
2695 {
2696 flush_block(zs, true);
2697 if(zs.avail_out == 0)
2698 return finish_started;
2699 return finish_done;
2700 }
2701 if(last_lit_)
2702 {
2703 flush_block(zs, false);
2704 if(zs.avail_out == 0)
2705 return need_more;
2706 }
2707 return block_done;
2708 }
2709
2710 /* Same as above, but achieves better compression. We use a lazy
2711 evaluation for matches: a match is finally adopted only if there is
2712 no better match at the next window position.
2713 */
2714 template<class>
2715 inline
2716 auto
2717 deflate_stream::
2718 f_slow(z_params& zs, Flush flush) ->
2719 block_state
2720 {
2721 IPos hash_head; /* head of hash chain */
2722 bool bflush; /* set if current block must be flushed */
2723
2724 /* Process the input block. */
2725 for(;;)
2726 {
2727 /* Make sure that we always have enough lookahead, except
2728 * at the end of the input file. We need maxMatch bytes
2729 * for the next match, plus minMatch bytes to insert the
2730 * string following the next match.
2731 */
2732 if(lookahead_ < kMinLookahead)
2733 {
2734 fill_window(zs);
2735 if(lookahead_ < kMinLookahead && flush == Flush::none)
2736 return need_more;
2737 if(lookahead_ == 0)
2738 break; /* flush the current block */
2739 }
2740
2741 /* Insert the string window[strstart .. strstart+2] in the
2742 * dictionary, and set hash_head to the head of the hash chain:
2743 */
2744 hash_head = 0;
2745 if(lookahead_ >= minMatch)
2746 insert_string(hash_head);
2747
2748 /* Find the longest match, discarding those <= prev_length.
2749 */
2750 prev_length_ = match_length_, prev_match_ = match_start_;
2751 match_length_ = minMatch-1;
2752
2753 if(hash_head != 0 && prev_length_ < max_lazy_match_ &&
2754 strstart_ - hash_head <= max_dist())
2755 {
2756 /* To simplify the code, we prevent matches with the string
2757 * of window index 0 (in particular we have to avoid a match
2758 * of the string with itself at the start of the input file).
2759 */
2760 match_length_ = longest_match(hash_head);
2761 /* longest_match() sets match_start */
2762
2763 if(match_length_ <= 5 && (strategy_ == Strategy::filtered
2764 || (match_length_ == minMatch &&
2765 strstart_ - match_start_ > kTooFar)
2766 ))
2767 {
2768 /* If prev_match is also minMatch, match_start is garbage
2769 * but we will ignore the current match anyway.
2770 */
2771 match_length_ = minMatch-1;
2772 }
2773 }
2774 /* If there was a match at the previous step and the current
2775 * match is not better, output the previous match:
2776 */
2777 if(prev_length_ >= minMatch && match_length_ <= prev_length_)
2778 {
2779 /* Do not insert strings in hash table beyond this. */
2780 uInt max_insert = strstart_ + lookahead_ - minMatch;
2781
2782 tr_tally_dist(
2783 static_cast<std::uint16_t>(strstart_ - 1 - prev_match_),
2784 static_cast<std::uint8_t>(prev_length_ - minMatch), bflush);
2785
2786 /* Insert in hash table all strings up to the end of the match.
2787 * strstart-1 and strstart are already inserted. If there is not
2788 * enough lookahead, the last two strings are not inserted in
2789 * the hash table.
2790 */
2791 lookahead_ -= prev_length_-1;
2792 prev_length_ -= 2;
2793 do {
2794 if(++strstart_ <= max_insert)
2795 insert_string(hash_head);
2796 }
2797 while(--prev_length_ != 0);
2798 match_available_ = 0;
2799 match_length_ = minMatch-1;
2800 strstart_++;
2801
2802 if(bflush)
2803 {
2804 flush_block(zs, false);
2805 if(zs.avail_out == 0)
2806 return need_more;
2807 }
2808
2809 }
2810 else if(match_available_)
2811 {
2812 /* If there was no match at the previous position, output a
2813 * single literal. If there was a match but the current match
2814 * is longer, truncate the previous match to a single literal.
2815 */
2816 tr_tally_lit(window_[strstart_-1], bflush);
2817 if(bflush)
2818 flush_block(zs, false);
2819 strstart_++;
2820 lookahead_--;
2821 if(zs.avail_out == 0)
2822 return need_more;
2823 }
2824 else
2825 {
2826 /* There is no previous match to compare with, wait for
2827 * the next step to decide.
2828 */
2829 match_available_ = 1;
2830 strstart_++;
2831 lookahead_--;
2832 }
2833 }
2834 BOOST_ASSERT(flush != Flush::none);
2835 if(match_available_)
2836 {
2837 tr_tally_lit(window_[strstart_-1], bflush);
2838 match_available_ = 0;
2839 }
2840 insert_ = strstart_ < minMatch-1 ? strstart_ : minMatch-1;
2841 if(flush == Flush::finish)
2842 {
2843 flush_block(zs, true);
2844 if(zs.avail_out == 0)
2845 return finish_started;
2846 return finish_done;
2847 }
2848 if(last_lit_)
2849 {
2850 flush_block(zs, false);
2851 if(zs.avail_out == 0)
2852 return need_more;
2853 }
2854 return block_done;
2855 }
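
/* Editorial note: the lazy heuristic above defers each decision by
 * one byte. If a 3-byte match starts at position t but a 5-byte match
 * starts at t+1, the previous-match branch never fires at t; the
 * literal at t leaves through the match_available_ path and the
 * longer match is taken at t+1, whereas f_fast() would have committed
 * to the 3-byte match immediately.
 */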
2856
2857 /* For Strategy::rle, simply look for runs of bytes and generate matches only at distance
2858 one. Do not maintain a hash table. (It will be regenerated if this run of
2859 deflate switches away from Strategy::rle.)
2860 */
2861 template<class>
2862 inline
2863 auto
2864 deflate_stream::
2865 f_rle(z_params& zs, Flush flush) ->
2866 block_state
2867 {
2868 bool bflush; // set if current block must be flushed
2869 uInt prev; // byte at distance one to match
2870 Byte *scan, *strend; // scan goes up to strend for length of run
2871
2872 for(;;)
2873 {
2874 /* Make sure that we always have enough lookahead, except
2875 * at the end of the input file. We need maxMatch bytes
2876 * for the longest run, plus one for the unrolled loop.
2877 */
2878 if(lookahead_ <= maxMatch) {
2879 fill_window(zs);
2880 if(lookahead_ <= maxMatch && flush == Flush::none) {
2881 return need_more;
2882 }
2883 if(lookahead_ == 0) break; /* flush the current block */
2884 }
2885
2886 /* See how many times the previous byte repeats */
2887 match_length_ = 0;
2888 if(lookahead_ >= minMatch && strstart_ > 0) {
2889 scan = window_ + strstart_ - 1;
2890 prev = *scan;
2891 if(prev == *++scan && prev == *++scan && prev == *++scan) {
2892 strend = window_ + strstart_ + maxMatch;
2893 do {
2894 } while(prev == *++scan && prev == *++scan &&
2895 prev == *++scan && prev == *++scan &&
2896 prev == *++scan && prev == *++scan &&
2897 prev == *++scan && prev == *++scan &&
2898 scan < strend);
2899 match_length_ = maxMatch - (int)(strend - scan);
2900 if(match_length_ > lookahead_)
2901 match_length_ = lookahead_;
2902 }
2903 BOOST_ASSERT(scan <= window_+(uInt)(window_size_-1));
2904 }
2905
2906 /* Emit match if have run of minMatch or longer, else emit literal */
2907 if(match_length_ >= minMatch) {
2908 tr_tally_dist(std::uint16_t{1},
2909 static_cast<std::uint8_t>(match_length_ - minMatch),
2910 bflush);
2911
2912 lookahead_ -= match_length_;
2913 strstart_ += match_length_;
2914 match_length_ = 0;
2915 } else {
2916 /* No match, output a literal byte */
2917 tr_tally_lit(window_[strstart_], bflush);
2918 lookahead_--;
2919 strstart_++;
2920 }
2921 if(bflush)
2922 {
2923 flush_block(zs, false);
2924 if(zs.avail_out == 0)
2925 return need_more;
2926 }
2927 }
2928 insert_ = 0;
2929 if(flush == Flush::finish)
2930 {
2931 flush_block(zs, true);
2932 if(zs.avail_out == 0)
2933 return finish_started;
2934 return finish_done;
2935 }
2936 if(last_lit_)
2937 {
2938 flush_block(zs, false);
2939 if(zs.avail_out == 0)
2940 return need_more;
2941 }
2942 return block_done;
2943 }
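
/* Worked example (editorial note): for input "aaaaaa" the first 'a'
 * leaves via the literal path, since a run can only be scanned once
 * strstart_ > 0; the remaining five bytes then match the byte at
 * distance one and are tallied as a single run via
 * tr_tally_dist(1, 5 - minMatch, bflush).
 */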
2944
2945 /* ===========================================================================
2946 * For Strategy::huffman, do not look for matches. Do not maintain a hash table.
2947 * (It will be regenerated if this run of deflate switches away from Huffman.)
2948 */
2949 template<class>
2950 inline
2951 auto
2952 deflate_stream::
2953 f_huff(z_params& zs, Flush flush) ->
2954 block_state
2955 {
2956 bool bflush; // set if current block must be flushed
2957
2958 for(;;)
2959 {
2960 // Make sure that we have a literal to write.
2961 if(lookahead_ == 0)
2962 {
2963 fill_window(zs);
2964 if(lookahead_ == 0)
2965 {
2966 if(flush == Flush::none)
2967 return need_more;
2968 break; // flush the current block
2969 }
2970 }
2971
2972 // Output a literal byte
2973 match_length_ = 0;
2974 tr_tally_lit(window_[strstart_], bflush);
2975 lookahead_--;
2976 strstart_++;
2977 if(bflush)
2978 {
2979 flush_block(zs, false);
2980 if(zs.avail_out == 0)
2981 return need_more;
2982 }
2983 }
2984 insert_ = 0;
2985 if(flush == Flush::finish)
2986 {
2987 flush_block(zs, true);
2988 if(zs.avail_out == 0)
2989 return finish_started;
2990 return finish_done;
2991 }
2992 if(last_lit_)
2993 {
2994 flush_block(zs, false);
2995 if(zs.avail_out == 0)
2996 return need_more;
2997 }
2998 return block_done;
2999 }
3000
3001 } // detail
3002 } // zlib
3003 } // beast
3004 } // boost
3005
3006 #endif