]> git.proxmox.com Git - ceph.git/blob - ceph/src/boost/boost/beast/zlib/detail/inflate_stream.hpp
update sources to v12.2.3
[ceph.git] / ceph / src / boost / boost / beast / zlib / detail / inflate_stream.hpp
1 //
2 // Copyright (c) 2016-2017 Vinnie Falco (vinnie dot falco at gmail dot com)
3 //
4 // Distributed under the Boost Software License, Version 1.0. (See accompanying
5 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 //
7 // Official repository: https://github.com/boostorg/beast
8 //
9 // This is a derivative work based on Zlib, copyright below:
10 /*
11 Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler
12
13 This software is provided 'as-is', without any express or implied
14 warranty. In no event will the authors be held liable for any damages
15 arising from the use of this software.
16
17 Permission is granted to anyone to use this software for any purpose,
18 including commercial applications, and to alter it and redistribute it
19 freely, subject to the following restrictions:
20
21 1. The origin of this software must not be misrepresented; you must not
22 claim that you wrote the original software. If you use this software
23 in a product, an acknowledgment in the product documentation would be
24 appreciated but is not required.
25 2. Altered source versions must be plainly marked as such, and must not be
26 misrepresented as being the original software.
27 3. This notice may not be removed or altered from any source distribution.
28
29 Jean-loup Gailly Mark Adler
30 jloup@gzip.org madler@alumni.caltech.edu
31
32 The data format used by the zlib library is described by RFCs (Request for
33 Comments) 1950 to 1952 in the files http://tools.ietf.org/html/rfc1950
34 (zlib format), rfc1951 (deflate format) and rfc1952 (gzip format).
35 */
36
37 #ifndef BOOST_BEAST_ZLIB_DETAIL_INFLATE_STREAM_HPP
38 #define BOOST_BEAST_ZLIB_DETAIL_INFLATE_STREAM_HPP
39
40 #include <boost/beast/zlib/error.hpp>
41 #include <boost/beast/zlib/zlib.hpp>
42 #include <boost/beast/zlib/detail/bitstream.hpp>
43 #include <boost/beast/zlib/detail/ranges.hpp>
44 #include <boost/beast/zlib/detail/window.hpp>
45 #include <boost/beast/core/detail/type_traits.hpp>
46 #include <boost/throw_exception.hpp>
47 #include <algorithm>
48 #include <array>
49 #include <cstdint>
50 #include <cstring>
51 #include <stdexcept>
52
53 namespace boost {
54 namespace beast {
55 namespace zlib {
56 namespace detail {
57
58 class inflate_stream
59 {
60 protected:
61 inflate_stream()
62 {
63 w_.reset(15);
64 }
65
66 template<class = void> void doClear();
67 template<class = void> void doReset(int windowBits);
68 template<class = void> void doWrite(z_params& zs, Flush flush, error_code& ec);
69
70 void
71 doReset()
72 {
73 doReset(w_.bits());
74 }
75
76 private:
77 enum Mode
78 {
79 HEAD, // i: waiting for magic header
80 FLAGS, // i: waiting for method and flags (gzip)
81 TIME, // i: waiting for modification time (gzip)
82 OS, // i: waiting for extra flags and operating system (gzip)
83 EXLEN, // i: waiting for extra length (gzip)
84 EXTRA, // i: waiting for extra bytes (gzip)
85 NAME, // i: waiting for end of file name (gzip)
86 COMMENT, // i: waiting for end of comment (gzip)
87 HCRC, // i: waiting for header crc (gzip)
88 TYPE, // i: waiting for type bits, including last-flag bit
89 TYPEDO, // i: same, but skip check to exit inflate on new block
90 STORED, // i: waiting for stored size (length and complement)
91 COPY_, // i/o: same as COPY below, but only first time in
92 COPY, // i/o: waiting for input or output to copy stored block
93 TABLE, // i: waiting for dynamic block table lengths
94 LENLENS, // i: waiting for code length code lengths
95 CODELENS, // i: waiting for length/lit and distance code lengths
96 LEN_, // i: same as LEN below, but only first time in
97 LEN, // i: waiting for length/lit/eob code
98 LENEXT, // i: waiting for length extra bits
99 DIST, // i: waiting for distance code
100 DISTEXT,// i: waiting for distance extra bits
101 MATCH, // o: waiting for output space to copy string
102 LIT, // o: waiting for output space to write literal
103 CHECK, // i: waiting for 32-bit check value
104 LENGTH, // i: waiting for 32-bit length (gzip)
105 DONE, // finished check, done -- remain here until reset
106 BAD, // got a data error -- remain here until reset
107 SYNC // looking for synchronization bytes to restart inflate()
108 };
109
110 /* Structure for decoding tables. Each entry provides either the
111 information needed to do the operation requested by the code that
112 indexed that table entry, or it provides a pointer to another
113 table that indexes more bits of the code. op indicates whether
114 the entry is a pointer to another table, a literal, a length or
115 distance, an end-of-block, or an invalid code. For a table
116 pointer, the low four bits of op is the number of index bits of
117 that table. For a length or distance, the low four bits of op
118 is the number of extra bits to get after the code. bits is
119 the number of bits in this code or part of the code to drop off
120 of the bit buffer. val is the actual byte to output in the case
121 of a literal, the base length or distance, or the offset from
122 the current table to the next table. Each entry is four bytes.
123
124 op values as set by inflate_table():
125
126 00000000 - literal
127 0000tttt - table link, tttt != 0 is the number of table index bits
128 0001eeee - length or distance, eeee is the number of extra bits
129 01100000 - end of block
130 01000000 - invalid code
131 */
132 struct code
133 {
134 std::uint8_t op; // operation, extra bits, table bits
135 std::uint8_t bits; // bits in this part of the code
136 std::uint16_t val; // offset in table or code value
137 };
138
139 /* Maximum size of the dynamic table. The maximum number of code
140 structures is 1444, which is the sum of 852 for literal/length codes
141 and 592 for distance codes. These values were found by exhaustive
142 searches using the program examples/enough.c found in the zlib
143 distribtution. The arguments to that program are the number of
144 symbols, the initial root table size, and the maximum bit length
145 of a code. "enough 286 9 15" for literal/length codes returns
146 returns 852, and "enough 30 6 15" for distance codes returns 592.
147 The initial root table size (9 or 6) is found in the fifth argument
148 of the inflate_table() calls in inflate.c and infback.c. If the
149 root table size is changed, then these maximum sizes would be need
150 to be recalculated and updated.
151 */
152 static std::uint16_t constexpr kEnoughLens = 852;
153 static std::uint16_t constexpr kEnoughDists = 592;
154 static std::uint16_t constexpr kEnough = kEnoughLens + kEnoughDists;
155
156 struct codes
157 {
158 code const* lencode;
159 code const* distcode;
160 unsigned lenbits; // VFALCO use std::uint8_t
161 unsigned distbits;
162 };
163
164 // Type of code to build for inflate_table()
165 enum class build
166 {
167 codes,
168 lens,
169 dists
170 };
171
172 template<class = void>
173 static
174 void
175 inflate_table(
176 build type,
177 std::uint16_t* lens,
178 std::size_t codes,
179 code** table,
180 unsigned *bits,
181 std::uint16_t* work,
182 error_code& ec);
183
184 template<class = void>
185 static
186 codes const&
187 get_fixed_tables();
188
189 template<class = void>
190 void
191 fixedTables();
192
193 template<class = void>
194 void
195 inflate_fast(ranges& r, error_code& ec);
196
197 bitstream bi_;
198
199 Mode mode_ = HEAD; // current inflate mode
200 int last_ = 0; // true if processing last block
201 unsigned dmax_ = 32768U; // zlib header max distance (INFLATE_STRICT)
202
203 // sliding window
204 window w_;
205
206 // for string and stored block copying
207 unsigned length_; // literal or length of data to copy
208 unsigned offset_; // distance back to copy string from
209
210 // for table and code decoding
211 unsigned extra_; // extra bits needed
212
213 // dynamic table building
214 unsigned ncode_; // number of code length code lengths
215 unsigned nlen_; // number of length code lengths
216 unsigned ndist_; // number of distance code lengths
217 unsigned have_; // number of code lengths in lens[]
218 unsigned short lens_[320]; // temporary storage for code lengths
219 unsigned short work_[288]; // work area for code table building
220 code codes_[kEnough]; // space for code tables
221 code *next_ = codes_; // next available space in codes[]
222 int back_ = -1; // bits back of last unprocessed length/lit
223 unsigned was_; // initial length of match
224
225 // fixed and dynamic code tables
226 code const* lencode_ = codes_ ; // starting table for length/literal codes
227 code const* distcode_ = codes_; // starting table for distance codes
228 unsigned lenbits_; // index bits for lencode
229 unsigned distbits_; // index bits for distcode
230 };
231
232 //------------------------------------------------------------------------------
233
234 template<class>
235 void
236 inflate_stream::
237 doReset(int windowBits)
238 {
239 if(windowBits < 8 || windowBits > 15)
240 BOOST_THROW_EXCEPTION(std::domain_error{
241 "windowBits out of range"});
242 w_.reset(windowBits);
243
244 bi_.flush();
245 mode_ = HEAD;
246 last_ = 0;
247 dmax_ = 32768U;
248 lencode_ = codes_;
249 distcode_ = codes_;
250 next_ = codes_;
251 back_ = -1;
252 }
253
254 template<class>
255 void
256 inflate_stream::
257 doClear()
258 {
259 }
260
261 template<class>
262 void
263 inflate_stream::
264 doWrite(z_params& zs, Flush flush, error_code& ec)
265 {
266 ranges r;
267 r.in.first = reinterpret_cast<
268 std::uint8_t const*>(zs.next_in);
269 r.in.last = r.in.first + zs.avail_in;
270 r.in.next = r.in.first;
271 r.out.first = reinterpret_cast<
272 std::uint8_t*>(zs.next_out);
273 r.out.last = r.out.first + zs.avail_out;
274 r.out.next = r.out.first;
275
276 auto const done =
277 [&]
278 {
279 /*
280 Return from inflate(), updating the total counts and the check value.
281 If there was no progress during the inflate() call, return a buffer
282 error. Call updatewindow() to create and/or update the window state.
283 Note: a memory error from inflate() is non-recoverable.
284 */
285
286
287 // VFALCO TODO Don't allocate update the window unless necessary
288 if(/*wsize_ ||*/ (r.out.used() && mode_ < BAD &&
289 (mode_ < CHECK || flush != Flush::finish)))
290 w_.write(r.out.first, r.out.used());
291
292 zs.next_in = r.in.next;
293 zs.avail_in = r.in.avail();
294 zs.next_out = r.out.next;
295 zs.avail_out = r.out.avail();
296 zs.total_in += r.in.used();
297 zs.total_out += r.out.used();
298 zs.data_type = bi_.size() + (last_ ? 64 : 0) +
299 (mode_ == TYPE ? 128 : 0) +
300 (mode_ == LEN_ || mode_ == COPY_ ? 256 : 0);
301
302 if(((! r.in.used() && ! r.out.used()) ||
303 flush == Flush::finish) && ! ec)
304 ec = error::need_buffers;
305 };
306 auto const err =
307 [&](error e)
308 {
309 ec = e;
310 mode_ = BAD;
311 };
312
313 if(mode_ == TYPE)
314 mode_ = TYPEDO;
315
316 for(;;)
317 {
318 switch(mode_)
319 {
320 case HEAD:
321 mode_ = TYPEDO;
322 break;
323
324 case TYPE:
325 if(flush == Flush::block || flush == Flush::trees)
326 return done();
327 // fall through
328
329 case TYPEDO:
330 {
331 if(last_)
332 {
333 bi_.flush_byte();
334 mode_ = CHECK;
335 break;
336 }
337 if(! bi_.fill(3, r.in.next, r.in.last))
338 return done();
339 std::uint8_t v;
340 bi_.read(v, 1);
341 last_ = v != 0;
342 bi_.read(v, 2);
343 switch(v)
344 {
345 case 0:
346 // uncompressed block
347 mode_ = STORED;
348 break;
349 case 1:
350 // fixed Huffman table
351 fixedTables();
352 mode_ = LEN_; /* decode codes */
353 if(flush == Flush::trees)
354 return done();
355 break;
356 case 2:
357 // dynamic Huffman table
358 mode_ = TABLE;
359 break;
360
361 default:
362 return err(error::invalid_block_type);
363 }
364 break;
365 }
366
367 case STORED:
368 {
369 bi_.flush_byte();
370 std::uint32_t v;
371 if(! bi_.fill(32, r.in.next, r.in.last))
372 return done();
373 bi_.peek(v, 32);
374 length_ = v & 0xffff;
375 if(length_ != ((v >> 16) ^ 0xffff))
376 return err(error::invalid_stored_length);
377 // flush instead of read, otherwise
378 // undefined right shift behavior.
379 bi_.flush();
380 mode_ = COPY_;
381 if(flush == Flush::trees)
382 return done();
383 BOOST_BEAST_FALLTHROUGH;
384 }
385
386 case COPY_:
387 mode_ = COPY;
388 BOOST_BEAST_FALLTHROUGH;
389
390 case COPY:
391 {
392 auto copy = length_;
393 if(copy == 0)
394 {
395 mode_ = TYPE;
396 break;
397 }
398 copy = clamp(copy, r.in.avail());
399 copy = clamp(copy, r.out.avail());
400 if(copy == 0)
401 return done();
402 std::memcpy(r.out.next, r.in.next, copy);
403 r.in.next += copy;
404 r.out.next += copy;
405 length_ -= copy;
406 break;
407 }
408
409 case TABLE:
410 if(! bi_.fill(5 + 5 + 4, r.in.next, r.in.last))
411 return done();
412 bi_.read(nlen_, 5);
413 nlen_ += 257;
414 bi_.read(ndist_, 5);
415 ndist_ += 1;
416 bi_.read(ncode_, 4);
417 ncode_ += 4;
418 if(nlen_ > 286 || ndist_ > 30)
419 return err(error::too_many_symbols);
420 have_ = 0;
421 mode_ = LENLENS;
422 BOOST_BEAST_FALLTHROUGH;
423
424 case LENLENS:
425 {
426 static std::array<std::uint8_t, 19> constexpr order = {{
427 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}};
428 while(have_ < ncode_)
429 {
430 if(! bi_.fill(3, r.in.next, r.in.last))
431 return done();
432 bi_.read(lens_[order[have_]], 3);
433 ++have_;
434 }
435 while(have_ < order.size())
436 lens_[order[have_++]] = 0;
437
438 next_ = &codes_[0];
439 lencode_ = next_;
440 lenbits_ = 7;
441 inflate_table(build::codes, &lens_[0],
442 order.size(), &next_, &lenbits_, work_, ec);
443 if(ec)
444 {
445 mode_ = BAD;
446 break;
447 }
448 have_ = 0;
449 mode_ = CODELENS;
450 BOOST_BEAST_FALLTHROUGH;
451 }
452
453 case CODELENS:
454 {
455 while(have_ < nlen_ + ndist_)
456 {
457 std::uint16_t v;
458 if(! bi_.fill(lenbits_, r.in.next, r.in.last))
459 return done();
460 bi_.peek(v, lenbits_);
461 auto cp = &lencode_[v];
462 if(cp->val < 16)
463 {
464 bi_.drop(cp->bits);
465 lens_[have_++] = cp->val;
466 }
467 else
468 {
469 std::uint16_t len;
470 std::uint16_t copy;
471 if(cp->val == 16)
472 {
473 if(! bi_.fill(cp->bits + 2, r.in.next, r.in.last))
474 return done();
475 bi_.drop(cp->bits);
476 if(have_ == 0)
477 return err(error::invalid_bit_length_repeat);
478 bi_.read(copy, 2);
479 len = lens_[have_ - 1];
480 copy += 3;
481
482 }
483 else if(cp->val == 17)
484 {
485 if(! bi_.fill(cp->bits + 3, r.in.next, r.in.last))
486 return done();
487 bi_.drop(cp->bits);
488 bi_.read(copy, 3);
489 len = 0;
490 copy += 3;
491 }
492 else
493 {
494 if(! bi_.fill(cp->bits + 7, r.in.next, r.in.last))
495 return done();
496 bi_.drop(cp->bits);
497 bi_.read(copy, 7);
498 len = 0;
499 copy += 11;
500 }
501 if(have_ + copy > nlen_ + ndist_)
502 return err(error::invalid_bit_length_repeat);
503 std::fill(&lens_[have_], &lens_[have_ + copy], len);
504 have_ += copy;
505 copy = 0;
506 }
507 }
508 // handle error breaks in while
509 if(mode_ == BAD)
510 break;
511 // check for end-of-block code (better have one)
512 if(lens_[256] == 0)
513 return err(error::missing_eob);
514 /* build code tables -- note: do not change the lenbits or distbits
515 values here (9 and 6) without reading the comments in inftrees.hpp
516 concerning the kEnough constants, which depend on those values */
517 next_ = &codes_[0];
518 lencode_ = next_;
519 lenbits_ = 9;
520 inflate_table(build::lens, &lens_[0],
521 nlen_, &next_, &lenbits_, work_, ec);
522 if(ec)
523 {
524 mode_ = BAD;
525 return;
526 }
527 distcode_ = next_;
528 distbits_ = 6;
529 inflate_table(build::dists, lens_ + nlen_,
530 ndist_, &next_, &distbits_, work_, ec);
531 if(ec)
532 {
533 mode_ = BAD;
534 return;
535 }
536 mode_ = LEN_;
537 if(flush == Flush::trees)
538 return done();
539 BOOST_BEAST_FALLTHROUGH;
540 }
541
542 case LEN_:
543 mode_ = LEN;
544 BOOST_BEAST_FALLTHROUGH;
545
546 case LEN:
547 {
548 if(r.in.avail() >= 6 && r.out.avail() >= 258)
549 {
550 inflate_fast(r, ec);
551 if(ec)
552 {
553 mode_ = BAD;
554 return;
555 }
556 if(mode_ == TYPE)
557 back_ = -1;
558 break;
559 }
560 if(! bi_.fill(lenbits_, r.in.next, r.in.last))
561 return done();
562 std::uint16_t v;
563 back_ = 0;
564 bi_.peek(v, lenbits_);
565 auto cp = &lencode_[v];
566 if(cp->op && (cp->op & 0xf0) == 0)
567 {
568 auto prev = cp;
569 if(! bi_.fill(prev->bits + prev->op, r.in.next, r.in.last))
570 return done();
571 bi_.peek(v, prev->bits + prev->op);
572 cp = &lencode_[prev->val + (v >> prev->bits)];
573 bi_.drop(prev->bits + cp->bits);
574 back_ += prev->bits + cp->bits;
575 }
576 else
577 {
578 bi_.drop(cp->bits);
579 back_ += cp->bits;
580 }
581 length_ = cp->val;
582 if(cp->op == 0)
583 {
584 mode_ = LIT;
585 break;
586 }
587 if(cp->op & 32)
588 {
589 back_ = -1;
590 mode_ = TYPE;
591 break;
592 }
593 if(cp->op & 64)
594 return err(error::invalid_literal_length);
595 extra_ = cp->op & 15;
596 mode_ = LENEXT;
597 BOOST_BEAST_FALLTHROUGH;
598 }
599
600 case LENEXT:
601 if(extra_)
602 {
603 if(! bi_.fill(extra_, r.in.next, r.in.last))
604 return done();
605 std::uint16_t v;
606 bi_.read(v, extra_);
607 length_ += v;
608 back_ += extra_;
609 }
610 was_ = length_;
611 mode_ = DIST;
612 BOOST_BEAST_FALLTHROUGH;
613
614 case DIST:
615 {
616 if(! bi_.fill(distbits_, r.in.next, r.in.last))
617 return done();
618 std::uint16_t v;
619 bi_.peek(v, distbits_);
620 auto cp = &distcode_[v];
621 if((cp->op & 0xf0) == 0)
622 {
623 auto prev = cp;
624 if(! bi_.fill(prev->bits + prev->op, r.in.next, r.in.last))
625 return done();
626 bi_.peek(v, prev->bits + prev->op);
627 cp = &distcode_[prev->val + (v >> prev->bits)];
628 bi_.drop(prev->bits + cp->bits);
629 back_ += prev->bits + cp->bits;
630 }
631 else
632 {
633 bi_.drop(cp->bits);
634 back_ += cp->bits;
635 }
636 if(cp->op & 64)
637 return err(error::invalid_distance_code);
638 offset_ = cp->val;
639 extra_ = cp->op & 15;
640 mode_ = DISTEXT;
641 BOOST_BEAST_FALLTHROUGH;
642 }
643
644 case DISTEXT:
645 if(extra_)
646 {
647 std::uint16_t v;
648 if(! bi_.fill(extra_, r.in.next, r.in.last))
649 return done();
650 bi_.read(v, extra_);
651 offset_ += v;
652 back_ += extra_;
653 }
654 #ifdef INFLATE_STRICT
655 if(offset_ > dmax_)
656 return err(error::invalid_distance);
657 #endif
658 mode_ = MATCH;
659 BOOST_BEAST_FALLTHROUGH;
660
661 case MATCH:
662 {
663 if(! r.out.avail())
664 return done();
665 if(offset_ > r.out.used())
666 {
667 // copy from window
668 auto offset = static_cast<std::uint16_t>(
669 offset_ - r.out.used());
670 if(offset > w_.size())
671 return err(error::invalid_distance);
672 auto const n = clamp(clamp(
673 length_, offset), r.out.avail());
674 w_.read(r.out.next, offset, n);
675 r.out.next += n;
676 length_ -= n;
677 }
678 else
679 {
680 // copy from output
681 auto in = r.out.next - offset_;
682 auto n = clamp(length_, r.out.avail());
683 length_ -= n;
684 while(n--)
685 *r.out.next++ = *in++;
686 }
687 if(length_ == 0)
688 mode_ = LEN;
689 break;
690 }
691
692 case LIT:
693 {
694 if(! r.out.avail())
695 return done();
696 auto const v = static_cast<std::uint8_t>(length_);
697 *r.out.next++ = v;
698 mode_ = LEN;
699 break;
700 }
701
702 case CHECK:
703 mode_ = DONE;
704 BOOST_BEAST_FALLTHROUGH;
705
706 case DONE:
707 ec = error::end_of_stream;
708 return done();
709
710 case BAD:
711 return done();
712
713 case SYNC:
714 default:
715 BOOST_THROW_EXCEPTION(std::logic_error{
716 "stream error"});
717 }
718 }
719 }
720
721 //------------------------------------------------------------------------------
722
723 /* Build a set of tables to decode the provided canonical Huffman code.
724 The code lengths are lens[0..codes-1]. The result starts at *table,
725 whose indices are 0..2^bits-1. work is a writable array of at least
726 lens shorts, which is used as a work area. type is the type of code
727 to be generated, build::codes, build::lens, or build::dists. On
728 return, zero is success, -1 is an invalid code, and +1 means that
729 kEnough isn't enough. table on return points to the next available
730 entry's address. bits is the requested root table index bits, and
731 on return it is the actual root table index bits. It will differ if
732 the request is greater than the longest code or if it is less than
733 the shortest code.
734 */
735 template<class>
736 void
737 inflate_stream::
738 inflate_table(
739 build type,
740 std::uint16_t* lens,
741 std::size_t codes,
742 code** table,
743 unsigned *bits,
744 std::uint16_t* work,
745 error_code& ec)
746 {
747 unsigned len; // a code's length in bits
748 unsigned sym; // index of code symbols
749 unsigned min, max; // minimum and maximum code lengths
750 unsigned root; // number of index bits for root table
751 unsigned curr; // number of index bits for current table
752 unsigned drop; // code bits to drop for sub-table
753 int left; // number of prefix codes available
754 unsigned used; // code entries in table used
755 unsigned huff; // Huffman code
756 unsigned incr; // for incrementing code, index
757 unsigned fill; // index for replicating entries
758 unsigned low; // low bits for current root entry
759 unsigned mask; // mask for low root bits
760 code here; // table entry for duplication
761 code *next; // next available space in table
762 std::uint16_t const* base; // base value table to use
763 std::uint16_t const* extra; // extra bits table to use
764 int end; // use base and extra for symbol > end
765 std::uint16_t count[15+1]; // number of codes of each length
766 std::uint16_t offs[15+1]; // offsets in table for each length
767
768 // Length codes 257..285 base
769 static std::uint16_t constexpr lbase[31] = {
770 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
771 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
772
773 // Length codes 257..285 extra
774 static std::uint16_t constexpr lext[31] = {
775 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18,
776 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 72, 78};
777
778 // Distance codes 0..29 base
779 static std::uint16_t constexpr dbase[32] = {
780 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
781 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
782 8193, 12289, 16385, 24577, 0, 0};
783
784 // Distance codes 0..29 extra
785 static std::uint16_t constexpr dext[32] = {
786 16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22,
787 23, 23, 24, 24, 25, 25, 26, 26, 27, 27,
788 28, 28, 29, 29, 64, 64};
789
790 /*
791 Process a set of code lengths to create a canonical Huffman code. The
792 code lengths are lens[0..codes-1]. Each length corresponds to the
793 symbols 0..codes-1. The Huffman code is generated by first sorting the
794 symbols by length from short to long, and retaining the symbol order
795 for codes with equal lengths. Then the code starts with all zero bits
796 for the first code of the shortest length, and the codes are integer
797 increments for the same length, and zeros are appended as the length
798 increases. For the deflate format, these bits are stored backwards
799 from their more natural integer increment ordering, and so when the
800 decoding tables are built in the large loop below, the integer codes
801 are incremented backwards.
802
803 This routine assumes, but does not check, that all of the entries in
804 lens[] are in the range 0..15. The caller must assure this.
805 1..15 is interpreted as that code length. zero means that that
806 symbol does not occur in this code.
807
808 The codes are sorted by computing a count of codes for each length,
809 creating from that a table of starting indices for each length in the
810 sorted table, and then entering the symbols in order in the sorted
811 table. The sorted table is work[], with that space being provided by
812 the caller.
813
814 The length counts are used for other purposes as well, i.e. finding
815 the minimum and maximum length codes, determining if there are any
816 codes at all, checking for a valid set of lengths, and looking ahead
817 at length counts to determine sub-table sizes when building the
818 decoding tables.
819 */
820
821 /* accumulate lengths for codes (assumes lens[] all in 0..15) */
822 for (len = 0; len <= 15; len++)
823 count[len] = 0;
824 for (sym = 0; sym < codes; sym++)
825 count[lens[sym]]++;
826
827 /* bound code lengths, force root to be within code lengths */
828 root = *bits;
829 for (max = 15; max >= 1; max--)
830 if (count[max] != 0)
831 break;
832 if (root > max)
833 root = max;
834 if (max == 0)
835 { /* no symbols to code at all */
836 here.op = (std::uint8_t)64; /* invalid code marker */
837 here.bits = (std::uint8_t)1;
838 here.val = (std::uint16_t)0;
839 *(*table)++ = here; /* make a table to force an error */
840 *(*table)++ = here;
841 *bits = 1;
842 return; /* no symbols, but wait for decoding to report error */
843 }
844 for (min = 1; min < max; min++)
845 if (count[min] != 0)
846 break;
847 if (root < min)
848 root = min;
849
850 /* check for an over-subscribed or incomplete set of lengths */
851 left = 1;
852 for (len = 1; len <= 15; len++)
853 {
854 left <<= 1;
855 left -= count[len];
856 if (left < 0)
857 {
858 ec = error::over_subscribed_length;
859 return;
860 }
861 }
862 if (left > 0 && (type == build::codes || max != 1))
863 {
864 ec = error::incomplete_length_set;
865 return;
866 }
867
868 /* generate offsets into symbol table for each length for sorting */
869 offs[1] = 0;
870 for (len = 1; len < 15; len++)
871 offs[len + 1] = offs[len] + count[len];
872
873 /* sort symbols by length, by symbol order within each length */
874 for (sym = 0; sym < codes; sym++)
875 if (lens[sym] != 0)
876 work[offs[lens[sym]]++] = (std::uint16_t)sym;
877
878 /*
879 Create and fill in decoding tables. In this loop, the table being
880 filled is at next and has curr index bits. The code being used is huff
881 with length len. That code is converted to an index by dropping drop
882 bits off of the bottom. For codes where len is less than drop + curr,
883 those top drop + curr - len bits are incremented through all values to
884 fill the table with replicated entries.
885
886 root is the number of index bits for the root table. When len exceeds
887 root, sub-tables are created pointed to by the root entry with an index
888 of the low root bits of huff. This is saved in low to check for when a
889 new sub-table should be started. drop is zero when the root table is
890 being filled, and drop is root when sub-tables are being filled.
891
892 When a new sub-table is needed, it is necessary to look ahead in the
893 code lengths to determine what size sub-table is needed. The length
894 counts are used for this, and so count[] is decremented as codes are
895 entered in the tables.
896
897 used keeps track of how many table entries have been allocated from the
898 provided *table space. It is checked for build::lens and DIST tables against
899 the constants kEnoughLens and kEnoughDists to guard against changes in
900 the initial root table size constants. See the comments in inftrees.hpp
901 for more information.
902
903 sym increments through all symbols, and the loop terminates when
904 all codes of length max, i.e. all codes, have been processed. This
905 routine permits incomplete codes, so another loop after this one fills
906 in the rest of the decoding tables with invalid code markers.
907 */
908
909 /* set up for code type */
910 switch (type)
911 {
912 case build::codes:
913 base = extra = work; /* dummy value--not used */
914 end = 19;
915 break;
916 case build::lens:
917 base = lbase;
918 base -= 257;
919 extra = lext;
920 extra -= 257;
921 end = 256;
922 break;
923 default: /* build::dists */
924 base = dbase;
925 extra = dext;
926 end = -1;
927 }
928
929 /* initialize state for loop */
930 huff = 0; /* starting code */
931 sym = 0; /* starting code symbol */
932 len = min; /* starting code length */
933 next = *table; /* current table to fill in */
934 curr = root; /* current table index bits */
935 drop = 0; /* current bits to drop from code for index */
936 low = (unsigned)(-1); /* trigger new sub-table when len > root */
937 used = 1U << root; /* use root table entries */
938 mask = used - 1; /* mask for comparing low */
939
940 auto const not_enough = []
941 {
942 BOOST_THROW_EXCEPTION(std::logic_error{
943 "insufficient output size when inflating tables"});
944 };
945
946 // check available table space
947 if ((type == build::lens && used > kEnoughLens) ||
948 (type == build::dists && used > kEnoughDists))
949 return not_enough();
950
951 /* process all codes and make table entries */
952 for (;;)
953 {
954 /* create table entry */
955 here.bits = (std::uint8_t)(len - drop);
956 if ((int)(work[sym]) < end)
957 {
958 here.op = (std::uint8_t)0;
959 here.val = work[sym];
960 }
961 else if ((int)(work[sym]) > end)
962 {
963 here.op = (std::uint8_t)(extra[work[sym]]);
964 here.val = base[work[sym]];
965 }
966 else
967 {
968 here.op = (std::uint8_t)(32 + 64); /* end of block */
969 here.val = 0;
970 }
971
972 /* replicate for those indices with low len bits equal to huff */
973 incr = 1U << (len - drop);
974 fill = 1U << curr;
975 min = fill; /* save offset to next table */
976 do
977 {
978 fill -= incr;
979 next[(huff >> drop) + fill] = here;
980 } while (fill != 0);
981
982 /* backwards increment the len-bit code huff */
983 incr = 1U << (len - 1);
984 while (huff & incr)
985 incr >>= 1;
986 if (incr != 0)
987 {
988 huff &= incr - 1;
989 huff += incr;
990 }
991 else
992 huff = 0;
993
994 /* go to next symbol, update count, len */
995 sym++;
996 if (--(count[len]) == 0)
997 {
998 if (len == max) break;
999 len = lens[work[sym]];
1000 }
1001
1002 /* create new sub-table if needed */
1003 if (len > root && (huff & mask) != low)
1004 {
1005 /* if first time, transition to sub-tables */
1006 if (drop == 0)
1007 drop = root;
1008
1009 /* increment past last table */
1010 next += min; /* here min is 1 << curr */
1011
1012 /* determine length of next table */
1013 curr = len - drop;
1014 left = (int)(1 << curr);
1015 while (curr + drop < max)
1016 {
1017 left -= count[curr + drop];
1018 if (left <= 0) break;
1019 curr++;
1020 left <<= 1;
1021 }
1022
1023 /* check for enough space */
1024 used += 1U << curr;
1025 if ((type == build::lens && used > kEnoughLens) ||
1026 (type == build::dists && used > kEnoughDists))
1027 return not_enough();
1028
1029 /* point entry in root table to sub-table */
1030 low = huff & mask;
1031 (*table)[low].op = (std::uint8_t)curr;
1032 (*table)[low].bits = (std::uint8_t)root;
1033 (*table)[low].val = (std::uint16_t)(next - *table);
1034 }
1035 }
1036
1037 /* fill in remaining table entry if code is incomplete (guaranteed to have
1038 at most one remaining entry, since if the code is incomplete, the
1039 maximum code length that was allowed to get this far is one bit) */
1040 if (huff != 0)
1041 {
1042 here.op = 64; // invalid code marker
1043 here.bits = (std::uint8_t)(len - drop);
1044 here.val = 0;
1045 next[huff] = here;
1046 }
1047
1048 *table += used;
1049 *bits = root;
1050 }
1051
1052 template<class>
1053 auto
1054 inflate_stream::
1055 get_fixed_tables() ->
1056 codes const&
1057 {
1058 struct fixed_codes : codes
1059 {
1060 code len_[512];
1061 code dist_[32];
1062
1063 fixed_codes()
1064 {
1065 lencode = len_;
1066 lenbits = 9;
1067 distcode = dist_;
1068 distbits = 5;
1069
1070 std::uint16_t lens[320];
1071 std::uint16_t work[288];
1072
1073 std::fill(&lens[ 0], &lens[144], std::uint16_t{8});
1074 std::fill(&lens[144], &lens[256], std::uint16_t{9});
1075 std::fill(&lens[256], &lens[280], std::uint16_t{7});
1076 std::fill(&lens[280], &lens[288], std::uint16_t{8});
1077
1078 {
1079 error_code ec;
1080 auto next = &len_[0];
1081 inflate_table(build::lens,
1082 lens, 288, &next, &lenbits, work, ec);
1083 if(ec)
1084 BOOST_THROW_EXCEPTION(std::logic_error{ec.message()});
1085 }
1086
1087 // VFALCO These fixups are from ZLib
1088 len_[ 99].op = 64;
1089 len_[227].op = 64;
1090 len_[355].op = 64;
1091 len_[483].op = 64;
1092
1093 {
1094 error_code ec;
1095 auto next = &dist_[0];
1096 std::fill(&lens[0], &lens[32], std::uint16_t{5});
1097 inflate_table(build::dists,
1098 lens, 32, &next, &distbits, work, ec);
1099 if(ec)
1100 BOOST_THROW_EXCEPTION(std::logic_error{ec.message()});
1101 }
1102 }
1103 };
1104
1105 static fixed_codes const fc;
1106 return fc;
1107 }
1108
1109 template<class>
1110 void
1111 inflate_stream::
1112 fixedTables()
1113 {
1114 auto const fc = get_fixed_tables();
1115 lencode_ = fc.lencode;
1116 lenbits_ = fc.lenbits;
1117 distcode_ = fc.distcode;
1118 distbits_ = fc.distbits;
1119 }
1120
1121 /*
1122 Decode literal, length, and distance codes and write out the resulting
1123 literal and match bytes until either not enough input or output is
1124 available, an end-of-block is encountered, or a data error is encountered.
1125 When large enough input and output buffers are supplied to inflate(), for
1126 example, a 16K input buffer and a 64K output buffer, more than 95% of the
1127 inflate execution time is spent in this routine.
1128
1129 Entry assumptions:
1130
1131 state->mode_ == LEN
1132 zs.avail_in >= 6
1133 zs.avail_out >= 258
1134 start >= zs.avail_out
1135 state->bits_ < 8
1136
1137 On return, state->mode_ is one of:
1138
1139 LEN -- ran out of enough output space or enough available input
1140 TYPE -- reached end of block code, inflate() to interpret next block
1141 BAD -- error in block data
1142
1143 Notes:
1144
1145 - The maximum input bits used by a length/distance pair is 15 bits for the
1146 length code, 5 bits for the length extra, 15 bits for the distance code,
1147 and 13 bits for the distance extra. This totals 48 bits, or six bytes.
1148 Therefore if zs.avail_in >= 6, then there is enough input to avoid
1149 checking for available input while decoding.
1150
1151 - The maximum bytes that a single length/distance pair can output is 258
1152 bytes, which is the maximum length that can be coded. inflate_fast()
1153 requires zs.avail_out >= 258 for each loop to avoid checking for
1154 output space.
1155
1156 inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
1157 - Using bit fields for code structure
1158 - Different op definition to avoid & for extra bits (do & for table bits)
1159 - Three separate decoding do-loops for direct, window, and wnext == 0
1160 - Special case for distance > 1 copies to do overlapped load and store copy
1161 - Explicit branch predictions (based on measured branch probabilities)
1162 - Deferring match copy and interspersed it with decoding subsequent codes
1163 - Swapping literal/length else
1164 - Swapping window/direct else
1165 - Larger unrolled copy loops (three is about right)
1166 - Moving len -= 3 statement into middle of loop
1167 */
1168 template<class>
1169 void
1170 inflate_stream::
1171 inflate_fast(ranges& r, error_code& ec)
1172 {
1173 unsigned char const* last; // have enough input while in < last
1174 unsigned char *end; // while out < end, enough space available
1175 std::size_t op; // code bits, operation, extra bits, or window position, window bytes to copy
1176 unsigned len; // match length, unused bytes
1177 unsigned dist; // match distance
1178 unsigned const lmask =
1179 (1U << lenbits_) - 1; // mask for first level of length codes
1180 unsigned const dmask =
1181 (1U << distbits_) - 1; // mask for first level of distance codes
1182
1183 last = r.in.next + (r.in.avail() - 5);
1184 end = r.out.next + (r.out.avail() - 257);
1185
1186 /* decode literals and length/distances until end-of-block or not enough
1187 input data or output space */
1188 do
1189 {
1190 if(bi_.size() < 15)
1191 bi_.fill_16(r.in.next);
1192 auto cp = &lencode_[bi_.peek_fast() & lmask];
1193 dolen:
1194 bi_.drop(cp->bits);
1195 op = (unsigned)(cp->op);
1196 if(op == 0)
1197 {
1198 // literal
1199 *r.out.next++ = (unsigned char)(cp->val);
1200 }
1201 else if(op & 16)
1202 {
1203 // length base
1204 len = (unsigned)(cp->val);
1205 op &= 15; // number of extra bits
1206 if(op)
1207 {
1208 if(bi_.size() < op)
1209 bi_.fill_8(r.in.next);
1210 len += (unsigned)bi_.peek_fast() & ((1U << op) - 1);
1211 bi_.drop(op);
1212 }
1213 if(bi_.size() < 15)
1214 bi_.fill_16(r.in.next);
1215 cp = &distcode_[bi_.peek_fast() & dmask];
1216 dodist:
1217 bi_.drop(cp->bits);
1218 op = (unsigned)(cp->op);
1219 if(op & 16)
1220 {
1221 // distance base
1222 dist = (unsigned)(cp->val);
1223 op &= 15; // number of extra bits
1224 if(bi_.size() < op)
1225 {
1226 bi_.fill_8(r.in.next);
1227 if(bi_.size() < op)
1228 bi_.fill_8(r.in.next);
1229 }
1230 dist += (unsigned)bi_.peek_fast() & ((1U << op) - 1);
1231 #ifdef INFLATE_STRICT
1232 if(dist > dmax_)
1233 {
1234 ec = error::invalid_distance;
1235 mode_ = BAD;
1236 break;
1237 }
1238 #endif
1239 bi_.drop(op);
1240
1241 op = r.out.used();
1242 if(dist > op)
1243 {
1244 // copy from window
1245 op = dist - op; // distance back in window
1246 if(op > w_.size())
1247 {
1248 ec = error::invalid_distance;
1249 mode_ = BAD;
1250 break;
1251 }
1252 auto const n = clamp(len, op);
1253 w_.read(r.out.next, op, n);
1254 r.out.next += n;
1255 len -= n;
1256 }
1257 if(len > 0)
1258 {
1259 // copy from output
1260 auto in = r.out.next - dist;
1261 auto n = clamp(len, r.out.avail());
1262 len -= n;
1263 while(n--)
1264 *r.out.next++ = *in++;
1265 }
1266 }
1267 else if((op & 64) == 0)
1268 {
1269 // 2nd level distance code
1270 cp = &distcode_[cp->val + (bi_.peek_fast() & ((1U << op) - 1))];
1271 goto dodist;
1272 }
1273 else
1274 {
1275 ec = error::invalid_distance_code;
1276 mode_ = BAD;
1277 break;
1278 }
1279 }
1280 else if((op & 64) == 0)
1281 {
1282 // 2nd level length code
1283 cp = &lencode_[cp->val + (bi_.peek_fast() & ((1U << op) - 1))];
1284 goto dolen;
1285 }
1286 else if(op & 32)
1287 {
1288 // end-of-block
1289 mode_ = TYPE;
1290 break;
1291 }
1292 else
1293 {
1294 ec = error::invalid_literal_length;
1295 mode_ = BAD;
1296 break;
1297 }
1298 }
1299 while(r.in.next < last && r.out.next < end);
1300
1301 // return unused bytes (on entry, bits < 8, so in won't go too far back)
1302 bi_.rewind(r.in.next);
1303 }
1304
1305 } // detail
1306 } // zlib
1307 } // beast
1308 } // boost
1309
1310 #endif