3 use std
::io
::prelude
::*;
6 use super::{GzBuilder, GzHeader}
;
7 use super::{FCOMMENT, FEXTRA, FHCRC, FNAME}
;
8 use crate::crc
::{Crc, CrcReader}
;
10 use crate::Compression
;
12 fn copy(into
: &mut [u8], from
: &[u8], pos
: &mut usize) -> usize {
13 let min
= cmp
::min(into
.len(), from
.len() - *pos
);
14 for (slot
, val
) in into
.iter_mut().zip(from
[*pos
..*pos
+ min
].iter()) {
/// Creates the `io::Error` used to report a gzip stream whose checksum does
/// not match.
///
/// The error has kind `io::ErrorKind::InvalidInput`.
pub(crate) fn corrupt() -> io::Error {
    let message = "corrupt gzip stream does not have a matching checksum";
    io::Error::new(io::ErrorKind::InvalidInput, message)
}
/// Creates the `io::Error` used to report a malformed gzip header.
///
/// The error has kind `io::ErrorKind::InvalidInput`.
fn bad_header() -> io::Error {
    let kind = io::ErrorKind::InvalidInput;
    let message = "invalid gzip header";
    io::Error::new(kind, message)
}
32 fn read_le_u16
<R
: Read
>(r
: &mut Buffer
<R
>) -> io
::Result
<u16> {
34 r
.read_and_forget(&mut b
)?
;
35 Ok((b
[0] as u16) | ((b
[1] as u16) << 8))
38 fn read_gz_header_part
<'a
, R
: Read
>(r
: &'a
mut Buffer
<'a
, R
>) -> io
::Result
<()> {
41 GzHeaderParsingState
::Start
=> {
42 let mut header
= [0; 10];
43 r
.read_and_forget(&mut header
)?
;
45 if header
[0] != 0x1f || header
[1] != 0x8b {
46 return Err(bad_header());
49 return Err(bad_header());
52 r
.part
.flg
= header
[3];
53 r
.part
.header
.mtime
= ((header
[4] as u32) << 0)
54 | ((header
[5] as u32) << 8)
55 | ((header
[6] as u32) << 16)
56 | ((header
[7] as u32) << 24);
58 r
.part
.header
.operating_system
= header
[9];
59 r
.part
.state
= GzHeaderParsingState
::Xlen
;
61 GzHeaderParsingState
::Xlen
=> {
62 if r
.part
.flg
& FEXTRA
!= 0 {
63 r
.part
.xlen
= read_le_u16(r
)?
;
65 r
.part
.state
= GzHeaderParsingState
::Extra
;
67 GzHeaderParsingState
::Extra
=> {
68 if r
.part
.flg
& FEXTRA
!= 0 {
69 let mut extra
= vec
![0; r
.part
.xlen
as usize];
70 r
.read_and_forget(&mut extra
)?
;
71 r
.part
.header
.extra
= Some(extra
);
73 r
.part
.state
= GzHeaderParsingState
::Filename
;
75 GzHeaderParsingState
::Filename
=> {
76 if r
.part
.flg
& FNAME
!= 0 {
77 if None
== r
.part
.header
.filename
{
78 r
.part
.header
.filename
= Some(Vec
::new());
80 for byte
in r
.bytes() {
87 r
.part
.state
= GzHeaderParsingState
::Comment
;
89 GzHeaderParsingState
::Comment
=> {
90 if r
.part
.flg
& FCOMMENT
!= 0 {
91 if None
== r
.part
.header
.comment
{
92 r
.part
.header
.comment
= Some(Vec
::new());
94 for byte
in r
.bytes() {
101 r
.part
.state
= GzHeaderParsingState
::Crc
;
103 GzHeaderParsingState
::Crc
=> {
104 if r
.part
.flg
& FHCRC
!= 0 {
105 let stored_crc
= read_le_u16(r
)?
;
106 let calced_crc
= r
.part
.crc
.sum() as u16;
107 if stored_crc
!= calced_crc
{
108 return Err(corrupt());
117 pub(crate) fn read_gz_header
<R
: Read
>(r
: &mut R
) -> io
::Result
<GzHeader
> {
118 let mut part
= GzHeaderPartial
::new();
121 let mut reader
= Buffer
::new(&mut part
, r
);
122 read_gz_header_part(&mut reader
)
124 result
.map(|()| part
.take_header())
127 /// A gzip streaming encoder
129 /// This structure exposes a [`BufRead`] interface that will read uncompressed data
130 /// from the underlying reader and expose the compressed version as a [`BufRead`]
133 /// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
138 /// use std::io::prelude::*;
140 /// use flate2::Compression;
141 /// use flate2::bufread::GzEncoder;
142 /// use std::fs::File;
143 /// use std::io::BufReader;
145 /// // Opens sample file, compresses the contents and returns a Vector or error
146 /// // File wrapped in a BufReader implements BufRead
148 /// fn open_hello_world() -> io::Result<Vec<u8>> {
149 /// let f = File::open("examples/hello_world.txt")?;
150 /// let b = BufReader::new(f);
151 /// let mut gz = GzEncoder::new(b, Compression::fast());
152 /// let mut buffer = Vec::new();
153 /// gz.read_to_end(&mut buffer)?;
158 pub struct GzEncoder
<R
> {
159 inner
: deflate
::bufread
::DeflateEncoder
<CrcReader
<R
>>,
165 pub fn gz_encoder
<R
: BufRead
>(header
: Vec
<u8>, r
: R
, lvl
: Compression
) -> GzEncoder
<R
> {
166 let crc
= CrcReader
::new(r
);
168 inner
: deflate
::bufread
::DeflateEncoder
::new(crc
, lvl
),
175 impl<R
: BufRead
> GzEncoder
<R
> {
176 /// Creates a new encoder which will use the given compression level.
178 /// The encoder is not configured specially for the emitted header. For
179 /// header configuration, see the `GzBuilder` type.
181 /// The data read from the stream `r` will be compressed and available
182 /// through the returned reader.
183 pub fn new(r
: R
, level
: Compression
) -> GzEncoder
<R
> {
184 GzBuilder
::new().buf_read(r
, level
)
187 fn read_footer(&mut self, into
: &mut [u8]) -> io
::Result
<usize> {
191 let crc
= self.inner
.get_ref().crc();
193 (crc
.sum() >> 0) as u8,
194 (crc
.sum() >> 8) as u8,
195 (crc
.sum() >> 16) as u8,
196 (crc
.sum() >> 24) as u8,
197 (crc
.amount() >> 0) as u8,
198 (crc
.amount() >> 8) as u8,
199 (crc
.amount() >> 16) as u8,
200 (crc
.amount() >> 24) as u8,
202 Ok(copy(into
, arr
, &mut self.pos
))
206 impl<R
> GzEncoder
<R
> {
207 /// Acquires a reference to the underlying reader.
208 pub fn get_ref(&self) -> &R
{
209 self.inner
.get_ref().get_ref()
212 /// Acquires a mutable reference to the underlying reader.
214 /// Note that mutation of the reader may result in surprising results if
215 /// this encoder is continued to be used.
216 pub fn get_mut(&mut self) -> &mut R
{
217 self.inner
.get_mut().get_mut()
220 /// Returns the underlying stream, consuming this encoder
221 pub fn into_inner(self) -> R
{
222 self.inner
.into_inner().into_inner()
/// Decodes an 8-byte buffer into the pair `(crc, amt)`.
///
/// Bytes `0..4` hold `crc` and bytes `4..8` hold `amt`, each stored as a
/// little-endian `u32`. The caller compares these against the checksum and
/// byte count it computed itself.
fn finish(buf: &[u8; 8]) -> (u32, u32) {
    // `from_le_bytes` replaces the manual shift-and-or assembly (including the
    // no-op `<< 0` shifts) and produces the same values.
    let crc = u32::from_le_bytes([buf[0], buf[1], buf[2], buf[3]]);
    let amt = u32::from_le_bytes([buf[4], buf[5], buf[6], buf[7]]);
    (crc, amt)
}
239 impl<R
: BufRead
> Read
for GzEncoder
<R
> {
240 fn read(&mut self, mut into
: &mut [u8]) -> io
::Result
<usize> {
243 return self.read_footer(into
);
244 } else if self.pos
< self.header
.len() {
245 amt
+= copy(into
, &self.header
, &mut self.pos
);
246 if amt
== into
.len() {
250 into
= &mut tmp
[amt
..];
252 match self.inner
.read(into
)?
{
256 self.read_footer(into
)
263 impl<R
: BufRead
+ Write
> Write
for GzEncoder
<R
> {
264 fn write(&mut self, buf
: &[u8]) -> io
::Result
<usize> {
265 self.get_mut().write(buf
)
268 fn flush(&mut self) -> io
::Result
<()> {
269 self.get_mut().flush()
273 /// A gzip streaming decoder
275 /// This structure consumes a [`BufRead`] interface, reading compressed data
276 /// from the underlying reader, and emitting uncompressed data.
278 /// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
283 /// use std::io::prelude::*;
285 /// # use flate2::Compression;
286 /// # use flate2::write::GzEncoder;
287 /// use flate2::bufread::GzDecoder;
290 /// # let mut e = GzEncoder::new(Vec::new(), Compression::default());
291 /// # e.write_all(b"Hello World").unwrap();
292 /// # let bytes = e.finish().unwrap();
293 /// # println!("{}", decode_reader(bytes).unwrap());
296 /// // Uncompresses a Gz Encoded vector of bytes and returns a string or error
297 /// // Here &[u8] implements BufRead
299 /// fn decode_reader(bytes: Vec<u8>) -> io::Result<String> {
300 /// let mut gz = GzDecoder::new(&bytes[..]);
301 /// let mut s = String::new();
302 /// gz.read_to_string(&mut s)?;
307 pub struct GzDecoder
<R
> {
309 header
: Option
<GzHeader
>,
310 reader
: CrcReader
<deflate
::bufread
::DeflateDecoder
<R
>>,
315 pub enum GzHeaderParsingState
{
325 pub struct GzHeaderPartial
{
327 state
: GzHeaderParsingState
,
334 impl GzHeaderPartial
{
335 fn new() -> GzHeaderPartial
{
337 buf
: Vec
::with_capacity(10), // minimum header length
338 state
: GzHeaderParsingState
::Start
,
352 pub fn take_header(self) -> GzHeader
{
359 Header(GzHeaderPartial
),
361 Finished(usize, [u8; 8]),
366 /// A small adapter which reads data originally from `buf` and then reads all
367 /// further data from `reader`. This will also buffer all data read from
368 /// `reader` into `buf` for reuse on a further call.
369 struct Buffer
<'a
, T
: 'a
> {
370 part
: &'a
mut GzHeaderPartial
,
376 impl<'a
, T
> Buffer
<'a
, T
> {
377 fn new(part
: &'a
mut GzHeaderPartial
, reader
: &'a
mut T
) -> Buffer
<'a
, T
> {
381 buf_max
: part
.buf
.len(),
387 impl<'a
, T
: Read
> Read
for Buffer
<'a
, T
> {
388 fn read(&mut self, buf
: &mut [u8]) -> io
::Result
<usize> {
389 let mut bufref
= match self.part
.state
{
390 GzHeaderParsingState
::Filename
=> self.part
.header
.filename
.as_mut(),
391 GzHeaderParsingState
::Comment
=> self.part
.header
.comment
.as_mut(),
394 if let Some(ref mut b
) = bufref
{
395 // we have a direct reference to a buffer where to write
396 let len
= self.reader
.read(buf
)?
;
397 if len
> 0 && buf
[len
- 1] == 0 {
398 // we do not append the final 0
399 b
.extend_from_slice(&buf
[..len
- 1]);
401 b
.extend_from_slice(&buf
[..len
]);
403 self.part
.crc
.update(&buf
[..len
]);
405 } else if self.buf_cur
== self.buf_max
{
406 // we read new bytes and also save them in self.part.buf
407 let len
= self.reader
.read(buf
)?
;
408 self.part
.buf
.extend_from_slice(&buf
[..len
]);
409 self.part
.crc
.update(&buf
[..len
]);
412 // we first read the previously saved bytes
413 let len
= (&self.part
.buf
[self.buf_cur
..self.buf_max
]).read(buf
)?
;
420 impl<'a
, T
> Buffer
<'a
, T
>
424 // If we manage to read all the bytes, we reset the buffer
425 fn read_and_forget(&mut self, buf
: &mut [u8]) -> io
::Result
<usize> {
426 self.read_exact(buf
)?
;
427 // we managed to read the whole buf
428 // we will no longer need the previously saved bytes in self.part.buf
429 let rlen
= buf
.len();
430 self.part
.buf
.truncate(0);
437 impl<R
: BufRead
> GzDecoder
<R
> {
438 /// Creates a new decoder from the given reader, immediately parsing the
440 pub fn new(mut r
: R
) -> GzDecoder
<R
> {
441 let mut part
= GzHeaderPartial
::new();
442 let mut header
= None
;
445 let mut reader
= Buffer
::new(&mut part
, &mut r
);
446 read_gz_header_part(&mut reader
)
449 let state
= match result
{
451 header
= Some(part
.take_header());
454 Err(ref err
) if io
::ErrorKind
::WouldBlock
== err
.kind() => GzState
::Header(part
),
455 Err(err
) => GzState
::Err(err
),
460 reader
: CrcReader
::new(deflate
::bufread
::DeflateDecoder
::new(r
)),
466 fn multi(mut self, flag
: bool
) -> GzDecoder
<R
> {
472 impl<R
> GzDecoder
<R
> {
473 /// Returns the header associated with this stream, if it was valid
474 pub fn header(&self) -> Option
<&GzHeader
> {
478 /// Acquires a reference to the underlying reader.
479 pub fn get_ref(&self) -> &R
{
480 self.reader
.get_ref().get_ref()
483 /// Acquires a mutable reference to the underlying stream.
485 /// Note that mutation of the stream may result in surprising results if
486 /// this encoder is continued to be used.
487 pub fn get_mut(&mut self) -> &mut R
{
488 self.reader
.get_mut().get_mut()
491 /// Consumes this decoder, returning the underlying reader.
492 pub fn into_inner(self) -> R
{
493 self.reader
.into_inner().into_inner()
497 impl<R
: BufRead
> Read
for GzDecoder
<R
> {
498 fn read(&mut self, into
: &mut [u8]) -> io
::Result
<usize> {
507 *inner
= match mem
::replace(inner
, GzState
::End
) {
508 GzState
::Header(mut part
) => {
510 let mut reader
= Buffer
::new(&mut part
, reader
.get_mut().get_mut());
511 read_gz_header_part(&mut reader
)
515 *header
= Some(part
.take_header());
518 Err(err
) if io
::ErrorKind
::WouldBlock
== err
.kind() => {
519 *inner
= GzState
::Header(part
);
522 Err(err
) => return Err(err
),
527 *inner
= GzState
::Body
;
531 let n
= reader
.read(into
).map_err(|err
| {
532 if io
::ErrorKind
::WouldBlock
== err
.kind() {
533 *inner
= GzState
::Body
;
540 0 => GzState
::Finished(0, [0; 8]),
542 *inner
= GzState
::Body
;
547 GzState
::Finished(pos
, mut buf
) => {
552 .read(&mut buf
[pos
..])
555 Err(io
::ErrorKind
::UnexpectedEof
.into())
561 if io
::ErrorKind
::WouldBlock
== err
.kind() {
562 *inner
= GzState
::Finished(pos
, buf
);
568 GzState
::Finished(pos
+ n
, buf
)
570 let (crc
, amt
) = finish(&buf
);
572 if crc
!= reader
.crc().sum() || amt
!= reader
.crc().amount() {
573 return Err(corrupt());
579 .map(|buf
| buf
.is_empty())
581 if io
::ErrorKind
::WouldBlock
== err
.kind() {
582 *inner
= GzState
::Finished(pos
, buf
);
592 reader
.get_mut().reset_data();
594 GzState
::Header(GzHeaderPartial
::new())
601 GzState
::Err(err
) => return Err(err
),
602 GzState
::End
=> return Ok(0),
608 impl<R
: BufRead
+ Write
> Write
for GzDecoder
<R
> {
609 fn write(&mut self, buf
: &[u8]) -> io
::Result
<usize> {
610 self.get_mut().write(buf
)
613 fn flush(&mut self) -> io
::Result
<()> {
614 self.get_mut().flush()
618 /// A gzip streaming decoder that decodes all members of a multistream
620 /// A gzip member consists of a header, compressed data and a trailer. The [gzip
621 /// specification](https://tools.ietf.org/html/rfc1952), however, allows multiple
622 /// gzip members to be joined in a single stream. `MultiGzDecoder` will
623 /// decode all consecutive members while `GzDecoder` will only decompress
624 /// the first gzip member. The multistream format is commonly used in
625 /// bioinformatics, for example when using the BGZF compressed data.
627 /// This structure exposes a [`BufRead`] interface that will consume all gzip members
628 /// from the underlying reader and emit uncompressed data.
630 /// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
635 /// use std::io::prelude::*;
637 /// # use flate2::Compression;
638 /// # use flate2::write::GzEncoder;
639 /// use flate2::bufread::MultiGzDecoder;
642 /// # let mut e = GzEncoder::new(Vec::new(), Compression::default());
643 /// # e.write_all(b"Hello World").unwrap();
644 /// # let bytes = e.finish().unwrap();
645 /// # println!("{}", decode_reader(bytes).unwrap());
648 /// // Uncompresses a Gz Encoded vector of bytes and returns a string or error
649 /// // Here &[u8] implements BufRead
651 /// fn decode_reader(bytes: Vec<u8>) -> io::Result<String> {
652 /// let mut gz = MultiGzDecoder::new(&bytes[..]);
653 /// let mut s = String::new();
654 /// gz.read_to_string(&mut s)?;
659 pub struct MultiGzDecoder
<R
>(GzDecoder
<R
>);
661 impl<R
: BufRead
> MultiGzDecoder
<R
> {
662 /// Creates a new decoder from the given reader, immediately parsing the
663 /// (first) gzip header. If the gzip stream contains multiple members all will
665 pub fn new(r
: R
) -> MultiGzDecoder
<R
> {
666 MultiGzDecoder(GzDecoder
::new(r
).multi(true))
670 impl<R
> MultiGzDecoder
<R
> {
671 /// Returns the current header associated with this stream, if it's valid
672 pub fn header(&self) -> Option
<&GzHeader
> {
676 /// Acquires a reference to the underlying reader.
677 pub fn get_ref(&self) -> &R
{
681 /// Acquires a mutable reference to the underlying stream.
683 /// Note that mutation of the stream may result in surprising results if
684 /// this decoder continues to be used.
685 pub fn get_mut(&mut self) -> &mut R
{
689 /// Consumes this decoder, returning the underlying reader.
690 pub fn into_inner(self) -> R
{
695 impl<R
: BufRead
> Read
for MultiGzDecoder
<R
> {
696 fn read(&mut self, into
: &mut [u8]) -> io
::Result
<usize> {
703 use crate::gz
::bufread
::*;
705 use std
::io
::{Cursor, Read, Write}
;
707 //a cursor turning EOF into blocking errors
/// Test helper wrapping an in-memory `Cursor` over a byte vector.
pub struct BlockingCursor {
    /// The wrapped cursor that holds the bytes and the current position.
    pub cursor: Cursor<Vec<u8>>,
}
713 impl BlockingCursor
{
714 pub fn new() -> BlockingCursor
{
716 cursor
: Cursor
::new(Vec
::new()),
720 pub fn set_position(&mut self, pos
: u64) {
721 return self.cursor
.set_position(pos
);
724 pub fn position(&mut self) -> u64 {
725 return self.cursor
.position();
729 impl Write
for BlockingCursor
{
730 fn write(&mut self, buf
: &[u8]) -> io
::Result
<usize> {
731 return self.cursor
.write(buf
);
733 fn flush(&mut self) -> io
::Result
<()> {
734 return self.cursor
.flush();
738 impl Read
for BlockingCursor
{
739 fn read(&mut self, buf
: &mut [u8]) -> io
::Result
<usize> {
740 //use the cursor, except it turns eof into blocking error
741 let r
= self.cursor
.read(buf
);
744 if err
.kind() == io
::ErrorKind
::UnexpectedEof
{
745 return Err(io
::ErrorKind
::WouldBlock
.into());
749 //regular EOF turned into blocking error
750 return Err(io
::ErrorKind
::WouldBlock
.into());
758 // test function read_and_forget of Buffer
759 fn buffer_read_and_forget() {
760 // this is unused except for the buffering
761 let mut part
= GzHeaderPartial
::new();
762 // this is a reader which receives data afterwards
763 let mut r
= BlockingCursor
::new();
764 let data
= vec
![1, 2, 3];
765 let mut out
= Vec
::with_capacity(7);
767 match r
.write_all(&data
) {
770 panic
!("Unexpected result for write_all");
775 // First read : successful for one byte
776 let mut reader
= Buffer
::new(&mut part
, &mut r
);
778 match reader
.read_and_forget(&mut out
) {
781 panic
!("Unexpected result for read_and_forget with data");
785 // Second read : incomplete for 7 bytes (we have only 2)
787 match reader
.read_and_forget(&mut out
) {
789 assert_eq
!(io
::ErrorKind
::WouldBlock
, err
.kind());
792 panic
!("Unexpected result for read_and_forget with incomplete");
796 // 3 more data bytes have arrived
797 let pos
= r
.position();
798 let data2
= vec
![4, 5, 6];
799 match r
.write_all(&data2
) {
802 panic
!("Unexpected result for write_all");
807 // Third read : still incomplete for 7 bytes (we have 5)
808 let mut reader2
= Buffer
::new(&mut part
, &mut r
);
809 match reader2
.read_and_forget(&mut out
) {
811 assert_eq
!(io
::ErrorKind
::WouldBlock
, err
.kind());
814 panic
!("Unexpected result for read_and_forget with more incomplete");
818 // 3 more data bytes have arrived again
819 let pos2
= r
.position();
820 let data3
= vec
![7, 8, 9];
821 match r
.write_all(&data3
) {
824 panic
!("Unexpected result for write_all");
827 r
.set_position(pos2
);
829 // Fourth read : now successful for 7 bytes
830 let mut reader3
= Buffer
::new(&mut part
, &mut r
);
831 match reader3
.read_and_forget(&mut out
) {
833 assert_eq
!(out
[0], 2);
834 assert_eq
!(out
[6], 8);
837 panic
!("Unexpected result for read_and_forget with data");
841 // Fifth read : successful for one more byte
843 match reader3
.read_and_forget(&mut out
) {
845 assert_eq
!(out
[0], 9);
848 panic
!("Unexpected result for read_and_forget with data");