]>
Commit | Line | Data |
---|---|---|
ea8adc8c | 1 | use std::cmp; |
ea8adc8c | 2 | use std::io; |
b7449926 | 3 | use std::io::prelude::*; |
ea8adc8c XL |
4 | use std::mem; |
5 | ||
ff7c6d11 | 6 | use super::{GzBuilder, GzHeader}; |
ea8adc8c | 7 | use super::{FCOMMENT, FEXTRA, FHCRC, FNAME}; |
c295e0f8 | 8 | use crate::crc::{Crc, CrcReader}; |
60c5eb7d XL |
9 | use crate::deflate; |
10 | use crate::Compression; | |
ea8adc8c XL |
11 | |
/// Copies as many bytes as fit from `from[*pos..]` into the front of `into`.
///
/// `pos` is advanced by the number of bytes copied, and that count is
/// returned. If `*pos` is already at or past the end of `from`, nothing is
/// copied, `pos` is left untouched and `0` is returned.
fn copy(into: &mut [u8], from: &[u8], pos: &mut usize) -> usize {
    // Clamp the start so that an out-of-range position yields an empty
    // remainder instead of an arithmetic underflow / slice panic.
    let start = cmp::min(*pos, from.len());
    let pending = &from[start..];
    let n = cmp::min(into.len(), pending.len());
    into[..n].copy_from_slice(&pending[..n]);
    *pos += n;
    n
}
b7449926 XL |
20 | |
/// Builds the `InvalidInput` error reported when a gzip checksum does not
/// match the data that was read.
pub(crate) fn corrupt() -> io::Error {
    let message = "corrupt gzip stream does not have a matching checksum";
    io::Error::new(io::ErrorKind::InvalidInput, message)
}
27 | ||
/// Builds the `InvalidInput` error reported for a malformed gzip header.
fn bad_header() -> io::Error {
    let kind = io::ErrorKind::InvalidInput;
    io::Error::new(kind, "invalid gzip header")
}
31 | ||
c295e0f8 | 32 | fn read_le_u16<R: Read>(r: &mut Buffer<R>) -> io::Result<u16> { |
ea8adc8c | 33 | let mut b = [0; 2]; |
c295e0f8 | 34 | r.read_and_forget(&mut b)?; |
ea8adc8c XL |
35 | Ok((b[0] as u16) | ((b[1] as u16) << 8)) |
36 | } | |
37 | ||
c295e0f8 XL |
38 | fn read_gz_header_part<'a, R: Read>(r: &'a mut Buffer<'a, R>) -> io::Result<()> { |
39 | loop { | |
40 | match r.part.state { | |
41 | GzHeaderParsingState::Start => { | |
42 | let mut header = [0; 10]; | |
43 | r.read_and_forget(&mut header)?; | |
44 | ||
45 | if header[0] != 0x1f || header[1] != 0x8b { | |
46 | return Err(bad_header()); | |
47 | } | |
48 | if header[2] != 8 { | |
49 | return Err(bad_header()); | |
50 | } | |
51 | ||
52 | r.part.flg = header[3]; | |
53 | r.part.header.mtime = ((header[4] as u32) << 0) | |
54 | | ((header[5] as u32) << 8) | |
55 | | ((header[6] as u32) << 16) | |
56 | | ((header[7] as u32) << 24); | |
57 | let _xfl = header[8]; | |
58 | r.part.header.operating_system = header[9]; | |
59 | r.part.state = GzHeaderParsingState::Xlen; | |
ea8adc8c | 60 | } |
c295e0f8 XL |
61 | GzHeaderParsingState::Xlen => { |
62 | if r.part.flg & FEXTRA != 0 { | |
63 | r.part.xlen = read_le_u16(r)?; | |
64 | } | |
65 | r.part.state = GzHeaderParsingState::Extra; | |
66 | } | |
67 | GzHeaderParsingState::Extra => { | |
68 | if r.part.flg & FEXTRA != 0 { | |
69 | let mut extra = vec![0; r.part.xlen as usize]; | |
70 | r.read_and_forget(&mut extra)?; | |
71 | r.part.header.extra = Some(extra); | |
72 | } | |
73 | r.part.state = GzHeaderParsingState::Filename; | |
74 | } | |
75 | GzHeaderParsingState::Filename => { | |
76 | if r.part.flg & FNAME != 0 { | |
77 | if None == r.part.header.filename { | |
78 | r.part.header.filename = Some(Vec::new()); | |
79 | }; | |
80 | for byte in r.bytes() { | |
81 | let byte = byte?; | |
82 | if byte == 0 { | |
83 | break; | |
84 | } | |
85 | } | |
86 | } | |
87 | r.part.state = GzHeaderParsingState::Comment; | |
88 | } | |
89 | GzHeaderParsingState::Comment => { | |
90 | if r.part.flg & FCOMMENT != 0 { | |
91 | if None == r.part.header.comment { | |
92 | r.part.header.comment = Some(Vec::new()); | |
93 | }; | |
94 | for byte in r.bytes() { | |
95 | let byte = byte?; | |
96 | if byte == 0 { | |
97 | break; | |
98 | } | |
99 | } | |
100 | } | |
101 | r.part.state = GzHeaderParsingState::Crc; | |
102 | } | |
103 | GzHeaderParsingState::Crc => { | |
104 | if r.part.flg & FHCRC != 0 { | |
105 | let stored_crc = read_le_u16(r)?; | |
106 | let calced_crc = r.part.crc.sum() as u16; | |
107 | if stored_crc != calced_crc { | |
108 | return Err(corrupt()); | |
109 | } | |
110 | } | |
111 | return Ok(()); | |
ea8adc8c | 112 | } |
ea8adc8c | 113 | } |
c295e0f8 XL |
114 | } |
115 | } | |
116 | ||
117 | pub(crate) fn read_gz_header<R: Read>(r: &mut R) -> io::Result<GzHeader> { | |
118 | let mut part = GzHeaderPartial::new(); | |
119 | ||
120 | let result = { | |
121 | let mut reader = Buffer::new(&mut part, r); | |
122 | read_gz_header_part(&mut reader) | |
ea8adc8c | 123 | }; |
04454e1e | 124 | result.map(|()| part.take_header()) |
ea8adc8c XL |
125 | } |
126 | ||
ea8adc8c XL |
127 | /// A gzip streaming encoder |
128 | /// | |
129 | /// This structure exposes a [`BufRead`] interface that will read uncompressed data | |
130 | /// from the underlying reader and expose the compressed version as a [`BufRead`] | |
131 | /// interface. | |
132 | /// | |
133 | /// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html | |
134 | /// | |
135 | /// # Examples | |
136 | /// | |
137 | /// ``` | |
138 | /// use std::io::prelude::*; | |
139 | /// use std::io; | |
140 | /// use flate2::Compression; | |
141 | /// use flate2::bufread::GzEncoder; | |
142 | /// use std::fs::File; | |
143 | /// use std::io::BufReader; | |
144 | /// | |
145 | /// // Opens sample file, compresses the contents and returns a Vector or error | |
146 | /// // File wrapped in a BufReader implements BufRead | |
147 | /// | |
148 | /// fn open_hello_world() -> io::Result<Vec<u8>> { | |
149 | /// let f = File::open("examples/hello_world.txt")?; | |
150 | /// let b = BufReader::new(f); | |
ff7c6d11 | 151 | /// let mut gz = GzEncoder::new(b, Compression::fast()); |
ea8adc8c XL |
152 | /// let mut buffer = Vec::new(); |
153 | /// gz.read_to_end(&mut buffer)?; | |
154 | /// Ok(buffer) | |
155 | /// } | |
156 | /// ``` | |
157 | #[derive(Debug)] | |
158 | pub struct GzEncoder<R> { | |
159 | inner: deflate::bufread::DeflateEncoder<CrcReader<R>>, | |
160 | header: Vec<u8>, | |
161 | pos: usize, | |
162 | eof: bool, | |
163 | } | |
164 | ||
b7449926 | 165 | pub fn gz_encoder<R: BufRead>(header: Vec<u8>, r: R, lvl: Compression) -> GzEncoder<R> { |
ea8adc8c XL |
166 | let crc = CrcReader::new(r); |
167 | GzEncoder { | |
168 | inner: deflate::bufread::DeflateEncoder::new(crc, lvl), | |
04454e1e | 169 | header, |
ea8adc8c XL |
170 | pos: 0, |
171 | eof: false, | |
172 | } | |
173 | } | |
174 | ||
175 | impl<R: BufRead> GzEncoder<R> { | |
176 | /// Creates a new encoder which will use the given compression level. | |
177 | /// | |
178 | /// The encoder is not configured specially for the emitted header. For | |
ff7c6d11 | 179 | /// header configuration, see the `GzBuilder` type. |
ea8adc8c XL |
180 | /// |
181 | /// The data read from the stream `r` will be compressed and available | |
182 | /// through the returned reader. | |
183 | pub fn new(r: R, level: Compression) -> GzEncoder<R> { | |
ff7c6d11 | 184 | GzBuilder::new().buf_read(r, level) |
ea8adc8c XL |
185 | } |
186 | ||
187 | fn read_footer(&mut self, into: &mut [u8]) -> io::Result<usize> { | |
188 | if self.pos == 8 { | |
189 | return Ok(0); | |
190 | } | |
191 | let crc = self.inner.get_ref().crc(); | |
192 | let ref arr = [ | |
193 | (crc.sum() >> 0) as u8, | |
194 | (crc.sum() >> 8) as u8, | |
195 | (crc.sum() >> 16) as u8, | |
196 | (crc.sum() >> 24) as u8, | |
197 | (crc.amount() >> 0) as u8, | |
198 | (crc.amount() >> 8) as u8, | |
199 | (crc.amount() >> 16) as u8, | |
200 | (crc.amount() >> 24) as u8, | |
201 | ]; | |
202 | Ok(copy(into, arr, &mut self.pos)) | |
203 | } | |
204 | } | |
205 | ||
206 | impl<R> GzEncoder<R> { | |
207 | /// Acquires a reference to the underlying reader. | |
208 | pub fn get_ref(&self) -> &R { | |
209 | self.inner.get_ref().get_ref() | |
210 | } | |
211 | ||
212 | /// Acquires a mutable reference to the underlying reader. | |
213 | /// | |
214 | /// Note that mutation of the reader may result in surprising results if | |
215 | /// this encoder is continued to be used. | |
216 | pub fn get_mut(&mut self) -> &mut R { | |
217 | self.inner.get_mut().get_mut() | |
218 | } | |
219 | ||
220 | /// Returns the underlying stream, consuming this encoder | |
221 | pub fn into_inner(self) -> R { | |
222 | self.inner.into_inner().into_inner() | |
223 | } | |
224 | } | |
225 | ||
60c5eb7d XL |
/// Splits the 8-byte gzip trailer into `(crc, amount)`.
///
/// Both values are decoded as little-endian `u32`s: bytes 0..4 are the CRC,
/// bytes 4..8 the byte count.
#[inline]
fn finish(buf: &[u8; 8]) -> (u32, u32) {
    // Fold from the most significant (last) byte down to the first.
    fn le_u32(bytes: &[u8]) -> u32 {
        bytes
            .iter()
            .rev()
            .fold(0, |acc, &b| (acc << 8) | u32::from(b))
    }

    let (crc_bytes, amt_bytes) = buf.split_at(4);
    (le_u32(crc_bytes), le_u32(amt_bytes))
}
238 | ||
ea8adc8c XL |
239 | impl<R: BufRead> Read for GzEncoder<R> { |
240 | fn read(&mut self, mut into: &mut [u8]) -> io::Result<usize> { | |
241 | let mut amt = 0; | |
242 | if self.eof { | |
243 | return self.read_footer(into); | |
244 | } else if self.pos < self.header.len() { | |
245 | amt += copy(into, &self.header, &mut self.pos); | |
246 | if amt == into.len() { | |
247 | return Ok(amt); | |
248 | } | |
249 | let tmp = into; | |
250 | into = &mut tmp[amt..]; | |
251 | } | |
b7449926 | 252 | match self.inner.read(into)? { |
ea8adc8c XL |
253 | 0 => { |
254 | self.eof = true; | |
255 | self.pos = 0; | |
256 | self.read_footer(into) | |
257 | } | |
258 | n => Ok(amt + n), | |
259 | } | |
260 | } | |
261 | } | |
262 | ||
263 | impl<R: BufRead + Write> Write for GzEncoder<R> { | |
264 | fn write(&mut self, buf: &[u8]) -> io::Result<usize> { | |
265 | self.get_mut().write(buf) | |
266 | } | |
267 | ||
268 | fn flush(&mut self) -> io::Result<()> { | |
269 | self.get_mut().flush() | |
270 | } | |
271 | } | |
272 | ||
ea8adc8c XL |
273 | /// A gzip streaming decoder |
274 | /// | |
5869c6ff XL |
275 | /// This structure consumes a [`BufRead`] interface, reading compressed data |
276 | /// from the underlying reader, and emitting uncompressed data. | |
ea8adc8c | 277 | /// |
5869c6ff | 278 | /// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html |
ea8adc8c XL |
279 | /// |
280 | /// # Examples | |
281 | /// | |
282 | /// ``` | |
283 | /// use std::io::prelude::*; | |
284 | /// use std::io; | |
285 | /// # use flate2::Compression; | |
286 | /// # use flate2::write::GzEncoder; | |
287 | /// use flate2::bufread::GzDecoder; | |
288 | /// | |
289 | /// # fn main() { | |
ff7c6d11 | 290 | /// # let mut e = GzEncoder::new(Vec::new(), Compression::default()); |
b7449926 | 291 | /// # e.write_all(b"Hello World").unwrap(); |
ea8adc8c XL |
292 | /// # let bytes = e.finish().unwrap(); |
293 | /// # println!("{}", decode_reader(bytes).unwrap()); | |
294 | /// # } | |
295 | /// # | |
296 | /// // Uncompresses a Gz Encoded vector of bytes and returns a string or error | |
297 | /// // Here &[u8] implements BufRead | |
298 | /// | |
299 | /// fn decode_reader(bytes: Vec<u8>) -> io::Result<String> { | |
ff7c6d11 | 300 | /// let mut gz = GzDecoder::new(&bytes[..]); |
ea8adc8c XL |
301 | /// let mut s = String::new(); |
302 | /// gz.read_to_string(&mut s)?; | |
303 | /// Ok(s) | |
304 | /// } | |
305 | /// ``` | |
306 | #[derive(Debug)] | |
307 | pub struct GzDecoder<R> { | |
60c5eb7d XL |
308 | inner: GzState, |
309 | header: Option<GzHeader>, | |
310 | reader: CrcReader<deflate::bufread::DeflateDecoder<R>>, | |
311 | multi: bool, | |
312 | } | |
313 | ||
c295e0f8 XL |
/// Position of the header parser, in the order the header parts are read.
#[derive(Debug)]
pub enum GzHeaderParsingState {
    /// Nothing read yet; the fixed 10-byte prefix comes next.
    Start,
    /// The length of the extra field comes next (only with `FEXTRA`).
    Xlen,
    /// The extra field payload comes next (only with `FEXTRA`).
    Extra,
    /// The NUL-terminated file name comes next (only with `FNAME`).
    Filename,
    /// The NUL-terminated comment comes next (only with `FCOMMENT`).
    Comment,
    /// The header checksum comes next (only with `FHCRC`).
    Crc,
}
323 | ||
324 | #[derive(Debug)] | |
325 | pub struct GzHeaderPartial { | |
326 | buf: Vec<u8>, | |
327 | state: GzHeaderParsingState, | |
328 | flg: u8, | |
329 | xlen: u16, | |
330 | crc: Crc, | |
331 | header: GzHeader, | |
332 | } | |
333 | ||
334 | impl GzHeaderPartial { | |
335 | fn new() -> GzHeaderPartial { | |
336 | GzHeaderPartial { | |
337 | buf: Vec::with_capacity(10), // minimum header length | |
338 | state: GzHeaderParsingState::Start, | |
339 | flg: 0, | |
340 | xlen: 0, | |
341 | crc: Crc::new(), | |
342 | header: GzHeader { | |
343 | extra: None, | |
344 | filename: None, | |
345 | comment: None, | |
346 | operating_system: 0, | |
347 | mtime: 0, | |
348 | }, | |
349 | } | |
350 | } | |
351 | ||
352 | pub fn take_header(self) -> GzHeader { | |
04454e1e | 353 | self.header |
c295e0f8 XL |
354 | } |
355 | } | |
356 | ||
60c5eb7d XL |
357 | #[derive(Debug)] |
358 | enum GzState { | |
c295e0f8 | 359 | Header(GzHeaderPartial), |
60c5eb7d XL |
360 | Body, |
361 | Finished(usize, [u8; 8]), | |
362 | Err(io::Error), | |
363 | End, | |
364 | } | |
365 | ||
366 | /// A small adapter which reads data originally from `buf` and then reads all | |
367 | /// further data from `reader`. This will also buffer all data read from | |
368 | /// `reader` into `buf` for reuse on a further call. | |
369 | struct Buffer<'a, T: 'a> { | |
c295e0f8 | 370 | part: &'a mut GzHeaderPartial, |
60c5eb7d XL |
371 | buf_cur: usize, |
372 | buf_max: usize, | |
373 | reader: &'a mut T, | |
374 | } | |
375 | ||
376 | impl<'a, T> Buffer<'a, T> { | |
c295e0f8 | 377 | fn new(part: &'a mut GzHeaderPartial, reader: &'a mut T) -> Buffer<'a, T> { |
60c5eb7d XL |
378 | Buffer { |
379 | reader, | |
380 | buf_cur: 0, | |
c295e0f8 XL |
381 | buf_max: part.buf.len(), |
382 | part, | |
60c5eb7d XL |
383 | } |
384 | } | |
385 | } | |
386 | ||
387 | impl<'a, T: Read> Read for Buffer<'a, T> { | |
388 | fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { | |
c295e0f8 XL |
389 | let mut bufref = match self.part.state { |
390 | GzHeaderParsingState::Filename => self.part.header.filename.as_mut(), | |
391 | GzHeaderParsingState::Comment => self.part.header.comment.as_mut(), | |
392 | _ => None, | |
393 | }; | |
394 | if let Some(ref mut b) = bufref { | |
395 | // we have a direct reference to a buffer where to write | |
60c5eb7d | 396 | let len = self.reader.read(buf)?; |
c295e0f8 XL |
397 | if len > 0 && buf[len - 1] == 0 { |
398 | // we do not append the final 0 | |
399 | b.extend_from_slice(&buf[..len - 1]); | |
400 | } else { | |
401 | b.extend_from_slice(&buf[..len]); | |
402 | } | |
403 | self.part.crc.update(&buf[..len]); | |
404 | Ok(len) | |
405 | } else if self.buf_cur == self.buf_max { | |
406 | // we read new bytes and also save them in self.part.buf | |
407 | let len = self.reader.read(buf)?; | |
408 | self.part.buf.extend_from_slice(&buf[..len]); | |
409 | self.part.crc.update(&buf[..len]); | |
60c5eb7d XL |
410 | Ok(len) |
411 | } else { | |
c295e0f8 XL |
412 | // we first read the previously saved bytes |
413 | let len = (&self.part.buf[self.buf_cur..self.buf_max]).read(buf)?; | |
60c5eb7d XL |
414 | self.buf_cur += len; |
415 | Ok(len) | |
416 | } | |
417 | } | |
ea8adc8c XL |
418 | } |
419 | ||
c295e0f8 XL |
420 | impl<'a, T> Buffer<'a, T> |
421 | where | |
422 | T: std::io::Read, | |
423 | { | |
424 | // If we manage to read all the bytes, we reset the buffer | |
425 | fn read_and_forget(&mut self, buf: &mut [u8]) -> io::Result<usize> { | |
426 | self.read_exact(buf)?; | |
427 | // we managed to read the whole buf | |
428 | // we will no longer need the previously saved bytes in self.part.buf | |
429 | let rlen = buf.len(); | |
430 | self.part.buf.truncate(0); | |
431 | self.buf_cur = 0; | |
432 | self.buf_max = 0; | |
04454e1e | 433 | Ok(rlen) |
c295e0f8 XL |
434 | } |
435 | } | |
436 | ||
ea8adc8c XL |
437 | impl<R: BufRead> GzDecoder<R> { |
438 | /// Creates a new decoder from the given reader, immediately parsing the | |
439 | /// gzip header. | |
ff7c6d11 | 440 | pub fn new(mut r: R) -> GzDecoder<R> { |
c295e0f8 | 441 | let mut part = GzHeaderPartial::new(); |
60c5eb7d | 442 | let mut header = None; |
ea8adc8c | 443 | |
60c5eb7d | 444 | let result = { |
c295e0f8 XL |
445 | let mut reader = Buffer::new(&mut part, &mut r); |
446 | read_gz_header_part(&mut reader) | |
60c5eb7d | 447 | }; |
ea8adc8c | 448 | |
60c5eb7d | 449 | let state = match result { |
c295e0f8 XL |
450 | Ok(()) => { |
451 | header = Some(part.take_header()); | |
60c5eb7d | 452 | GzState::Body |
ea8adc8c | 453 | } |
c295e0f8 | 454 | Err(ref err) if io::ErrorKind::WouldBlock == err.kind() => GzState::Header(part), |
60c5eb7d XL |
455 | Err(err) => GzState::Err(err), |
456 | }; | |
ea8adc8c | 457 | |
60c5eb7d XL |
458 | GzDecoder { |
459 | inner: state, | |
460 | reader: CrcReader::new(deflate::bufread::DeflateDecoder::new(r)), | |
461 | multi: false, | |
462 | header, | |
ea8adc8c | 463 | } |
60c5eb7d XL |
464 | } |
465 | ||
466 | fn multi(mut self, flag: bool) -> GzDecoder<R> { | |
467 | self.multi = flag; | |
468 | self | |
ea8adc8c XL |
469 | } |
470 | } | |
471 | ||
472 | impl<R> GzDecoder<R> { | |
ff7c6d11 XL |
473 | /// Returns the header associated with this stream, if it was valid |
474 | pub fn header(&self) -> Option<&GzHeader> { | |
60c5eb7d | 475 | self.header.as_ref() |
ea8adc8c XL |
476 | } |
477 | ||
478 | /// Acquires a reference to the underlying reader. | |
479 | pub fn get_ref(&self) -> &R { | |
60c5eb7d | 480 | self.reader.get_ref().get_ref() |
ea8adc8c XL |
481 | } |
482 | ||
483 | /// Acquires a mutable reference to the underlying stream. | |
484 | /// | |
485 | /// Note that mutation of the stream may result in surprising results if | |
486 | /// this encoder is continued to be used. | |
487 | pub fn get_mut(&mut self) -> &mut R { | |
60c5eb7d | 488 | self.reader.get_mut().get_mut() |
ea8adc8c XL |
489 | } |
490 | ||
491 | /// Consumes this decoder, returning the underlying reader. | |
492 | pub fn into_inner(self) -> R { | |
60c5eb7d | 493 | self.reader.into_inner().into_inner() |
ea8adc8c XL |
494 | } |
495 | } | |
496 | ||
497 | impl<R: BufRead> Read for GzDecoder<R> { | |
498 | fn read(&mut self, into: &mut [u8]) -> io::Result<usize> { | |
60c5eb7d XL |
499 | let GzDecoder { |
500 | inner, | |
501 | header, | |
502 | reader, | |
503 | multi, | |
504 | } = self; | |
505 | ||
506 | loop { | |
507 | *inner = match mem::replace(inner, GzState::End) { | |
c295e0f8 | 508 | GzState::Header(mut part) => { |
60c5eb7d | 509 | let result = { |
c295e0f8 XL |
510 | let mut reader = Buffer::new(&mut part, reader.get_mut().get_mut()); |
511 | read_gz_header_part(&mut reader) | |
60c5eb7d | 512 | }; |
04454e1e | 513 | match result { |
c295e0f8 XL |
514 | Ok(()) => { |
515 | *header = Some(part.take_header()); | |
516 | GzState::Body | |
60c5eb7d | 517 | } |
c295e0f8 XL |
518 | Err(err) if io::ErrorKind::WouldBlock == err.kind() => { |
519 | *inner = GzState::Header(part); | |
520 | return Err(err); | |
521 | } | |
522 | Err(err) => return Err(err), | |
04454e1e | 523 | } |
60c5eb7d XL |
524 | } |
525 | GzState::Body => { | |
526 | if into.is_empty() { | |
527 | *inner = GzState::Body; | |
528 | return Ok(0); | |
529 | } | |
530 | ||
531 | let n = reader.read(into).map_err(|err| { | |
532 | if io::ErrorKind::WouldBlock == err.kind() { | |
533 | *inner = GzState::Body; | |
534 | } | |
535 | ||
536 | err | |
537 | })?; | |
538 | ||
539 | match n { | |
540 | 0 => GzState::Finished(0, [0; 8]), | |
541 | n => { | |
542 | *inner = GzState::Body; | |
543 | return Ok(n); | |
544 | } | |
545 | } | |
546 | } | |
547 | GzState::Finished(pos, mut buf) => { | |
548 | if pos < buf.len() { | |
549 | let n = reader | |
550 | .get_mut() | |
551 | .get_mut() | |
552 | .read(&mut buf[pos..]) | |
553 | .and_then(|n| { | |
554 | if n == 0 { | |
555 | Err(io::ErrorKind::UnexpectedEof.into()) | |
556 | } else { | |
557 | Ok(n) | |
558 | } | |
559 | }) | |
560 | .map_err(|err| { | |
561 | if io::ErrorKind::WouldBlock == err.kind() { | |
562 | *inner = GzState::Finished(pos, buf); | |
563 | } | |
564 | ||
565 | err | |
566 | })?; | |
567 | ||
568 | GzState::Finished(pos + n, buf) | |
569 | } else { | |
570 | let (crc, amt) = finish(&buf); | |
571 | ||
572 | if crc != reader.crc().sum() || amt != reader.crc().amount() { | |
573 | return Err(corrupt()); | |
574 | } else if *multi { | |
575 | let is_eof = reader | |
576 | .get_mut() | |
577 | .get_mut() | |
578 | .fill_buf() | |
579 | .map(|buf| buf.is_empty()) | |
580 | .map_err(|err| { | |
581 | if io::ErrorKind::WouldBlock == err.kind() { | |
582 | *inner = GzState::Finished(pos, buf); | |
583 | } | |
584 | ||
585 | err | |
586 | })?; | |
587 | ||
588 | if is_eof { | |
589 | GzState::End | |
590 | } else { | |
591 | reader.reset(); | |
592 | reader.get_mut().reset_data(); | |
593 | header.take(); | |
c295e0f8 | 594 | GzState::Header(GzHeaderPartial::new()) |
60c5eb7d XL |
595 | } |
596 | } else { | |
597 | GzState::End | |
598 | } | |
599 | } | |
600 | } | |
601 | GzState::Err(err) => return Err(err), | |
602 | GzState::End => return Ok(0), | |
603 | }; | |
ea8adc8c XL |
604 | } |
605 | } | |
606 | } | |
607 | ||
608 | impl<R: BufRead + Write> Write for GzDecoder<R> { | |
609 | fn write(&mut self, buf: &[u8]) -> io::Result<usize> { | |
610 | self.get_mut().write(buf) | |
611 | } | |
612 | ||
613 | fn flush(&mut self) -> io::Result<()> { | |
614 | self.get_mut().flush() | |
615 | } | |
616 | } | |
617 | ||
ea8adc8c XL |
618 | /// A gzip streaming decoder that decodes all members of a multistream |
619 | /// | |
620 | /// A gzip member consists of a header, compressed data and a trailer. The [gzip | |
621 | /// specification](https://tools.ietf.org/html/rfc1952), however, allows multiple | |
622 | /// gzip members to be joined in a single stream. `MultiGzDecoder` will | |
623 | /// decode all consecutive members while `GzDecoder` will only decompress | |
624 | /// the first gzip member. The multistream format is commonly used in | |
625 | /// bioinformatics, for example when using the BGZF compressed data. | |
626 | /// | |
627 | /// This structure exposes a [`BufRead`] interface that will consume all gzip members | |
628 | /// from the underlying reader and emit uncompressed data. | |
629 | /// | |
630 | /// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html | |
631 | /// | |
632 | /// # Examples | |
633 | /// | |
634 | /// ``` | |
635 | /// use std::io::prelude::*; | |
636 | /// use std::io; | |
637 | /// # use flate2::Compression; | |
638 | /// # use flate2::write::GzEncoder; | |
639 | /// use flate2::bufread::MultiGzDecoder; | |
640 | /// | |
641 | /// # fn main() { | |
ff7c6d11 | 642 | /// # let mut e = GzEncoder::new(Vec::new(), Compression::default()); |
b7449926 | 643 | /// # e.write_all(b"Hello World").unwrap(); |
ea8adc8c XL |
644 | /// # let bytes = e.finish().unwrap(); |
645 | /// # println!("{}", decode_reader(bytes).unwrap()); | |
646 | /// # } | |
647 | /// # | |
648 | /// // Uncompresses a Gz Encoded vector of bytes and returns a string or error | |
649 | /// // Here &[u8] implements BufRead | |
650 | /// | |
651 | /// fn decode_reader(bytes: Vec<u8>) -> io::Result<String> { | |
ff7c6d11 | 652 | /// let mut gz = MultiGzDecoder::new(&bytes[..]); |
ea8adc8c XL |
653 | /// let mut s = String::new(); |
654 | /// gz.read_to_string(&mut s)?; | |
655 | /// Ok(s) | |
656 | /// } | |
657 | /// ``` | |
658 | #[derive(Debug)] | |
60c5eb7d | 659 | pub struct MultiGzDecoder<R>(GzDecoder<R>); |
ea8adc8c | 660 | |
ea8adc8c XL |
661 | impl<R: BufRead> MultiGzDecoder<R> { |
662 | /// Creates a new decoder from the given reader, immediately parsing the | |
663 | /// (first) gzip header. If the gzip stream contains multiple members all will | |
664 | /// be decoded. | |
60c5eb7d XL |
665 | pub fn new(r: R) -> MultiGzDecoder<R> { |
666 | MultiGzDecoder(GzDecoder::new(r).multi(true)) | |
ea8adc8c XL |
667 | } |
668 | } | |
669 | ||
670 | impl<R> MultiGzDecoder<R> { | |
ff7c6d11 XL |
671 | /// Returns the current header associated with this stream, if it's valid |
672 | pub fn header(&self) -> Option<&GzHeader> { | |
60c5eb7d | 673 | self.0.header() |
ea8adc8c XL |
674 | } |
675 | ||
676 | /// Acquires a reference to the underlying reader. | |
677 | pub fn get_ref(&self) -> &R { | |
60c5eb7d | 678 | self.0.get_ref() |
ea8adc8c XL |
679 | } |
680 | ||
681 | /// Acquires a mutable reference to the underlying stream. | |
682 | /// | |
683 | /// Note that mutation of the stream may result in surprising results if | |
684 | /// this encoder is continued to be used. | |
685 | pub fn get_mut(&mut self) -> &mut R { | |
60c5eb7d | 686 | self.0.get_mut() |
ea8adc8c XL |
687 | } |
688 | ||
689 | /// Consumes this decoder, returning the underlying reader. | |
690 | pub fn into_inner(self) -> R { | |
60c5eb7d | 691 | self.0.into_inner() |
ea8adc8c XL |
692 | } |
693 | } | |
694 | ||
695 | impl<R: BufRead> Read for MultiGzDecoder<R> { | |
696 | fn read(&mut self, into: &mut [u8]) -> io::Result<usize> { | |
60c5eb7d | 697 | self.0.read(into) |
ea8adc8c XL |
698 | } |
699 | } | |
700 | ||
c295e0f8 XL |
#[cfg(test)]
pub mod tests {
    use crate::gz::bufread::*;
    use std::io;
    use std::io::{Cursor, Read, Write};

    /// A cursor that reports `WouldBlock` where a plain cursor would report
    /// end of input.
    #[derive(Debug)]
    pub struct BlockingCursor {
        pub cursor: Cursor<Vec<u8>>,
    }

    impl BlockingCursor {
        pub fn new() -> BlockingCursor {
            let cursor = Cursor::new(Vec::new());
            BlockingCursor { cursor }
        }

        pub fn set_position(&mut self, pos: u64) {
            self.cursor.set_position(pos)
        }

        pub fn position(&mut self) -> u64 {
            self.cursor.position()
        }
    }

    impl Write for BlockingCursor {
        fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
            self.cursor.write(buf)
        }

        fn flush(&mut self) -> io::Result<()> {
            self.cursor.flush()
        }
    }

    impl Read for BlockingCursor {
        fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
            // Behaves like the cursor, except that both a 0-byte read and an
            // `UnexpectedEof` error are turned into `WouldBlock`.
            match self.cursor.read(buf) {
                Ok(0) => Err(io::ErrorKind::WouldBlock.into()),
                Err(ref err) if err.kind() == io::ErrorKind::UnexpectedEof => {
                    Err(io::ErrorKind::WouldBlock.into())
                }
                other => other,
            }
        }
    }

    /// Appends `data` at the cursor's current position and then moves the
    /// position back, so the new bytes are the next ones to be read.
    fn feed(r: &mut BlockingCursor, data: &[u8]) {
        let resume_at = r.position();
        if r.write_all(data).is_err() {
            panic!("Unexpected result for write_all");
        }
        r.set_position(resume_at);
    }

    /// Asserts that `result` is a `WouldBlock` error; panics with `message`
    /// if it is not an error at all.
    fn expect_would_block(result: io::Result<usize>, message: &str) {
        match result {
            Err(ref err) => assert_eq!(io::ErrorKind::WouldBlock, err.kind()),
            _ => panic!("{}", message),
        }
    }

    // test function read_and_forget of Buffer
    #[test]
    fn buffer_read_and_forget() {
        // only used for its buffering
        let mut part = GzHeaderPartial::new();
        // a reader that receives its data piece by piece
        let mut r = BlockingCursor::new();
        let mut out = Vec::with_capacity(7);

        feed(&mut r, &[1, 2, 3]);

        // First read : successful for one byte
        let mut reader = Buffer::new(&mut part, &mut r);
        out.resize(1, 0);
        if let Ok(1) = reader.read_and_forget(&mut out) {
        } else {
            panic!("Unexpected result for read_and_forget with data");
        }

        // Second read : incomplete for 7 bytes (we have only 2)
        out.resize(7, 0);
        expect_would_block(
            reader.read_and_forget(&mut out),
            "Unexpected result for read_and_forget with incomplete",
        );

        // 3 more data bytes have arrived
        feed(&mut r, &[4, 5, 6]);

        // Third read : still incomplete for 7 bytes (we have 5)
        let mut reader2 = Buffer::new(&mut part, &mut r);
        expect_would_block(
            reader2.read_and_forget(&mut out),
            "Unexpected result for read_and_forget with more incomplete",
        );

        // 3 more data bytes have arrived again
        feed(&mut r, &[7, 8, 9]);

        // Fourth read : now successful for 7 bytes
        let mut reader3 = Buffer::new(&mut part, &mut r);
        if let Ok(7) = reader3.read_and_forget(&mut out) {
            assert_eq!(out[0], 2);
            assert_eq!(out[6], 8);
        } else {
            panic!("Unexpected result for read_and_forget with data");
        }

        // Fifth read : successful for one more byte
        out.resize(1, 0);
        if let Ok(1) = reader3.read_and_forget(&mut out) {
            assert_eq!(out[0], 9);
        } else {
            panic!("Unexpected result for read_and_forget with data");
        }
    }
}