]> git.proxmox.com Git - rustc.git/blame - src/vendor/flate2/src/gz/bufread.rs
New upstream version 1.30.0~beta.7+dfsg1
[rustc.git] / src / vendor / flate2 / src / gz / bufread.rs
CommitLineData
ea8adc8c 1use std::cmp;
ea8adc8c 2use std::io;
b7449926 3use std::io::prelude::*;
ea8adc8c
XL
4use std::mem;
5
ff7c6d11 6use super::{GzBuilder, GzHeader};
ea8adc8c 7use super::{FCOMMENT, FEXTRA, FHCRC, FNAME};
ea8adc8c
XL
8use crc::CrcReader;
9use deflate;
b7449926 10use Compression;
ea8adc8c
XL
11
/// Copies as many bytes as fit from `from[*pos..]` into the front of `into`.
///
/// `pos` is a cursor into `from`; it is advanced by the number of bytes
/// copied, and that number is returned. A cursor that is already at or past
/// the end of `from` copies nothing and returns 0 rather than underflowing.
fn copy(into: &mut [u8], from: &[u8], pos: &mut usize) -> usize {
    // `saturating_sub` keeps an out-of-range cursor from wrapping around.
    let available = from.len().saturating_sub(*pos);
    let count = cmp::min(into.len(), available);
    if count == 0 {
        return 0;
    }
    into[..count].copy_from_slice(&from[*pos..*pos + count]);
    *pos += count;
    count
}
b7449926
XL
20
/// Builds the `InvalidInput` error reported when a gzip stream's checksum
/// or trailer does not match the decoded data.
pub(crate) fn corrupt() -> io::Error {
    let message = "corrupt gzip stream does not have a matching checksum";
    io::Error::new(io::ErrorKind::InvalidInput, message)
}
27
/// Builds the `InvalidInput` error reported when the gzip magic bytes or
/// compression method in a header are not the expected values.
fn bad_header() -> io::Error {
    let kind = io::ErrorKind::InvalidInput;
    io::Error::new(kind, "invalid gzip header")
}
31
/// Reads exactly two bytes from `r` and interprets them as a little-endian
/// `u16`.
///
/// Any error from `read_exact` (including running out of input) is returned
/// unchanged.
fn read_le_u16<R: Read>(r: &mut R) -> io::Result<u16> {
    let mut bytes = [0u8; 2];
    r.read_exact(&mut bytes)?;
    let low = u16::from(bytes[0]);
    let high = u16::from(bytes[1]);
    Ok((high << 8) | low)
}
37
b7449926 38pub(crate) fn read_gz_header<R: Read>(r: &mut R) -> io::Result<GzHeader> {
ea8adc8c
XL
39 let mut crc_reader = CrcReader::new(r);
40 let mut header = [0; 10];
b7449926 41 crc_reader.read_exact(&mut header)?;
ea8adc8c
XL
42
43 let id1 = header[0];
44 let id2 = header[1];
45 if id1 != 0x1f || id2 != 0x8b {
46 return Err(bad_header());
47 }
48 let cm = header[2];
49 if cm != 8 {
50 return Err(bad_header());
51 }
52
53 let flg = header[3];
b7449926
XL
54 let mtime = ((header[4] as u32) << 0)
55 | ((header[5] as u32) << 8)
56 | ((header[6] as u32) << 16)
57 | ((header[7] as u32) << 24);
ea8adc8c 58 let _xfl = header[8];
ff7c6d11 59 let os = header[9];
ea8adc8c
XL
60
61 let extra = if flg & FEXTRA != 0 {
b7449926 62 let xlen = read_le_u16(&mut crc_reader)?;
ea8adc8c 63 let mut extra = vec![0; xlen as usize];
b7449926 64 crc_reader.read_exact(&mut extra)?;
ea8adc8c
XL
65 Some(extra)
66 } else {
67 None
68 };
69 let filename = if flg & FNAME != 0 {
70 // wow this is slow
71 let mut b = Vec::new();
72 for byte in crc_reader.by_ref().bytes() {
b7449926 73 let byte = byte?;
ea8adc8c
XL
74 if byte == 0 {
75 break;
76 }
77 b.push(byte);
78 }
79 Some(b)
80 } else {
81 None
82 };
83 let comment = if flg & FCOMMENT != 0 {
84 // wow this is slow
85 let mut b = Vec::new();
86 for byte in crc_reader.by_ref().bytes() {
b7449926 87 let byte = byte?;
ea8adc8c
XL
88 if byte == 0 {
89 break;
90 }
91 b.push(byte);
92 }
93 Some(b)
94 } else {
95 None
96 };
97
98 if flg & FHCRC != 0 {
99 let calced_crc = crc_reader.crc().sum() as u16;
b7449926 100 let stored_crc = read_le_u16(&mut crc_reader)?;
ea8adc8c
XL
101 if calced_crc != stored_crc {
102 return Err(corrupt());
103 }
104 }
105
ff7c6d11 106 Ok(GzHeader {
ea8adc8c
XL
107 extra: extra,
108 filename: filename,
109 comment: comment,
ff7c6d11 110 operating_system: os,
ea8adc8c
XL
111 mtime: mtime,
112 })
113}
114
ea8adc8c
XL
115/// A gzip streaming encoder
116///
117/// This structure exposes a [`BufRead`] interface that will read uncompressed data
118/// from the underlying reader and expose the compressed version as a [`BufRead`]
119/// interface.
120///
121/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
122///
123/// # Examples
124///
125/// ```
126/// use std::io::prelude::*;
127/// use std::io;
128/// use flate2::Compression;
129/// use flate2::bufread::GzEncoder;
130/// use std::fs::File;
131/// use std::io::BufReader;
132///
133/// // Opens sample file, compresses the contents and returns a Vector or error
134/// // File wrapped in a BufReader implements BufRead
135///
136/// fn open_hello_world() -> io::Result<Vec<u8>> {
137/// let f = File::open("examples/hello_world.txt")?;
138/// let b = BufReader::new(f);
ff7c6d11 139/// let mut gz = GzEncoder::new(b, Compression::fast());
ea8adc8c
XL
140/// let mut buffer = Vec::new();
141/// gz.read_to_end(&mut buffer)?;
142/// Ok(buffer)
143/// }
144/// ```
145#[derive(Debug)]
146pub struct GzEncoder<R> {
147 inner: deflate::bufread::DeflateEncoder<CrcReader<R>>,
148 header: Vec<u8>,
149 pos: usize,
150 eof: bool,
151}
152
b7449926 153pub fn gz_encoder<R: BufRead>(header: Vec<u8>, r: R, lvl: Compression) -> GzEncoder<R> {
ea8adc8c
XL
154 let crc = CrcReader::new(r);
155 GzEncoder {
156 inner: deflate::bufread::DeflateEncoder::new(crc, lvl),
157 header: header,
158 pos: 0,
159 eof: false,
160 }
161}
162
163impl<R: BufRead> GzEncoder<R> {
164 /// Creates a new encoder which will use the given compression level.
165 ///
166 /// The encoder is not configured specially for the emitted header. For
ff7c6d11 167 /// header configuration, see the `GzBuilder` type.
ea8adc8c
XL
168 ///
169 /// The data read from the stream `r` will be compressed and available
170 /// through the returned reader.
171 pub fn new(r: R, level: Compression) -> GzEncoder<R> {
ff7c6d11 172 GzBuilder::new().buf_read(r, level)
ea8adc8c
XL
173 }
174
175 fn read_footer(&mut self, into: &mut [u8]) -> io::Result<usize> {
176 if self.pos == 8 {
177 return Ok(0);
178 }
179 let crc = self.inner.get_ref().crc();
180 let ref arr = [
181 (crc.sum() >> 0) as u8,
182 (crc.sum() >> 8) as u8,
183 (crc.sum() >> 16) as u8,
184 (crc.sum() >> 24) as u8,
185 (crc.amount() >> 0) as u8,
186 (crc.amount() >> 8) as u8,
187 (crc.amount() >> 16) as u8,
188 (crc.amount() >> 24) as u8,
189 ];
190 Ok(copy(into, arr, &mut self.pos))
191 }
192}
193
194impl<R> GzEncoder<R> {
195 /// Acquires a reference to the underlying reader.
196 pub fn get_ref(&self) -> &R {
197 self.inner.get_ref().get_ref()
198 }
199
200 /// Acquires a mutable reference to the underlying reader.
201 ///
202 /// Note that mutation of the reader may result in surprising results if
203 /// this encoder is continued to be used.
204 pub fn get_mut(&mut self) -> &mut R {
205 self.inner.get_mut().get_mut()
206 }
207
208 /// Returns the underlying stream, consuming this encoder
209 pub fn into_inner(self) -> R {
210 self.inner.into_inner().into_inner()
211 }
212}
213
214impl<R: BufRead> Read for GzEncoder<R> {
215 fn read(&mut self, mut into: &mut [u8]) -> io::Result<usize> {
216 let mut amt = 0;
217 if self.eof {
218 return self.read_footer(into);
219 } else if self.pos < self.header.len() {
220 amt += copy(into, &self.header, &mut self.pos);
221 if amt == into.len() {
222 return Ok(amt);
223 }
224 let tmp = into;
225 into = &mut tmp[amt..];
226 }
b7449926 227 match self.inner.read(into)? {
ea8adc8c
XL
228 0 => {
229 self.eof = true;
230 self.pos = 0;
231 self.read_footer(into)
232 }
233 n => Ok(amt + n),
234 }
235 }
236}
237
238impl<R: BufRead + Write> Write for GzEncoder<R> {
239 fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
240 self.get_mut().write(buf)
241 }
242
243 fn flush(&mut self) -> io::Result<()> {
244 self.get_mut().flush()
245 }
246}
247
ea8adc8c
XL
248/// A gzip streaming decoder
249///
250/// This structure exposes a [`ReadBuf`] interface that will consume compressed
251/// data from the underlying reader and emit uncompressed data.
252///
253/// [`ReadBuf`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
254///
255/// # Examples
256///
257/// ```
258/// use std::io::prelude::*;
259/// use std::io;
260/// # use flate2::Compression;
261/// # use flate2::write::GzEncoder;
262/// use flate2::bufread::GzDecoder;
263///
264/// # fn main() {
ff7c6d11 265/// # let mut e = GzEncoder::new(Vec::new(), Compression::default());
b7449926 266/// # e.write_all(b"Hello World").unwrap();
ea8adc8c
XL
267/// # let bytes = e.finish().unwrap();
268/// # println!("{}", decode_reader(bytes).unwrap());
269/// # }
270/// #
271/// // Uncompresses a Gz Encoded vector of bytes and returns a string or error
272/// // Here &[u8] implements BufRead
273///
274/// fn decode_reader(bytes: Vec<u8>) -> io::Result<String> {
ff7c6d11 275/// let mut gz = GzDecoder::new(&bytes[..]);
ea8adc8c
XL
276/// let mut s = String::new();
277/// gz.read_to_string(&mut s)?;
278/// Ok(s)
279/// }
280/// ```
281#[derive(Debug)]
282pub struct GzDecoder<R> {
283 inner: CrcReader<deflate::bufread::DeflateDecoder<R>>,
b7449926 284 header: Option<io::Result<GzHeader>>,
ea8adc8c
XL
285 finished: bool,
286}
287
ea8adc8c
XL
288impl<R: BufRead> GzDecoder<R> {
289 /// Creates a new decoder from the given reader, immediately parsing the
290 /// gzip header.
ff7c6d11
XL
291 pub fn new(mut r: R) -> GzDecoder<R> {
292 let header = read_gz_header(&mut r);
ea8adc8c
XL
293
294 let flate = deflate::bufread::DeflateDecoder::new(r);
ff7c6d11 295 GzDecoder {
ea8adc8c 296 inner: CrcReader::new(flate),
b7449926 297 header: Some(header),
ea8adc8c 298 finished: false,
ff7c6d11 299 }
ea8adc8c
XL
300 }
301
302 fn finish(&mut self) -> io::Result<()> {
303 if self.finished {
304 return Ok(());
305 }
306 let ref mut buf = [0u8; 8];
307 {
308 let mut len = 0;
309
310 while len < buf.len() {
b7449926 311 match self.inner.get_mut().get_mut().read(&mut buf[len..])? {
ea8adc8c
XL
312 0 => return Err(corrupt()),
313 n => len += n,
314 }
315 }
316 }
317
b7449926
XL
318 let crc = ((buf[0] as u32) << 0)
319 | ((buf[1] as u32) << 8)
320 | ((buf[2] as u32) << 16)
321 | ((buf[3] as u32) << 24);
322 let amt = ((buf[4] as u32) << 0)
323 | ((buf[5] as u32) << 8)
324 | ((buf[6] as u32) << 16)
325 | ((buf[7] as u32) << 24);
326 if crc != self.inner.crc().sum() {
ea8adc8c
XL
327 return Err(corrupt());
328 }
329 if amt != self.inner.crc().amount() {
330 return Err(corrupt());
331 }
332 self.finished = true;
333 Ok(())
334 }
335}
336
337impl<R> GzDecoder<R> {
ff7c6d11
XL
338 /// Returns the header associated with this stream, if it was valid
339 pub fn header(&self) -> Option<&GzHeader> {
b7449926 340 self.header.as_ref().and_then(|h| h.as_ref().ok())
ea8adc8c
XL
341 }
342
343 /// Acquires a reference to the underlying reader.
344 pub fn get_ref(&self) -> &R {
345 self.inner.get_ref().get_ref()
346 }
347
348 /// Acquires a mutable reference to the underlying stream.
349 ///
350 /// Note that mutation of the stream may result in surprising results if
351 /// this encoder is continued to be used.
352 pub fn get_mut(&mut self) -> &mut R {
353 self.inner.get_mut().get_mut()
354 }
355
356 /// Consumes this decoder, returning the underlying reader.
357 pub fn into_inner(self) -> R {
358 self.inner.into_inner().into_inner()
359 }
360}
361
362impl<R: BufRead> Read for GzDecoder<R> {
363 fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
b7449926
XL
364 match self.header {
365 None => return Ok(0), // error already returned,
366 Some(Ok(_)) => {}
367 Some(Err(_)) => match self.header.take().unwrap() {
368 Ok(_) => panic!(),
369 Err(e) => return Err(e),
370 },
ff7c6d11 371 }
b7449926
XL
372 if into.is_empty() {
373 return Ok(0);
374 }
375 match self.inner.read(into)? {
ea8adc8c 376 0 => {
b7449926 377 self.finish()?;
ea8adc8c
XL
378 Ok(0)
379 }
380 n => Ok(n),
381 }
382 }
383}
384
385impl<R: BufRead + Write> Write for GzDecoder<R> {
386 fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
387 self.get_mut().write(buf)
388 }
389
390 fn flush(&mut self) -> io::Result<()> {
391 self.get_mut().flush()
392 }
393}
394
ea8adc8c
XL
395/// A gzip streaming decoder that decodes all members of a multistream
396///
397/// A gzip member consists of a header, compressed data and a trailer. The [gzip
398/// specification](https://tools.ietf.org/html/rfc1952), however, allows multiple
399/// gzip members to be joined in a single stream. `MultiGzDecoder` will
400/// decode all consecutive members while `GzDecoder` will only decompress
401/// the first gzip member. The multistream format is commonly used in
402/// bioinformatics, for example when using the BGZF compressed data.
403///
404/// This structure exposes a [`BufRead`] interface that will consume all gzip members
405/// from the underlying reader and emit uncompressed data.
406///
407/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
408///
409/// # Examples
410///
411/// ```
412/// use std::io::prelude::*;
413/// use std::io;
414/// # use flate2::Compression;
415/// # use flate2::write::GzEncoder;
416/// use flate2::bufread::MultiGzDecoder;
417///
418/// # fn main() {
ff7c6d11 419/// # let mut e = GzEncoder::new(Vec::new(), Compression::default());
b7449926 420/// # e.write_all(b"Hello World").unwrap();
ea8adc8c
XL
421/// # let bytes = e.finish().unwrap();
422/// # println!("{}", decode_reader(bytes).unwrap());
423/// # }
424/// #
425/// // Uncompresses a Gz Encoded vector of bytes and returns a string or error
426/// // Here &[u8] implements BufRead
427///
428/// fn decode_reader(bytes: Vec<u8>) -> io::Result<String> {
ff7c6d11 429/// let mut gz = MultiGzDecoder::new(&bytes[..]);
ea8adc8c
XL
430/// let mut s = String::new();
431/// gz.read_to_string(&mut s)?;
432/// Ok(s)
433/// }
434/// ```
435#[derive(Debug)]
436pub struct MultiGzDecoder<R> {
437 inner: CrcReader<deflate::bufread::DeflateDecoder<R>>,
ff7c6d11 438 header: io::Result<GzHeader>,
ea8adc8c
XL
439 finished: bool,
440}
441
ea8adc8c
XL
442impl<R: BufRead> MultiGzDecoder<R> {
443 /// Creates a new decoder from the given reader, immediately parsing the
444 /// (first) gzip header. If the gzip stream contains multiple members all will
445 /// be decoded.
ff7c6d11
XL
446 pub fn new(mut r: R) -> MultiGzDecoder<R> {
447 let header = read_gz_header(&mut r);
ea8adc8c
XL
448
449 let flate = deflate::bufread::DeflateDecoder::new(r);
ff7c6d11 450 MultiGzDecoder {
ea8adc8c
XL
451 inner: CrcReader::new(flate),
452 header: header,
453 finished: false,
ff7c6d11 454 }
ea8adc8c
XL
455 }
456
457 fn finish_member(&mut self) -> io::Result<usize> {
458 if self.finished {
459 return Ok(0);
460 }
461 let ref mut buf = [0u8; 8];
462 {
463 let mut len = 0;
464
465 while len < buf.len() {
b7449926 466 match self.inner.get_mut().get_mut().read(&mut buf[len..])? {
ea8adc8c
XL
467 0 => return Err(corrupt()),
468 n => len += n,
469 }
470 }
471 }
472
b7449926
XL
473 let crc = ((buf[0] as u32) << 0)
474 | ((buf[1] as u32) << 8)
475 | ((buf[2] as u32) << 16)
476 | ((buf[3] as u32) << 24);
477 let amt = ((buf[4] as u32) << 0)
478 | ((buf[5] as u32) << 8)
479 | ((buf[6] as u32) << 16)
480 | ((buf[7] as u32) << 24);
ea8adc8c
XL
481 if crc != self.inner.crc().sum() as u32 {
482 return Err(corrupt());
483 }
484 if amt != self.inner.crc().amount() {
485 return Err(corrupt());
486 }
487 let remaining = match self.inner.get_mut().get_mut().fill_buf() {
488 Ok(b) => if b.is_empty() {
489 self.finished = true;
490 return Ok(0);
491 } else {
492 b.len()
493 },
494 Err(e) => return Err(e),
495 };
496
ff7c6d11
XL
497 let next_header = read_gz_header(self.inner.get_mut().get_mut());
498 drop(mem::replace(&mut self.header, next_header));
ea8adc8c
XL
499 self.inner.reset();
500 self.inner.get_mut().reset_data();
501
502 Ok(remaining)
503 }
504}
505
506impl<R> MultiGzDecoder<R> {
ff7c6d11
XL
507 /// Returns the current header associated with this stream, if it's valid
508 pub fn header(&self) -> Option<&GzHeader> {
509 self.header.as_ref().ok()
ea8adc8c
XL
510 }
511
512 /// Acquires a reference to the underlying reader.
513 pub fn get_ref(&self) -> &R {
514 self.inner.get_ref().get_ref()
515 }
516
517 /// Acquires a mutable reference to the underlying stream.
518 ///
519 /// Note that mutation of the stream may result in surprising results if
520 /// this encoder is continued to be used.
521 pub fn get_mut(&mut self) -> &mut R {
522 self.inner.get_mut().get_mut()
523 }
524
525 /// Consumes this decoder, returning the underlying reader.
526 pub fn into_inner(self) -> R {
527 self.inner.into_inner().into_inner()
528 }
529}
530
531impl<R: BufRead> Read for MultiGzDecoder<R> {
532 fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
ff7c6d11
XL
533 if let Err(ref mut e) = self.header {
534 let another_error = io::ErrorKind::Other.into();
b7449926 535 return Err(mem::replace(e, another_error));
ff7c6d11 536 }
b7449926 537 match self.inner.read(into)? {
ea8adc8c
XL
538 0 => match self.finish_member() {
539 Ok(0) => Ok(0),
540 Ok(_) => self.read(into),
541 Err(e) => Err(e),
542 },
543 n => Ok(n),
544 }
545 }
546}
547
548impl<R: BufRead + Write> Write for MultiGzDecoder<R> {
549 fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
550 self.get_mut().write(buf)
551 }
552
553 fn flush(&mut self) -> io::Result<()> {
554 self.get_mut().flush()
555 }
556}