]> git.proxmox.com Git - rustc.git/blob - vendor/snap/src/read.rs
New upstream version 1.48.0~beta.8+dfsg1
[rustc.git] / vendor / snap / src / read.rs
1 /*!
2 This module provides two `std::io::Read` implementations:
3
4 * [`read::FrameDecoder`](struct.FrameDecoder.html)
5 wraps another `std::io::Read` implementation, and decompresses data encoded
6 using the Snappy frame format. Use this if you have a compressed data source
7 and wish to read it as uncompressed data.
8 * [`read::FrameEncoder`](struct.FrameEncoder.html)
9 wraps another `std::io::Read` implementation, and compresses data encoded
10 using the Snappy frame format. Use this if you have an uncompressed data source
11 and wish to read it as compressed data.
12
13 Typically, `read::FrameDecoder` is the version that you'll want.
14 */
15
16 use std::cmp;
17 use std::fmt;
18 use std::io;
19
20 use crate::bytes;
21 use crate::compress::Encoder;
22 use crate::crc32::CheckSummer;
23 use crate::decompress::{decompress_len, Decoder};
24 use crate::error::Error;
25 use crate::frame::{
26 compress_frame, ChunkType, CHUNK_HEADER_AND_CRC_SIZE,
27 MAX_COMPRESS_BLOCK_SIZE, STREAM_BODY, STREAM_IDENTIFIER,
28 };
29 use crate::MAX_BLOCK_SIZE;
30
31 /// The maximum size of a compressed block, including the header and stream
32 /// identifier, that can be emitted by FrameEncoder.
33 const MAX_READ_FRAME_ENCODER_BLOCK_SIZE: usize = STREAM_IDENTIFIER.len()
34 + CHUNK_HEADER_AND_CRC_SIZE
35 + MAX_COMPRESS_BLOCK_SIZE;
36
/// A reader for decompressing a Snappy stream.
///
/// This `FrameDecoder` wraps any other reader that implements `std::io::Read`.
/// Bytes read from this reader are decompressed using the
/// [Snappy frame format](https://github.com/google/snappy/blob/master/framing_format.txt)
/// (file extension `sz`, MIME type `application/x-snappy-framed`).
///
/// This reader can potentially make many small reads from the underlying
/// stream depending on its format, therefore, passing in a buffered reader
/// may be beneficial.
pub struct FrameDecoder<R: io::Read> {
    /// The underlying reader, which supplies the framed (compressed) bytes.
    r: R,
    /// A Snappy decoder that we reuse that does the actual block based
    /// decompression.
    dec: Decoder,
    /// A CRC32 checksummer that is configured to either use the portable
    /// fallback version or the SSE4.2 accelerated version when the right CPU
    /// features are available.
    checksummer: CheckSummer,
    /// The compressed bytes buffer, taken from the underlying reader. It is
    /// also used as scratch space for chunk headers and for the bodies of
    /// chunks that are read and then discarded.
    src: Vec<u8>,
    /// The decompressed bytes buffer. Bytes are decompressed from src to dst
    /// before being passed back to the caller. Uncompressed chunks are read
    /// straight into this buffer.
    dst: Vec<u8>,
    /// Index into dst: starting point of bytes not yet given back to caller.
    /// Advanced on every `read` that copies bytes out of `dst`.
    dsts: usize,
    /// Index into dst: ending point of bytes not yet given back to caller.
    /// When `dsts == dste` there is nothing pending and the next `read`
    /// fetches another chunk.
    dste: usize,
    /// Whether we've read the special stream header or not. Until this is
    /// set, the first chunk must be a stream identifier chunk.
    read_stream_ident: bool,
}
69
70 impl<R: io::Read> FrameDecoder<R> {
71 /// Create a new reader for streaming Snappy decompression.
72 pub fn new(rdr: R) -> FrameDecoder<R> {
73 FrameDecoder {
74 r: rdr,
75 dec: Decoder::new(),
76 checksummer: CheckSummer::new(),
77 src: vec![0; MAX_COMPRESS_BLOCK_SIZE],
78 dst: vec![0; MAX_BLOCK_SIZE],
79 dsts: 0,
80 dste: 0,
81 read_stream_ident: false,
82 }
83 }
84
85 /// Gets a reference to the underlying reader in this decoder.
86 pub fn get_ref(&self) -> &R {
87 &self.r
88 }
89
90 /// Gets a mutable reference to the underlying reader in this decoder.
91 ///
92 /// Note that mutation of the stream may result in surprising results if
93 /// this decoder is continued to be used.
94 pub fn get_mut(&mut self) -> &mut R {
95 &mut self.r
96 }
97 }
98
99 impl<R: io::Read> io::Read for FrameDecoder<R> {
100 fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
101 macro_rules! fail {
102 ($err:expr) => {
103 return Err(io::Error::from($err));
104 };
105 }
106 loop {
107 if self.dsts < self.dste {
108 let len = cmp::min(self.dste - self.dsts, buf.len());
109 let dste = self.dsts.checked_add(len).unwrap();
110 buf[0..len].copy_from_slice(&self.dst[self.dsts..dste]);
111 self.dsts = dste;
112 return Ok(len);
113 }
114 if !read_exact_eof(&mut self.r, &mut self.src[0..4])? {
115 return Ok(0);
116 }
117 let ty = ChunkType::from_u8(self.src[0]);
118 if !self.read_stream_ident {
119 if ty != Ok(ChunkType::Stream) {
120 fail!(Error::StreamHeader { byte: self.src[0] });
121 }
122 self.read_stream_ident = true;
123 }
124 let len64 = bytes::read_u24_le(&self.src[1..]) as u64;
125 if len64 > self.src.len() as u64 {
126 fail!(Error::UnsupportedChunkLength {
127 len: len64,
128 header: false,
129 });
130 }
131 let len = len64 as usize;
132 match ty {
133 Err(b) if 0x02 <= b && b <= 0x7F => {
134 // Spec says that chunk types 0x02-0x7F are reserved and
135 // conformant decoders must return an error.
136 fail!(Error::UnsupportedChunkType { byte: b });
137 }
138 Err(b) if 0x80 <= b && b <= 0xFD => {
139 // Spec says that chunk types 0x80-0xFD are reserved but
140 // skippable.
141 self.r.read_exact(&mut self.src[0..len])?;
142 }
143 Err(b) => {
144 // Can never happen. 0x02-0x7F and 0x80-0xFD are handled
145 // above in the error case. That leaves 0x00, 0x01, 0xFE
146 // and 0xFF, each of which correspond to one of the four
147 // defined chunk types.
148 unreachable!("BUG: unhandled chunk type: {}", b);
149 }
150 Ok(ChunkType::Padding) => {
151 // Just read and move on.
152 self.r.read_exact(&mut self.src[0..len])?;
153 }
154 Ok(ChunkType::Stream) => {
155 if len != STREAM_BODY.len() {
156 fail!(Error::UnsupportedChunkLength {
157 len: len64,
158 header: true,
159 })
160 }
161 self.r.read_exact(&mut self.src[0..len])?;
162 if &self.src[0..len] != STREAM_BODY {
163 fail!(Error::StreamHeaderMismatch {
164 bytes: self.src[0..len].to_vec(),
165 });
166 }
167 }
168 Ok(ChunkType::Uncompressed) => {
169 if len < 4 {
170 fail!(Error::UnsupportedChunkLength {
171 len: len as u64,
172 header: false,
173 });
174 }
175 let expected_sum = bytes::io_read_u32_le(&mut self.r)?;
176 let n = len - 4;
177 if n > self.dst.len() {
178 fail!(Error::UnsupportedChunkLength {
179 len: n as u64,
180 header: false,
181 });
182 }
183 self.r.read_exact(&mut self.dst[0..n])?;
184 let got_sum =
185 self.checksummer.crc32c_masked(&self.dst[0..n]);
186 if expected_sum != got_sum {
187 fail!(Error::Checksum {
188 expected: expected_sum,
189 got: got_sum,
190 });
191 }
192 self.dsts = 0;
193 self.dste = n;
194 }
195 Ok(ChunkType::Compressed) => {
196 if len < 4 {
197 fail!(Error::UnsupportedChunkLength {
198 len: len as u64,
199 header: false,
200 });
201 }
202 let expected_sum = bytes::io_read_u32_le(&mut self.r)?;
203 let sn = len - 4;
204 if sn > self.src.len() {
205 fail!(Error::UnsupportedChunkLength {
206 len: len64,
207 header: false,
208 });
209 }
210 self.r.read_exact(&mut self.src[0..sn])?;
211 let dn = decompress_len(&self.src)?;
212 if dn > self.dst.len() {
213 fail!(Error::UnsupportedChunkLength {
214 len: dn as u64,
215 header: false,
216 });
217 }
218 self.dec
219 .decompress(&self.src[0..sn], &mut self.dst[0..dn])?;
220 let got_sum =
221 self.checksummer.crc32c_masked(&self.dst[0..dn]);
222 if expected_sum != got_sum {
223 fail!(Error::Checksum {
224 expected: expected_sum,
225 got: got_sum,
226 });
227 }
228 self.dsts = 0;
229 self.dste = dn;
230 }
231 }
232 }
233 }
234 }
235
236 impl<R: fmt::Debug + io::Read> fmt::Debug for FrameDecoder<R> {
237 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
238 f.debug_struct("FrameDecoder")
239 .field("r", &self.r)
240 .field("dec", &self.dec)
241 .field("checksummer", &self.checksummer)
242 .field("src", &"[...]")
243 .field("dst", &"[...]")
244 .field("dsts", &self.dsts)
245 .field("dste", &self.dste)
246 .field("read_stream_ident", &self.read_stream_ident)
247 .finish()
248 }
249 }
250
/// A reader for compressing data using snappy as it is read.
///
/// This `FrameEncoder` wraps any other reader that implements `std::io::Read`.
/// Bytes read from this reader are compressed using the
/// [Snappy frame format](https://github.com/google/snappy/blob/master/framing_format.txt)
/// (file extension `sz`, MIME type `application/x-snappy-framed`).
///
/// Usually you'll want
/// [`read::FrameDecoder`](struct.FrameDecoder.html)
/// (for decompressing while reading) or
/// [`write::FrameEncoder`](../write/struct.FrameEncoder.html)
/// (for compressing while writing) instead.
///
/// Unlike `FrameDecoder`, this will attempt to make large reads roughly
/// equivalent to the size of a single Snappy block. Therefore, callers may not
/// benefit from using a buffered reader.
pub struct FrameEncoder<R: io::Read> {
    /// Internally, we split `FrameEncoder` in two to keep the borrow checker
    /// happy. The `inner` member contains everything that `read_frame` needs
    /// to fetch a frame's worth of data and compress it.
    inner: Inner<R>,
    /// Data that we've encoded and are ready to return to our caller. Only
    /// used when the caller's buffer is too small to receive a whole frame
    /// directly.
    dst: Vec<u8>,
    /// Starting point of bytes in `dst` not yet given back to the caller.
    dsts: usize,
    /// Ending point of bytes in `dst` that we want to give to our caller.
    /// When `dsts == dste`, nothing is pending.
    dste: usize,
}
279
/// The part of `FrameEncoder` that reads from the underlying source and
/// compresses what it read into a frame. It is kept separate from the
/// encoder's output buffer so that `read_frame` can be handed that buffer as
/// a mutable slice.
struct Inner<R: io::Read> {
    /// The underlying data source.
    r: R,
    /// An encoder that we reuse that does the actual block based compression.
    enc: Encoder,
    /// A CRC32 checksummer that is configured to either use the portable
    /// fallback version or the SSE4.2 accelerated version when the right CPU
    /// features are available.
    checksummer: CheckSummer,
    /// Data taken from the underlying `r`, and not yet compressed.
    src: Vec<u8>,
    /// Have we written the standard snappy header to `dst` yet? The header is
    /// emitted once, in front of the first frame.
    wrote_stream_ident: bool,
}
294
295 impl<R: io::Read> FrameEncoder<R> {
296 /// Create a new reader for streaming Snappy compression.
297 pub fn new(rdr: R) -> FrameEncoder<R> {
298 FrameEncoder {
299 inner: Inner {
300 r: rdr,
301 enc: Encoder::new(),
302 checksummer: CheckSummer::new(),
303 src: vec![0; MAX_BLOCK_SIZE],
304 wrote_stream_ident: false,
305 },
306 dst: vec![0; MAX_READ_FRAME_ENCODER_BLOCK_SIZE],
307 dsts: 0,
308 dste: 0,
309 }
310 }
311
312 /// Gets a reference to the underlying reader in this decoder.
313 pub fn get_ref(&self) -> &R {
314 &self.inner.r
315 }
316
317 /// Gets a mutable reference to the underlying reader in this decoder.
318 ///
319 /// Note that mutation of the stream may result in surprising results if
320 /// this encoder is continued to be used.
321 pub fn get_mut(&mut self) -> &mut R {
322 &mut self.inner.r
323 }
324
325 /// Read previously compressed data from `self.dst`, returning the number of
326 /// bytes read. If `self.dst` is empty, returns 0.
327 fn read_from_dst(&mut self, buf: &mut [u8]) -> usize {
328 let available_bytes = self.dste - self.dsts;
329 let count = cmp::min(available_bytes, buf.len());
330 buf[..count].copy_from_slice(&self.dst[self.dsts..self.dsts + count]);
331 self.dsts += count;
332 count
333 }
334 }
335
336 impl<R: io::Read> io::Read for FrameEncoder<R> {
337 fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
338 // Try reading previously compressed bytes from our `dst` buffer, if
339 // any.
340 let count = self.read_from_dst(buf);
341
342 if count > 0 {
343 // We had some bytes in our `dst` buffer that we used.
344 Ok(count)
345 } else if buf.len() >= MAX_READ_FRAME_ENCODER_BLOCK_SIZE {
346 // Our output `buf` is big enough that we can directly write into
347 // it, so bypass `dst` entirely.
348 self.inner.read_frame(buf)
349 } else {
350 // We need to refill `self.dst`, and then return some bytes from
351 // that.
352 let count = self.inner.read_frame(&mut self.dst)?;
353 self.dsts = 0;
354 self.dste = count;
355 Ok(self.read_from_dst(buf))
356 }
357 }
358 }
359
360 impl<R: io::Read> Inner<R> {
361 /// Read from `self.r`, and create a new frame, writing it to `dst`, which
362 /// must be at least `MAX_READ_FRAME_ENCODER_BLOCK_SIZE` bytes in size.
363 fn read_frame(&mut self, dst: &mut [u8]) -> io::Result<usize> {
364 debug_assert!(dst.len() >= MAX_READ_FRAME_ENCODER_BLOCK_SIZE);
365
366 // We make one read to the underlying reader. If the underlying reader
367 // doesn't fill the buffer but there are still bytes to be read, then
368 // compression won't be optimal. The alternative would be to block
369 // until our buffer is maximally full (or we see EOF), but this seems
370 // more surprising. In general, io::Read implementations should try to
371 // fill the caller's buffer as much as they can, so this seems like the
372 // better choice.
373 let nread = self.r.read(&mut self.src)?;
374 if nread == 0 {
375 return Ok(0);
376 }
377
378 // If we haven't yet written the stream header to `dst`, write it.
379 let mut dst_write_start = 0;
380 if !self.wrote_stream_ident {
381 dst[0..STREAM_IDENTIFIER.len()].copy_from_slice(STREAM_IDENTIFIER);
382 dst_write_start += STREAM_IDENTIFIER.len();
383 self.wrote_stream_ident = true;
384 }
385
386 // Reserve space for our chunk header. We need to use `split_at_mut` so
387 // that we can get two mutable slices pointing at non-overlapping parts
388 // of `dst`.
389 let (chunk_header, remaining_dst) =
390 dst[dst_write_start..].split_at_mut(CHUNK_HEADER_AND_CRC_SIZE);
391 dst_write_start += CHUNK_HEADER_AND_CRC_SIZE;
392
393 // Compress our frame if possible, telling `compress_frame` to always
394 // put the output in `dst`.
395 let frame_data = compress_frame(
396 &mut self.enc,
397 self.checksummer,
398 &self.src[..nread],
399 chunk_header,
400 remaining_dst,
401 true,
402 )?;
403 Ok(dst_write_start + frame_data.len())
404 }
405 }
406
407 impl<R: fmt::Debug + io::Read> fmt::Debug for FrameEncoder<R> {
408 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
409 f.debug_struct("FrameEncoder")
410 .field("inner", &self.inner)
411 .field("dst", &"[...]")
412 .field("dsts", &self.dsts)
413 .field("dste", &self.dste)
414 .finish()
415 }
416 }
417
418 impl<R: fmt::Debug + io::Read> fmt::Debug for Inner<R> {
419 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
420 f.debug_struct("Inner")
421 .field("r", &self.r)
422 .field("enc", &self.enc)
423 .field("checksummer", &self.checksummer)
424 .field("src", &"[...]")
425 .field("wrote_stream_ident", &self.wrote_stream_ident)
426 .finish()
427 }
428 }
429
// Fill `buf` completely from `rdr`, reporting end-of-input as a value instead
// of an error.
//
// Returns Ok(true) when `buf` was filled and Ok(false) when `Read::read_exact`
// failed with `UnexpectedEof` (which includes running out of input after only
// part of `buf` was filled). Every other error is passed through unchanged.
fn read_exact_eof<R: io::Read>(
    rdr: &mut R,
    buf: &mut [u8],
) -> io::Result<bool> {
    if let Err(err) = rdr.read_exact(buf) {
        return if err.kind() == io::ErrorKind::UnexpectedEof {
            Ok(false)
        } else {
            Err(err)
        };
    }
    Ok(true)
}