]> git.proxmox.com Git - proxmox-backup.git/blobdiff - src/backup/data_blob.rs
bump proxmox crate to 0.1.7
[proxmox-backup.git] / src / backup / data_blob.rs
index 2bc11faf42d367b10d3f75878595ec6ca1da8d96..3a5a05e0dbd0ecda64169e4a3b1f277777c13cd3 100644 (file)
@@ -5,14 +5,25 @@ use proxmox::tools::io::{ReadExt, WriteExt};
 
 const MAX_BLOB_SIZE: usize = 128*1024*1024;
 
-use super::*;
+use super::file_formats::*;
+use super::CryptConfig;
+
+/// Encoded data chunk with digest and positional information
+pub struct ChunkInfo {
+    pub chunk: DataBlob,
+    pub digest: [u8; 32],
+    pub chunk_len: u64,
+    pub offset: u64,
+}
 
 /// Data blob binary storage format
 ///
 /// Data blobs store arbitrary binary data (< 128MB), and can be
-/// compressed and encrypted. A simply binary format is used to store
-/// them on disk or transfer them over the network. Please use index
-/// files to store large data files (".fidx" of ".didx").
+/// compressed and encrypted (or just signed). A simple binary format
+/// is used to store them on disk or transfer them over the network.
+///
+/// Please use index files to store large data files (".fidx" or
+/// ".didx").
 ///
 pub struct DataBlob {
     raw_data: Vec<u8>, // tagged, compressed, encryped data
@@ -37,20 +48,20 @@ impl DataBlob {
 
     /// accessor to crc32 checksum
     pub fn crc(&self) -> u32 {
-        let crc_o = proxmox::tools::offsetof!(DataBlobHeader, crc);
+        let crc_o = proxmox::offsetof!(DataBlobHeader, crc);
         u32::from_le_bytes(self.raw_data[crc_o..crc_o+4].try_into().unwrap())
     }
 
     // set the CRC checksum field
     pub fn set_crc(&mut self, crc: u32) {
-        let crc_o = proxmox::tools::offsetof!(DataBlobHeader, crc);
+        let crc_o = proxmox::offsetof!(DataBlobHeader, crc);
         self.raw_data[crc_o..crc_o+4].copy_from_slice(&crc.to_le_bytes());
     }
 
     /// compute the CRC32 checksum
     pub fn compute_crc(&self) -> u32 {
         let mut hasher = crc32fast::Hasher::new();
-        let start = std::mem::size_of::<DataBlobHeader>(); // start after HEAD
+        let start = header_size(self.magic()); // start after HEAD
         hasher.update(&self.raw_data[start..]);
         hasher.finalize()
     }
@@ -162,11 +173,11 @@ impl DataBlob {
 
         if magic == &UNCOMPRESSED_BLOB_MAGIC_1_0 {
             let data_start = std::mem::size_of::<DataBlobHeader>();
-            return Ok(self.raw_data[data_start..].to_vec());
+            Ok(self.raw_data[data_start..].to_vec())
         } else if magic == &COMPRESSED_BLOB_MAGIC_1_0 {
             let data_start = std::mem::size_of::<DataBlobHeader>();
             let data = zstd::block::decompress(&self.raw_data[data_start..], MAX_BLOB_SIZE)?;
-            return Ok(data);
+            Ok(data)
         } else if magic == &ENCR_COMPR_BLOB_MAGIC_1_0 || magic == &ENCRYPTED_BLOB_MAGIC_1_0 {
             let header_len = std::mem::size_of::<EncryptedDataBlobHeader>();
             let head = unsafe {
@@ -179,7 +190,7 @@ impl DataBlob {
                 } else {
                     config.decode_uncompressed_chunk(&self.raw_data[header_len..], &head.iv, &head.tag)?
                 };
-                return Ok(data);
+                Ok(data)
             } else {
                 bail!("unable to decrypt blob - missing CryptConfig");
             }
@@ -201,9 +212,9 @@ impl DataBlob {
 
             if magic == &AUTH_COMPR_BLOB_MAGIC_1_0 {
                 let data = zstd::block::decompress(&self.raw_data[data_start..], 16*1024*1024)?;
-                return Ok(data);
+                Ok(data)
             } else {
-                return Ok(self.raw_data[data_start..].to_vec());
+                Ok(self.raw_data[data_start..].to_vec())
             }
         } else {
             bail!("Invalid blob magic number.");
@@ -249,7 +260,16 @@ impl DataBlob {
         let mut blob = DataBlob { raw_data };
         blob.set_crc(blob.compute_crc());
 
-        return Ok(blob);
+        Ok(blob)
+    }
+
+    /// Load blob from ``reader``
+    pub fn load(reader: &mut dyn std::io::Read) -> Result<Self, Error> {
+
+        let mut data = Vec::with_capacity(1024*1024);
+        reader.read_to_end(&mut data)?;
+
+        Self::from_raw(data)
     }
 
     /// Create Instance from raw data
@@ -288,192 +308,139 @@ impl DataBlob {
         }
     }
 
-}
+    /// Verify digest and data length for unencrypted chunks.
+    ///
+    /// To do that, we need to decompress data first. Please note that
+    /// this is not possible for encrypted chunks.
+    pub fn verify_unencrypted(
+        &self,
+        expected_chunk_size: usize,
+        expected_digest: &[u8; 32],
+    ) -> Result<(), Error> {
 
-// TODO: impl. other blob types
+        let magic = self.magic();
+
+        let verify_raw_data = |data: &[u8]| {
+            if expected_chunk_size != data.len() {
+                bail!("detected chunk with wrong length ({} != {})", expected_chunk_size, data.len());
+            }
+            let digest = openssl::sha::sha256(data);
+            if &digest != expected_digest {
+                bail!("detected chunk with wrong digest.");
+            }
+            Ok(())
+        };
 
-use std::io::{Read, BufRead, Write, Seek, SeekFrom};
+        if magic == &COMPRESSED_BLOB_MAGIC_1_0 {
+            let data = zstd::block::decompress(&self.raw_data[12..], 16*1024*1024)?;
+            verify_raw_data(&data)?;
+        } else if magic == &UNCOMPRESSED_BLOB_MAGIC_1_0 {
+            verify_raw_data(&self.raw_data[12..])?;
+        }
 
-enum BlobWriterState<'a, W: Write> {
-    Uncompressed { writer: W, hasher: crc32fast::Hasher },
-    Compressed { compr: zstd::stream::write::Encoder<W>, hasher: crc32fast::Hasher },
-    Signed {
-        writer: W,
-        hasher: crc32fast::Hasher,
-        signer: openssl::sign::Signer<'a>,
-    },
+        Ok(())
+    }
 }
 
-/// Write compressed data blobs
-pub struct DataBlobWriter<'a, W: Write> {
-    state: BlobWriterState<'a, W>,
+/// Builder for chunk DataBlobs
+///
+/// Main purpose is to centralize digest computation. Digest
+/// computation differ for encryped chunk, and this interface ensures that
+/// we always compute the correct one.
+pub struct DataChunkBuilder<'a, 'b> {
+    config: Option<&'b CryptConfig>,
+    orig_data: &'a [u8],
+    digest_computed: bool,
+    digest: [u8; 32],
+    compress: bool,
 }
 
-impl <'a, W: Write + Seek> DataBlobWriter<'a, W> {
+impl <'a, 'b> DataChunkBuilder<'a, 'b> {
 
-    pub fn new_uncompressed(mut writer: W) -> Result<Self, Error> {
-        let hasher = crc32fast::Hasher::new();
-        writer.seek(SeekFrom::Start(0))?;
-        let head = DataBlobHeader { magic: UNCOMPRESSED_BLOB_MAGIC_1_0, crc: [0; 4] };
-        unsafe {
-            writer.write_le_value(head)?;
+    /// Create a new builder instance.
+    pub fn new(orig_data: &'a [u8]) -> Self {
+        Self {
+            orig_data,
+            config: None,
+            digest_computed: false,
+            digest: [0u8; 32],
+            compress: true,
         }
-        let state = BlobWriterState::Uncompressed { writer, hasher };
-        Ok(Self { state })
     }
 
-    pub fn new_compressed(mut writer: W) -> Result<Self, Error> {
-        let hasher = crc32fast::Hasher::new();
-        writer.seek(SeekFrom::Start(0))?;
-        let head = DataBlobHeader { magic: COMPRESSED_BLOB_MAGIC_1_0, crc: [0; 4] };
-        unsafe {
-            writer.write_le_value(head)?;
-        }
-        let compr = zstd::stream::write::Encoder::new(writer, 1)?;
-        let state = BlobWriterState::Compressed { compr, hasher };
-        Ok(Self { state })
+    /// Set compression flag.
+    ///
+    /// If true, chunk data is compressed using zstd (level 1).
+    pub fn compress(mut self, value: bool) -> Self {
+        self.compress = value;
+        self
     }
 
-    pub fn new_signed(mut writer: W, config: &'a CryptConfig) -> Result<Self, Error> {
-        let hasher = crc32fast::Hasher::new();
-        writer.seek(SeekFrom::Start(0))?;
-        let head = AuthenticatedDataBlobHeader {
-            head: DataBlobHeader { magic: AUTHENTICATED_BLOB_MAGIC_1_0, crc: [0; 4] },
-            tag: [0u8; 32],
-        };
-        unsafe {
-            writer.write_le_value(head)?;
+    /// Set encryption Configuration
+    ///
+    /// If set, chunks are encrypted.
+    pub fn crypt_config(mut self, value: &'b CryptConfig) -> Self {
+        if self.digest_computed {
+            panic!("unable to set crypt_config after compute_digest().");
         }
-        let signer = config.data_signer();
-
-        let state = BlobWriterState::Signed { writer, hasher, signer };
-        Ok(Self { state })
+        self.config = Some(value);
+        self
     }
 
-    pub fn finish(self) -> Result<W, Error> {
-        match self.state {
-            BlobWriterState::Uncompressed { mut writer, hasher } => {
-                // write CRC
-                let crc = hasher.finalize();
-                let head = DataBlobHeader { magic: COMPRESSED_BLOB_MAGIC_1_0, crc: crc.to_le_bytes() };
-
-                writer.seek(SeekFrom::Start(0))?;
-                unsafe {
-                    writer.write_le_value(head)?;
-                }
-
-                return Ok(writer)
-            }
-            BlobWriterState::Compressed { compr, hasher } => {
-                let mut writer = compr.finish()?;
-
-                // write CRC
-                let crc = hasher.finalize();
-                let head = DataBlobHeader { magic: COMPRESSED_BLOB_MAGIC_1_0, crc: crc.to_le_bytes() };
-
-                writer.seek(SeekFrom::Start(0))?;
-                unsafe {
-                    writer.write_le_value(head)?;
-                }
-
-                return Ok(writer)
-            }
-            BlobWriterState::Signed { mut writer, hasher, signer, .. } => {
-                // write CRC and hmac
-                let crc = hasher.finalize();
-
-                let mut head = AuthenticatedDataBlobHeader {
-                    head: DataBlobHeader { magic: AUTHENTICATED_BLOB_MAGIC_1_0, crc: crc.to_le_bytes() },
-                    tag: [0u8; 32],
-                };
-                signer.sign(&mut head.tag)?;
-
-                writer.seek(SeekFrom::Start(0))?;
-                unsafe {
-                    writer.write_le_value(head)?;
-                }
-
-                return Ok(writer)
+    fn compute_digest(&mut self) {
+        if !self.digest_computed {
+            if let Some(ref config) = self.config {
+                self.digest = config.compute_digest(self.orig_data);
+            } else {
+                self.digest = openssl::sha::sha256(self.orig_data);
             }
+            self.digest_computed = true;
         }
     }
-}
 
-impl <'a, W: Write + Seek> Write for DataBlobWriter<'a, W> {
-
-    fn write(&mut self, buf: &[u8]) -> Result<usize, std::io::Error> {
-        match self.state {
-            BlobWriterState::Uncompressed { ref mut writer, ref mut hasher } => {
-                hasher.update(buf);
-                writer.write(buf)
-            }
-            BlobWriterState::Compressed { ref mut compr, ref mut hasher } => {
-                hasher.update(buf);
-                compr.write(buf)
-            }
-            BlobWriterState::Signed { ref mut writer, ref mut hasher, ref mut signer, .. } => {
-                hasher.update(buf);
-                signer.update(buf).
-                    map_err(|err| {
-                        std::io::Error::new(
-                            std::io::ErrorKind::Other,
-                            format!("hmac update failed - {}", err))
-                    })?;
-                writer.write(buf)
-             }
+    /// Returns the chunk Digest
+    ///
+    /// Note: For encrypted chunks, this needs to be called after
+    /// ``crypt_config``.
+    pub fn digest(&mut self) -> &[u8; 32] {
+        if !self.digest_computed {
+            self.compute_digest();
         }
+        &self.digest
     }
 
-    fn flush(&mut self) -> Result<(), std::io::Error> {
-        match self.state {
-            BlobWriterState::Uncompressed { ref mut writer, .. } => {
-                writer.flush()
-            }
-            BlobWriterState::Compressed { ref mut compr, .. } => {
-                compr.flush()
-            }
-            BlobWriterState::Signed { ref mut writer, .. } => {
-                writer.flush()
-            }
+    /// Consume self and build the ``DataBlob``.
+    ///
+    /// Returns the blob and the computed digest.
+    pub fn build(mut self) -> Result<(DataBlob, [u8; 32]), Error> {
+        if !self.digest_computed {
+            self.compute_digest();
         }
-    }
-}
-
-/// Read compressed data blobs
-pub struct CompressedDataBlobReader<R: BufRead> {
-    decompr: zstd::stream::read::Decoder<R>,
-    hasher: Option<crc32fast::Hasher>,
-    expected_crc: u32,
-}
-
-impl <R: BufRead> CompressedDataBlobReader<R> {
 
-    pub fn new(mut reader: R) -> Result<Self, Error> {
+        let chunk = DataBlob::encode(
+            self.orig_data,
+            self.config,
+            self.compress,
+        )?;
 
-        let head: DataBlobHeader = unsafe { reader.read_le_value()? };
-        if head.magic != COMPRESSED_BLOB_MAGIC_1_0 {
-            bail!("got wrong magic number");
-        }
-        let expected_crc = u32::from_le_bytes(head.crc);
-        let decompr = zstd::stream::read::Decoder::with_buffer(reader)?;
-        Ok(Self { decompr: decompr, hasher: Some(crc32fast::Hasher::new()), expected_crc })
+        Ok((chunk, self.digest))
     }
-}
 
-impl <R: BufRead> Read for CompressedDataBlobReader<R> {
+    /// Create a chunk filled with zeroes
+    pub fn build_zero_chunk(
+        crypt_config: Option<&CryptConfig>,
+        chunk_size: usize,
+        compress: bool,
+    ) -> Result<(DataBlob, [u8; 32]), Error> {
 
-    fn read(&mut self, buf: &mut [u8]) -> Result<usize, std::io::Error> {
-        let count = self.decompr.read(buf)?;
-        if count == 0 { // EOF, verify crc
-            let hasher = self.hasher.take().expect("blob reader already finished");
-            let crc = hasher.finalize();
-            if crc != self.expected_crc {
-                return Err(std::io::Error::new(std::io::ErrorKind::Other, "blob reader crc error"));
-            }
-        } else {
-            let hasher = self.hasher.as_mut().expect("blob reader already finished");
-            hasher.update(buf);
+        let mut zero_bytes = Vec::with_capacity(chunk_size);
+        zero_bytes.resize(chunk_size, 0u8);
+        let mut chunk_builder = DataChunkBuilder::new(&zero_bytes).compress(compress);
+        if let Some(ref crypt_config) = crypt_config {
+            chunk_builder = chunk_builder.crypt_config(crypt_config);
         }
-        Ok(count)
+
+        chunk_builder.build()
     }
+
 }