const MAX_BLOB_SIZE: usize = 128*1024*1024;
-use super::*;
+use super::file_formats::*;
+use super::CryptConfig;
+
+/// Encoded data chunk with digest and positional information
+pub struct ChunkInfo {
+ pub chunk: DataBlob,
+ pub digest: [u8; 32],
+ pub chunk_len: u64,
+ pub offset: u64,
+}
/// Data blob binary storage format
///
/// Data blobs store arbitrary binary data (< 128MB), and can be
-/// compressed and encrypted. A simply binary format is used to store
-/// them on disk or transfer them over the network. Please use index
-/// files to store large data files (".fidx" of ".didx").
+/// compressed and encrypted (or just signed). A simple binary format
+/// is used to store them on disk or transfer them over the network.
+///
+/// Please use index files to store large data files (".fidx" or
+/// ".didx").
///
pub struct DataBlob {
raw_data: Vec<u8>, // tagged, compressed, encryped data
/// accessor to crc32 checksum
pub fn crc(&self) -> u32 {
- let crc_o = proxmox::tools::offsetof!(DataBlobHeader, crc);
+ let crc_o = proxmox::offsetof!(DataBlobHeader, crc);
u32::from_le_bytes(self.raw_data[crc_o..crc_o+4].try_into().unwrap())
}
// set the CRC checksum field
pub fn set_crc(&mut self, crc: u32) {
- let crc_o = proxmox::tools::offsetof!(DataBlobHeader, crc);
+ let crc_o = proxmox::offsetof!(DataBlobHeader, crc);
self.raw_data[crc_o..crc_o+4].copy_from_slice(&crc.to_le_bytes());
}
/// compute the CRC32 checksum
pub fn compute_crc(&self) -> u32 {
let mut hasher = crc32fast::Hasher::new();
- let start = std::mem::size_of::<DataBlobHeader>(); // start after HEAD
+ let start = header_size(self.magic()); // start after HEAD
hasher.update(&self.raw_data[start..]);
hasher.finalize()
}
if magic == &UNCOMPRESSED_BLOB_MAGIC_1_0 {
let data_start = std::mem::size_of::<DataBlobHeader>();
- return Ok(self.raw_data[data_start..].to_vec());
+ Ok(self.raw_data[data_start..].to_vec())
} else if magic == &COMPRESSED_BLOB_MAGIC_1_0 {
let data_start = std::mem::size_of::<DataBlobHeader>();
let data = zstd::block::decompress(&self.raw_data[data_start..], MAX_BLOB_SIZE)?;
- return Ok(data);
+ Ok(data)
} else if magic == &ENCR_COMPR_BLOB_MAGIC_1_0 || magic == &ENCRYPTED_BLOB_MAGIC_1_0 {
let header_len = std::mem::size_of::<EncryptedDataBlobHeader>();
let head = unsafe {
} else {
config.decode_uncompressed_chunk(&self.raw_data[header_len..], &head.iv, &head.tag)?
};
- return Ok(data);
+ Ok(data)
} else {
bail!("unable to decrypt blob - missing CryptConfig");
}
if magic == &AUTH_COMPR_BLOB_MAGIC_1_0 {
let data = zstd::block::decompress(&self.raw_data[data_start..], 16*1024*1024)?;
- return Ok(data);
+ Ok(data)
} else {
- return Ok(self.raw_data[data_start..].to_vec());
+ Ok(self.raw_data[data_start..].to_vec())
}
} else {
bail!("Invalid blob magic number.");
let mut blob = DataBlob { raw_data };
blob.set_crc(blob.compute_crc());
- return Ok(blob);
+ Ok(blob)
+ }
+
+ /// Load blob from ``reader``
+ pub fn load(reader: &mut dyn std::io::Read) -> Result<Self, Error> {
+
+ let mut data = Vec::with_capacity(1024*1024);
+ reader.read_to_end(&mut data)?;
+
+ Self::from_raw(data)
}
/// Create Instance from raw data
}
}
-}
+ /// Verify digest and data length for unencrypted chunks.
+ ///
+ /// To do that, we need to decompress data first. Please note that
+ /// this is not possible for encrypted chunks.
+ pub fn verify_unencrypted(
+ &self,
+ expected_chunk_size: usize,
+ expected_digest: &[u8; 32],
+ ) -> Result<(), Error> {
-// TODO: impl. other blob types
+ let magic = self.magic();
+
+ let verify_raw_data = |data: &[u8]| {
+ if expected_chunk_size != data.len() {
+ bail!("detected chunk with wrong length ({} != {})", expected_chunk_size, data.len());
+ }
+ let digest = openssl::sha::sha256(data);
+ if &digest != expected_digest {
+ bail!("detected chunk with wrong digest.");
+ }
+ Ok(())
+ };
-use std::io::{Read, BufRead, Write, Seek, SeekFrom};
+ if magic == &COMPRESSED_BLOB_MAGIC_1_0 {
+ let data = zstd::block::decompress(&self.raw_data[12..], 16*1024*1024)?;
+ verify_raw_data(&data)?;
+ } else if magic == &UNCOMPRESSED_BLOB_MAGIC_1_0 {
+ verify_raw_data(&self.raw_data[12..])?;
+ }
-enum BlobWriterState<'a, W: Write> {
- Uncompressed { writer: W, hasher: crc32fast::Hasher },
- Compressed { compr: zstd::stream::write::Encoder<W>, hasher: crc32fast::Hasher },
- Signed {
- writer: W,
- hasher: crc32fast::Hasher,
- signer: openssl::sign::Signer<'a>,
- },
+ Ok(())
+ }
}
-/// Write compressed data blobs
-pub struct DataBlobWriter<'a, W: Write> {
- state: BlobWriterState<'a, W>,
+/// Builder for chunk DataBlobs
+///
+/// Main purpose is to centralize digest computation. Digest
+/// computation differs for encrypted chunks, and this interface ensures that
+/// we always compute the correct one.
+pub struct DataChunkBuilder<'a, 'b> {
+ config: Option<&'b CryptConfig>,
+ orig_data: &'a [u8],
+ digest_computed: bool,
+ digest: [u8; 32],
+ compress: bool,
}
-impl <'a, W: Write + Seek> DataBlobWriter<'a, W> {
+impl <'a, 'b> DataChunkBuilder<'a, 'b> {
- pub fn new_uncompressed(mut writer: W) -> Result<Self, Error> {
- let hasher = crc32fast::Hasher::new();
- writer.seek(SeekFrom::Start(0))?;
- let head = DataBlobHeader { magic: UNCOMPRESSED_BLOB_MAGIC_1_0, crc: [0; 4] };
- unsafe {
- writer.write_le_value(head)?;
+ /// Create a new builder instance.
+ pub fn new(orig_data: &'a [u8]) -> Self {
+ Self {
+ orig_data,
+ config: None,
+ digest_computed: false,
+ digest: [0u8; 32],
+ compress: true,
}
- let state = BlobWriterState::Uncompressed { writer, hasher };
- Ok(Self { state })
}
- pub fn new_compressed(mut writer: W) -> Result<Self, Error> {
- let hasher = crc32fast::Hasher::new();
- writer.seek(SeekFrom::Start(0))?;
- let head = DataBlobHeader { magic: COMPRESSED_BLOB_MAGIC_1_0, crc: [0; 4] };
- unsafe {
- writer.write_le_value(head)?;
- }
- let compr = zstd::stream::write::Encoder::new(writer, 1)?;
- let state = BlobWriterState::Compressed { compr, hasher };
- Ok(Self { state })
+ /// Set compression flag.
+ ///
+ /// If true, chunk data is compressed using zstd (level 1).
+ pub fn compress(mut self, value: bool) -> Self {
+ self.compress = value;
+ self
}
- pub fn new_signed(mut writer: W, config: &'a CryptConfig) -> Result<Self, Error> {
- let hasher = crc32fast::Hasher::new();
- writer.seek(SeekFrom::Start(0))?;
- let head = AuthenticatedDataBlobHeader {
- head: DataBlobHeader { magic: AUTHENTICATED_BLOB_MAGIC_1_0, crc: [0; 4] },
- tag: [0u8; 32],
- };
- unsafe {
- writer.write_le_value(head)?;
+ /// Set encryption configuration
+ ///
+ /// If set, chunks are encrypted.
+ pub fn crypt_config(mut self, value: &'b CryptConfig) -> Self {
+ if self.digest_computed {
+ panic!("unable to set crypt_config after compute_digest().");
}
- let signer = config.data_signer();
-
- let state = BlobWriterState::Signed { writer, hasher, signer };
- Ok(Self { state })
+ self.config = Some(value);
+ self
}
- pub fn finish(self) -> Result<W, Error> {
- match self.state {
- BlobWriterState::Uncompressed { mut writer, hasher } => {
- // write CRC
- let crc = hasher.finalize();
- let head = DataBlobHeader { magic: COMPRESSED_BLOB_MAGIC_1_0, crc: crc.to_le_bytes() };
-
- writer.seek(SeekFrom::Start(0))?;
- unsafe {
- writer.write_le_value(head)?;
- }
-
- return Ok(writer)
- }
- BlobWriterState::Compressed { compr, hasher } => {
- let mut writer = compr.finish()?;
-
- // write CRC
- let crc = hasher.finalize();
- let head = DataBlobHeader { magic: COMPRESSED_BLOB_MAGIC_1_0, crc: crc.to_le_bytes() };
-
- writer.seek(SeekFrom::Start(0))?;
- unsafe {
- writer.write_le_value(head)?;
- }
-
- return Ok(writer)
- }
- BlobWriterState::Signed { mut writer, hasher, signer, .. } => {
- // write CRC and hmac
- let crc = hasher.finalize();
-
- let mut head = AuthenticatedDataBlobHeader {
- head: DataBlobHeader { magic: AUTHENTICATED_BLOB_MAGIC_1_0, crc: crc.to_le_bytes() },
- tag: [0u8; 32],
- };
- signer.sign(&mut head.tag)?;
-
- writer.seek(SeekFrom::Start(0))?;
- unsafe {
- writer.write_le_value(head)?;
- }
-
- return Ok(writer)
+ fn compute_digest(&mut self) {
+ if !self.digest_computed {
+ if let Some(ref config) = self.config {
+ self.digest = config.compute_digest(self.orig_data);
+ } else {
+ self.digest = openssl::sha::sha256(self.orig_data);
}
+ self.digest_computed = true;
}
}
-}
-impl <'a, W: Write + Seek> Write for DataBlobWriter<'a, W> {
-
- fn write(&mut self, buf: &[u8]) -> Result<usize, std::io::Error> {
- match self.state {
- BlobWriterState::Uncompressed { ref mut writer, ref mut hasher } => {
- hasher.update(buf);
- writer.write(buf)
- }
- BlobWriterState::Compressed { ref mut compr, ref mut hasher } => {
- hasher.update(buf);
- compr.write(buf)
- }
- BlobWriterState::Signed { ref mut writer, ref mut hasher, ref mut signer, .. } => {
- hasher.update(buf);
- signer.update(buf).
- map_err(|err| {
- std::io::Error::new(
- std::io::ErrorKind::Other,
- format!("hmac update failed - {}", err))
- })?;
- writer.write(buf)
- }
+ /// Returns the chunk Digest
+ ///
+ /// Note: For encrypted chunks, this needs to be called after
+ /// ``crypt_config``.
+ pub fn digest(&mut self) -> &[u8; 32] {
+ if !self.digest_computed {
+ self.compute_digest();
}
+ &self.digest
}
- fn flush(&mut self) -> Result<(), std::io::Error> {
- match self.state {
- BlobWriterState::Uncompressed { ref mut writer, .. } => {
- writer.flush()
- }
- BlobWriterState::Compressed { ref mut compr, .. } => {
- compr.flush()
- }
- BlobWriterState::Signed { ref mut writer, .. } => {
- writer.flush()
- }
+ /// Consume self and build the ``DataBlob``.
+ ///
+ /// Returns the blob and the computed digest.
+ pub fn build(mut self) -> Result<(DataBlob, [u8; 32]), Error> {
+ if !self.digest_computed {
+ self.compute_digest();
}
- }
-}
-
-/// Read compressed data blobs
-pub struct CompressedDataBlobReader<R: BufRead> {
- decompr: zstd::stream::read::Decoder<R>,
- hasher: Option<crc32fast::Hasher>,
- expected_crc: u32,
-}
-
-impl <R: BufRead> CompressedDataBlobReader<R> {
- pub fn new(mut reader: R) -> Result<Self, Error> {
+ let chunk = DataBlob::encode(
+ self.orig_data,
+ self.config,
+ self.compress,
+ )?;
- let head: DataBlobHeader = unsafe { reader.read_le_value()? };
- if head.magic != COMPRESSED_BLOB_MAGIC_1_0 {
- bail!("got wrong magic number");
- }
- let expected_crc = u32::from_le_bytes(head.crc);
- let decompr = zstd::stream::read::Decoder::with_buffer(reader)?;
- Ok(Self { decompr: decompr, hasher: Some(crc32fast::Hasher::new()), expected_crc })
+ Ok((chunk, self.digest))
}
-}
-impl <R: BufRead> Read for CompressedDataBlobReader<R> {
+ /// Create a chunk filled with zeroes
+ pub fn build_zero_chunk(
+ crypt_config: Option<&CryptConfig>,
+ chunk_size: usize,
+ compress: bool,
+ ) -> Result<(DataBlob, [u8; 32]), Error> {
- fn read(&mut self, buf: &mut [u8]) -> Result<usize, std::io::Error> {
- let count = self.decompr.read(buf)?;
- if count == 0 { // EOF, verify crc
- let hasher = self.hasher.take().expect("blob reader already finished");
- let crc = hasher.finalize();
- if crc != self.expected_crc {
- return Err(std::io::Error::new(std::io::ErrorKind::Other, "blob reader crc error"));
- }
- } else {
- let hasher = self.hasher.as_mut().expect("blob reader already finished");
- hasher.update(buf);
+ let mut zero_bytes = Vec::with_capacity(chunk_size);
+ zero_bytes.resize(chunk_size, 0u8);
+ let mut chunk_builder = DataChunkBuilder::new(&zero_bytes).compress(compress);
+ if let Some(ref crypt_config) = crypt_config {
+ chunk_builder = chunk_builder.crypt_config(crypt_config);
}
- Ok(count)
+
+ chunk_builder.build()
}
+
}