]> git.proxmox.com Git - proxmox-backup.git/blob - src/backup/data_blob.rs
src/backup/file_formats.rs: remove signed chunks
[proxmox-backup.git] / src / backup / data_blob.rs
1 use anyhow::{bail, Error};
2 use std::convert::TryInto;
3
4 use proxmox::tools::io::{ReadExt, WriteExt};
5
6 use super::file_formats::*;
7 use super::{CryptConfig, CryptMode};
8
/// Largest payload, in bytes, accepted for a single blob (128 MiB).
const MAX_BLOB_SIZE: usize = 128 * 1024 * 1024;
10
11 /// Encoded data chunk with digest and positional information
12 pub struct ChunkInfo {
13 pub chunk: DataBlob,
14 pub digest: [u8; 32],
15 pub chunk_len: u64,
16 pub offset: u64,
17 }
18
/// Data blob binary storage format
///
/// A data blob stores arbitrary binary data (at most 128 MiB) and can
/// optionally be compressed and/or encrypted. A simple binary format
/// is used to store blobs on disk or to transfer them over the network.
///
/// Please use index files (".fidx" or ".didx") to store large data
/// files.
pub struct DataBlob {
    // Header (magic, CRC, ...) followed by the possibly compressed and/or
    // encrypted payload.
    raw_data: Vec<u8>,
}
31
32 impl DataBlob {
33
34 /// accessor to raw_data field
35 pub fn raw_data(&self) -> &[u8] {
36 &self.raw_data
37 }
38
39 /// Consume self and returns raw_data
40 pub fn into_inner(self) -> Vec<u8> {
41 self.raw_data
42 }
43
44 /// accessor to chunk type (magic number)
45 pub fn magic(&self) -> &[u8; 8] {
46 self.raw_data[0..8].try_into().unwrap()
47 }
48
49 /// accessor to crc32 checksum
50 pub fn crc(&self) -> u32 {
51 let crc_o = proxmox::offsetof!(DataBlobHeader, crc);
52 u32::from_le_bytes(self.raw_data[crc_o..crc_o+4].try_into().unwrap())
53 }
54
55 // set the CRC checksum field
56 pub fn set_crc(&mut self, crc: u32) {
57 let crc_o = proxmox::offsetof!(DataBlobHeader, crc);
58 self.raw_data[crc_o..crc_o+4].copy_from_slice(&crc.to_le_bytes());
59 }
60
61 /// compute the CRC32 checksum
62 pub fn compute_crc(&self) -> u32 {
63 let mut hasher = crc32fast::Hasher::new();
64 let start = header_size(self.magic()); // start after HEAD
65 hasher.update(&self.raw_data[start..]);
66 hasher.finalize()
67 }
68
69 /// verify the CRC32 checksum
70 pub fn verify_crc(&self) -> Result<(), Error> {
71 let expected_crc = self.compute_crc();
72 if expected_crc != self.crc() {
73 bail!("Data blob has wrong CRC checksum.");
74 }
75 Ok(())
76 }
77
78 /// Create a DataBlob, optionally compressed and/or encrypted
79 pub fn encode(
80 data: &[u8],
81 config: Option<&CryptConfig>,
82 compress: bool,
83 ) -> Result<Self, Error> {
84
85 if data.len() > MAX_BLOB_SIZE {
86 bail!("data blob too large ({} bytes).", data.len());
87 }
88
89 let mut blob = if let Some(config) = config {
90
91 let compr_data;
92 let (_compress, data, magic) = if compress {
93 compr_data = zstd::block::compress(data, 1)?;
94 // Note: We only use compression if result is shorter
95 if compr_data.len() < data.len() {
96 (true, &compr_data[..], ENCR_COMPR_BLOB_MAGIC_1_0)
97 } else {
98 (false, data, ENCRYPTED_BLOB_MAGIC_1_0)
99 }
100 } else {
101 (false, data, ENCRYPTED_BLOB_MAGIC_1_0)
102 };
103
104 let header_len = std::mem::size_of::<EncryptedDataBlobHeader>();
105 let mut raw_data = Vec::with_capacity(data.len() + header_len);
106
107 let dummy_head = EncryptedDataBlobHeader {
108 head: DataBlobHeader { magic: [0u8; 8], crc: [0; 4] },
109 iv: [0u8; 16],
110 tag: [0u8; 16],
111 };
112 unsafe {
113 raw_data.write_le_value(dummy_head)?;
114 }
115
116 let (iv, tag) = config.encrypt_to(data, &mut raw_data)?;
117
118 let head = EncryptedDataBlobHeader {
119 head: DataBlobHeader { magic, crc: [0; 4] }, iv, tag,
120 };
121
122 unsafe {
123 (&mut raw_data[0..header_len]).write_le_value(head)?;
124 }
125
126 DataBlob { raw_data }
127 } else {
128
129 let max_data_len = data.len() + std::mem::size_of::<DataBlobHeader>();
130 if compress {
131 let mut comp_data = Vec::with_capacity(max_data_len);
132
133 let head = DataBlobHeader {
134 magic: COMPRESSED_BLOB_MAGIC_1_0,
135 crc: [0; 4],
136 };
137 unsafe {
138 comp_data.write_le_value(head)?;
139 }
140
141 zstd::stream::copy_encode(data, &mut comp_data, 1)?;
142
143 if comp_data.len() < max_data_len {
144 let mut blob = DataBlob { raw_data: comp_data };
145 blob.set_crc(blob.compute_crc());
146 return Ok(blob);
147 }
148 }
149
150 let mut raw_data = Vec::with_capacity(max_data_len);
151
152 let head = DataBlobHeader {
153 magic: UNCOMPRESSED_BLOB_MAGIC_1_0,
154 crc: [0; 4],
155 };
156 unsafe {
157 raw_data.write_le_value(head)?;
158 }
159 raw_data.extend_from_slice(data);
160
161 DataBlob { raw_data }
162 };
163
164 blob.set_crc(blob.compute_crc());
165
166 Ok(blob)
167 }
168
169 /// Get the encryption mode for this blob.
170 pub fn crypt_mode(&self) -> Result<CryptMode, Error> {
171 let magic = self.magic();
172
173 Ok(if magic == &UNCOMPRESSED_BLOB_MAGIC_1_0 || magic == &COMPRESSED_BLOB_MAGIC_1_0 {
174 CryptMode::None
175 } else if magic == &ENCR_COMPR_BLOB_MAGIC_1_0 || magic == &ENCRYPTED_BLOB_MAGIC_1_0 {
176 CryptMode::Encrypt
177 } else {
178 bail!("Invalid blob magic number.");
179 })
180 }
181
182 /// Decode blob data
183 pub fn decode(&self, config: Option<&CryptConfig>) -> Result<Vec<u8>, Error> {
184
185 let magic = self.magic();
186
187 if magic == &UNCOMPRESSED_BLOB_MAGIC_1_0 {
188 let data_start = std::mem::size_of::<DataBlobHeader>();
189 Ok(self.raw_data[data_start..].to_vec())
190 } else if magic == &COMPRESSED_BLOB_MAGIC_1_0 {
191 let data_start = std::mem::size_of::<DataBlobHeader>();
192 let data = zstd::block::decompress(&self.raw_data[data_start..], MAX_BLOB_SIZE)?;
193 Ok(data)
194 } else if magic == &ENCR_COMPR_BLOB_MAGIC_1_0 || magic == &ENCRYPTED_BLOB_MAGIC_1_0 {
195 let header_len = std::mem::size_of::<EncryptedDataBlobHeader>();
196 let head = unsafe {
197 (&self.raw_data[..header_len]).read_le_value::<EncryptedDataBlobHeader>()?
198 };
199
200 if let Some(config) = config {
201 let data = if magic == &ENCR_COMPR_BLOB_MAGIC_1_0 {
202 config.decode_compressed_chunk(&self.raw_data[header_len..], &head.iv, &head.tag)?
203 } else {
204 config.decode_uncompressed_chunk(&self.raw_data[header_len..], &head.iv, &head.tag)?
205 };
206 Ok(data)
207 } else {
208 bail!("unable to decrypt blob - missing CryptConfig");
209 }
210 } else {
211 bail!("Invalid blob magic number.");
212 }
213 }
214
215 /// Load blob from ``reader``
216 pub fn load(reader: &mut dyn std::io::Read) -> Result<Self, Error> {
217
218 let mut data = Vec::with_capacity(1024*1024);
219 reader.read_to_end(&mut data)?;
220
221 Self::from_raw(data)
222 }
223
224 /// Create Instance from raw data
225 pub fn from_raw(data: Vec<u8>) -> Result<Self, Error> {
226
227 if data.len() < std::mem::size_of::<DataBlobHeader>() {
228 bail!("blob too small ({} bytes).", data.len());
229 }
230
231 let magic = &data[0..8];
232
233 if magic == ENCR_COMPR_BLOB_MAGIC_1_0 || magic == ENCRYPTED_BLOB_MAGIC_1_0 {
234
235 if data.len() < std::mem::size_of::<EncryptedDataBlobHeader>() {
236 bail!("encrypted blob too small ({} bytes).", data.len());
237 }
238
239 let blob = DataBlob { raw_data: data };
240
241 Ok(blob)
242 } else if magic == COMPRESSED_BLOB_MAGIC_1_0 || magic == UNCOMPRESSED_BLOB_MAGIC_1_0 {
243
244 let blob = DataBlob { raw_data: data };
245
246 Ok(blob)
247 } else {
248 bail!("unable to parse raw blob - wrong magic");
249 }
250 }
251
252 /// Verify digest and data length for unencrypted chunks.
253 ///
254 /// To do that, we need to decompress data first. Please note that
255 /// this is not possible for encrypted chunks. This function simply return Ok
256 /// for encrypted chunks.
257 /// Note: This does not call verify_crc
258 pub fn verify_unencrypted(
259 &self,
260 expected_chunk_size: usize,
261 expected_digest: &[u8; 32],
262 ) -> Result<(), Error> {
263
264 let magic = self.magic();
265
266 if magic == &ENCR_COMPR_BLOB_MAGIC_1_0 || magic == &ENCRYPTED_BLOB_MAGIC_1_0 {
267 return Ok(());
268 }
269
270 let data = self.decode(None)?;
271
272 if expected_chunk_size != data.len() {
273 bail!("detected chunk with wrong length ({} != {})", expected_chunk_size, data.len());
274 }
275 let digest = openssl::sha::sha256(&data);
276 if &digest != expected_digest {
277 bail!("detected chunk with wrong digest.");
278 }
279
280 Ok(())
281 }
282 }
283
284 /// Builder for chunk DataBlobs
285 ///
286 /// Main purpose is to centralize digest computation. Digest
287 /// computation differ for encryped chunk, and this interface ensures that
288 /// we always compute the correct one.
289 pub struct DataChunkBuilder<'a, 'b> {
290 config: Option<&'b CryptConfig>,
291 orig_data: &'a [u8],
292 digest_computed: bool,
293 digest: [u8; 32],
294 compress: bool,
295 }
296
297 impl <'a, 'b> DataChunkBuilder<'a, 'b> {
298
299 /// Create a new builder instance.
300 pub fn new(orig_data: &'a [u8]) -> Self {
301 Self {
302 orig_data,
303 config: None,
304 digest_computed: false,
305 digest: [0u8; 32],
306 compress: true,
307 }
308 }
309
310 /// Set compression flag.
311 ///
312 /// If true, chunk data is compressed using zstd (level 1).
313 pub fn compress(mut self, value: bool) -> Self {
314 self.compress = value;
315 self
316 }
317
318 /// Set encryption Configuration
319 ///
320 /// If set, chunks are encrypted
321 pub fn crypt_config(mut self, value: &'b CryptConfig) -> Self {
322 if self.digest_computed {
323 panic!("unable to set crypt_config after compute_digest().");
324 }
325 self.config = Some(value);
326 self
327 }
328
329 fn compute_digest(&mut self) {
330 if !self.digest_computed {
331 if let Some(ref config) = self.config {
332 self.digest = config.compute_digest(self.orig_data);
333 } else {
334 self.digest = openssl::sha::sha256(self.orig_data);
335 }
336 self.digest_computed = true;
337 }
338 }
339
340 /// Returns the chunk Digest
341 ///
342 /// Note: For encrypted chunks, this needs to be called after
343 /// ``crypt_config``.
344 pub fn digest(&mut self) -> &[u8; 32] {
345 if !self.digest_computed {
346 self.compute_digest();
347 }
348 &self.digest
349 }
350
351 /// Consume self and build the ``DataBlob``.
352 ///
353 /// Returns the blob and the computet digest.
354 pub fn build(mut self) -> Result<(DataBlob, [u8; 32]), Error> {
355 if !self.digest_computed {
356 self.compute_digest();
357 }
358
359 let chunk = DataBlob::encode(self.orig_data, self.config, self.compress)?;
360 Ok((chunk, self.digest))
361 }
362
363 /// Create a chunk filled with zeroes
364 pub fn build_zero_chunk(
365 crypt_config: Option<&CryptConfig>,
366 chunk_size: usize,
367 compress: bool,
368 ) -> Result<(DataBlob, [u8; 32]), Error> {
369
370 let mut zero_bytes = Vec::with_capacity(chunk_size);
371 zero_bytes.resize(chunk_size, 0u8);
372 let mut chunk_builder = DataChunkBuilder::new(&zero_bytes).compress(compress);
373 if let Some(ref crypt_config) = crypt_config {
374 chunk_builder = chunk_builder.crypt_config(crypt_config);
375 }
376
377 chunk_builder.build()
378 }
379
380 }