]> git.proxmox.com Git - proxmox-backup.git/blob - src/backup/data_blob.rs
src/backup/data_blob.rs: define create_zero_chunk as member function
[proxmox-backup.git] / src / backup / data_blob.rs
1 use failure::*;
2 use std::convert::TryInto;
3
4 use proxmox::tools::io::{ReadExt, WriteExt};
5
/// Upper bound (in bytes, 128 MiB) for the payload of a single blob.
///
/// Used to reject oversized input when creating blobs and as the
/// output size limit when decompressing blob payloads.
const MAX_BLOB_SIZE: usize = 128*1024*1024;
7
8 use super::file_formats::*;
9 use super::CryptConfig;
10
/// Encoded data chunk with digest and positional information
pub struct ChunkInfo {
    /// The encoded chunk blob.
    pub chunk: DataBlob,
    /// 32-byte digest associated with the chunk.
    pub digest: [u8; 32],
    /// Chunk length in bytes.
    /// NOTE(review): presumably the length of the unencoded data - confirm against callers.
    pub chunk_len: u64,
    /// Position of the chunk.
    /// NOTE(review): presumably a byte offset into the source stream/image - confirm against callers.
    pub offset: u64,
}
18
/// Data blob binary storage format
///
/// Data blobs store arbitrary binary data (< 128MB), and can be
/// compressed and encrypted (or just signed). A simple binary format
/// is used to store them on disk or transfer them over the network.
///
/// Please use index files to store large data files (".fidx" or
/// ".didx").
///
pub struct DataBlob {
    raw_data: Vec<u8>, // tagged, compressed, encrypted data
}
31
32 impl DataBlob {
33
34 /// accessor to raw_data field
35 pub fn raw_data(&self) -> &[u8] {
36 &self.raw_data
37 }
38
39 /// Consume self and returns raw_data
40 pub fn into_inner(self) -> Vec<u8> {
41 self.raw_data
42 }
43
44 /// accessor to chunk type (magic number)
45 pub fn magic(&self) -> &[u8; 8] {
46 self.raw_data[0..8].try_into().unwrap()
47 }
48
49 /// accessor to crc32 checksum
50 pub fn crc(&self) -> u32 {
51 let crc_o = proxmox::tools::offsetof!(DataBlobHeader, crc);
52 u32::from_le_bytes(self.raw_data[crc_o..crc_o+4].try_into().unwrap())
53 }
54
55 // set the CRC checksum field
56 pub fn set_crc(&mut self, crc: u32) {
57 let crc_o = proxmox::tools::offsetof!(DataBlobHeader, crc);
58 self.raw_data[crc_o..crc_o+4].copy_from_slice(&crc.to_le_bytes());
59 }
60
61 /// compute the CRC32 checksum
62 pub fn compute_crc(&self) -> u32 {
63 let mut hasher = crc32fast::Hasher::new();
64 let start = header_size(self.magic()); // start after HEAD
65 hasher.update(&self.raw_data[start..]);
66 hasher.finalize()
67 }
68
69 /// verify the CRC32 checksum
70 pub fn verify_crc(&self) -> Result<(), Error> {
71 let expected_crc = self.compute_crc();
72 if expected_crc != self.crc() {
73 bail!("Data blob has wrong CRC checksum.");
74 }
75 Ok(())
76 }
77
78 /// Create a DataBlob, optionally compressed and/or encrypted
79 pub fn encode(
80 data: &[u8],
81 config: Option<&CryptConfig>,
82 compress: bool,
83 ) -> Result<Self, Error> {
84
85 if data.len() > MAX_BLOB_SIZE {
86 bail!("data blob too large ({} bytes).", data.len());
87 }
88
89 let mut blob = if let Some(config) = config {
90
91 let compr_data;
92 let (_compress, data, magic) = if compress {
93 compr_data = zstd::block::compress(data, 1)?;
94 // Note: We only use compression if result is shorter
95 if compr_data.len() < data.len() {
96 (true, &compr_data[..], ENCR_COMPR_BLOB_MAGIC_1_0)
97 } else {
98 (false, data, ENCRYPTED_BLOB_MAGIC_1_0)
99 }
100 } else {
101 (false, data, ENCRYPTED_BLOB_MAGIC_1_0)
102 };
103
104 let header_len = std::mem::size_of::<EncryptedDataBlobHeader>();
105 let mut raw_data = Vec::with_capacity(data.len() + header_len);
106
107 let dummy_head = EncryptedDataBlobHeader {
108 head: DataBlobHeader { magic: [0u8; 8], crc: [0; 4] },
109 iv: [0u8; 16],
110 tag: [0u8; 16],
111 };
112 unsafe {
113 raw_data.write_le_value(dummy_head)?;
114 }
115
116 let (iv, tag) = config.encrypt_to(data, &mut raw_data)?;
117
118 let head = EncryptedDataBlobHeader {
119 head: DataBlobHeader { magic, crc: [0; 4] }, iv, tag,
120 };
121
122 unsafe {
123 (&mut raw_data[0..header_len]).write_le_value(head)?;
124 }
125
126 DataBlob { raw_data }
127 } else {
128
129 let max_data_len = data.len() + std::mem::size_of::<DataBlobHeader>();
130 if compress {
131 let mut comp_data = Vec::with_capacity(max_data_len);
132
133 let head = DataBlobHeader {
134 magic: COMPRESSED_BLOB_MAGIC_1_0,
135 crc: [0; 4],
136 };
137 unsafe {
138 comp_data.write_le_value(head)?;
139 }
140
141 zstd::stream::copy_encode(data, &mut comp_data, 1)?;
142
143 if comp_data.len() < max_data_len {
144 let mut blob = DataBlob { raw_data: comp_data };
145 blob.set_crc(blob.compute_crc());
146 return Ok(blob);
147 }
148 }
149
150 let mut raw_data = Vec::with_capacity(max_data_len);
151
152 let head = DataBlobHeader {
153 magic: UNCOMPRESSED_BLOB_MAGIC_1_0,
154 crc: [0; 4],
155 };
156 unsafe {
157 raw_data.write_le_value(head)?;
158 }
159 raw_data.extend_from_slice(data);
160
161 DataBlob { raw_data }
162 };
163
164 blob.set_crc(blob.compute_crc());
165
166 Ok(blob)
167 }
168
169 /// Decode blob data
170 pub fn decode(self, config: Option<&CryptConfig>) -> Result<Vec<u8>, Error> {
171
172 let magic = self.magic();
173
174 if magic == &UNCOMPRESSED_BLOB_MAGIC_1_0 {
175 let data_start = std::mem::size_of::<DataBlobHeader>();
176 return Ok(self.raw_data[data_start..].to_vec());
177 } else if magic == &COMPRESSED_BLOB_MAGIC_1_0 {
178 let data_start = std::mem::size_of::<DataBlobHeader>();
179 let data = zstd::block::decompress(&self.raw_data[data_start..], MAX_BLOB_SIZE)?;
180 return Ok(data);
181 } else if magic == &ENCR_COMPR_BLOB_MAGIC_1_0 || magic == &ENCRYPTED_BLOB_MAGIC_1_0 {
182 let header_len = std::mem::size_of::<EncryptedDataBlobHeader>();
183 let head = unsafe {
184 (&self.raw_data[..header_len]).read_le_value::<EncryptedDataBlobHeader>()?
185 };
186
187 if let Some(config) = config {
188 let data = if magic == &ENCR_COMPR_BLOB_MAGIC_1_0 {
189 config.decode_compressed_chunk(&self.raw_data[header_len..], &head.iv, &head.tag)?
190 } else {
191 config.decode_uncompressed_chunk(&self.raw_data[header_len..], &head.iv, &head.tag)?
192 };
193 return Ok(data);
194 } else {
195 bail!("unable to decrypt blob - missing CryptConfig");
196 }
197 } else if magic == &AUTH_COMPR_BLOB_MAGIC_1_0 || magic == &AUTHENTICATED_BLOB_MAGIC_1_0 {
198 let header_len = std::mem::size_of::<AuthenticatedDataBlobHeader>();
199 let head = unsafe {
200 (&self.raw_data[..header_len]).read_le_value::<AuthenticatedDataBlobHeader>()?
201 };
202
203 let data_start = std::mem::size_of::<AuthenticatedDataBlobHeader>();
204
205 // Note: only verify if we have a crypt config
206 if let Some(config) = config {
207 let signature = config.compute_auth_tag(&self.raw_data[data_start..]);
208 if signature != head.tag {
209 bail!("verifying blob signature failed");
210 }
211 }
212
213 if magic == &AUTH_COMPR_BLOB_MAGIC_1_0 {
214 let data = zstd::block::decompress(&self.raw_data[data_start..], 16*1024*1024)?;
215 return Ok(data);
216 } else {
217 return Ok(self.raw_data[data_start..].to_vec());
218 }
219 } else {
220 bail!("Invalid blob magic number.");
221 }
222 }
223
224 /// Create a signed DataBlob, optionally compressed
225 pub fn create_signed(
226 data: &[u8],
227 config: &CryptConfig,
228 compress: bool,
229 ) -> Result<Self, Error> {
230
231 if data.len() > MAX_BLOB_SIZE {
232 bail!("data blob too large ({} bytes).", data.len());
233 }
234
235 let compr_data;
236 let (_compress, data, magic) = if compress {
237 compr_data = zstd::block::compress(data, 1)?;
238 // Note: We only use compression if result is shorter
239 if compr_data.len() < data.len() {
240 (true, &compr_data[..], AUTH_COMPR_BLOB_MAGIC_1_0)
241 } else {
242 (false, data, AUTHENTICATED_BLOB_MAGIC_1_0)
243 }
244 } else {
245 (false, data, AUTHENTICATED_BLOB_MAGIC_1_0)
246 };
247
248 let header_len = std::mem::size_of::<AuthenticatedDataBlobHeader>();
249 let mut raw_data = Vec::with_capacity(data.len() + header_len);
250
251 let head = AuthenticatedDataBlobHeader {
252 head: DataBlobHeader { magic, crc: [0; 4] },
253 tag: config.compute_auth_tag(data),
254 };
255 unsafe {
256 raw_data.write_le_value(head)?;
257 }
258 raw_data.extend_from_slice(data);
259
260 let mut blob = DataBlob { raw_data };
261 blob.set_crc(blob.compute_crc());
262
263 return Ok(blob);
264 }
265
266 /// Load blob from ``reader``
267 pub fn load(reader: &mut dyn std::io::Read) -> Result<Self, Error> {
268
269 let mut data = Vec::with_capacity(1024*1024);
270 reader.read_to_end(&mut data)?;
271
272 Self::from_raw(data)
273 }
274
275 /// Create Instance from raw data
276 pub fn from_raw(data: Vec<u8>) -> Result<Self, Error> {
277
278 if data.len() < std::mem::size_of::<DataBlobHeader>() {
279 bail!("blob too small ({} bytes).", data.len());
280 }
281
282 let magic = &data[0..8];
283
284 if magic == ENCR_COMPR_BLOB_MAGIC_1_0 || magic == ENCRYPTED_BLOB_MAGIC_1_0 {
285
286 if data.len() < std::mem::size_of::<EncryptedDataBlobHeader>() {
287 bail!("encrypted blob too small ({} bytes).", data.len());
288 }
289
290 let blob = DataBlob { raw_data: data };
291
292 Ok(blob)
293 } else if magic == COMPRESSED_BLOB_MAGIC_1_0 || magic == UNCOMPRESSED_BLOB_MAGIC_1_0 {
294
295 let blob = DataBlob { raw_data: data };
296
297 Ok(blob)
298 } else if magic == AUTH_COMPR_BLOB_MAGIC_1_0 || magic == AUTHENTICATED_BLOB_MAGIC_1_0 {
299 if data.len() < std::mem::size_of::<AuthenticatedDataBlobHeader>() {
300 bail!("authenticated blob too small ({} bytes).", data.len());
301 }
302
303 let blob = DataBlob { raw_data: data };
304
305 Ok(blob)
306 } else {
307 bail!("unable to parse raw blob - wrong magic");
308 }
309 }
310
311 /// Verify digest and data length for unencrypted chunks.
312 ///
313 /// To do that, we need to decompress data first. Please note that
314 /// this is noth possible for encrypted chunks.
315 pub fn verify_unencrypted(
316 &self,
317 expected_chunk_size: usize,
318 expected_digest: &[u8; 32],
319 ) -> Result<(), Error> {
320
321 let magic = self.magic();
322
323 let verify_raw_data = |data: &[u8]| {
324 if expected_chunk_size != data.len() {
325 bail!("detected chunk with wrong length ({} != {})", expected_chunk_size, data.len());
326 }
327 let digest = openssl::sha::sha256(data);
328 if &digest != expected_digest {
329 bail!("detected chunk with wrong digest.");
330 }
331 Ok(())
332 };
333
334 if magic == &COMPRESSED_BLOB_MAGIC_1_0 {
335 let data = zstd::block::decompress(&self.raw_data[12..], 16*1024*1024)?;
336 verify_raw_data(&data)?;
337 } else if magic == &UNCOMPRESSED_BLOB_MAGIC_1_0 {
338 verify_raw_data(&self.raw_data[12..])?;
339 }
340
341 Ok(())
342 }
343 }
344
/// Builder for chunk DataBlobs
///
/// Main purpose is to centralize digest computation. Digest
/// computation differs for encrypted chunks, and this interface ensures that
/// we always compute the correct one.
pub struct DataChunkBuilder<'a, 'b> {
    // optional encryption configuration; `None` means unencrypted chunks
    config: Option<&'b CryptConfig>,
    // the original (unencoded) chunk data
    orig_data: &'a [u8],
    // true once `digest` holds a computed value
    digest_computed: bool,
    // cached chunk digest; only meaningful when `digest_computed` is set
    digest: [u8; 32],
    // whether compression is requested when encoding the chunk
    compress: bool,
}
357
358 impl <'a, 'b> DataChunkBuilder<'a, 'b> {
359
360 /// Create a new builder instance.
361 pub fn new(orig_data: &'a [u8]) -> Self {
362 Self {
363 orig_data,
364 config: None,
365 digest_computed: false,
366 digest: [0u8; 32],
367 compress: true,
368 }
369 }
370
371 /// Set compression flag.
372 ///
373 /// If true, chunk data is compressed using zstd (level 1).
374 pub fn compress(mut self, value: bool) -> Self {
375 self.compress = value;
376 self
377 }
378
379 /// Set encryption Configuration
380 ///
381 /// If set, chunks are encrypted.
382 pub fn crypt_config(mut self, value: &'b CryptConfig) -> Self {
383 if self.digest_computed {
384 panic!("unable to set crypt_config after compute_digest().");
385 }
386 self.config = Some(value);
387 self
388 }
389
390 fn compute_digest(&mut self) {
391 if !self.digest_computed {
392 if let Some(ref config) = self.config {
393 self.digest = config.compute_digest(self.orig_data);
394 } else {
395 self.digest = openssl::sha::sha256(self.orig_data);
396 }
397 self.digest_computed = true;
398 }
399 }
400
401 /// Returns the chunk Digest
402 ///
403 /// Note: For encrypted chunks, this needs to be called after
404 /// ``crypt_config``.
405 pub fn digest(&mut self) -> &[u8; 32] {
406 if !self.digest_computed {
407 self.compute_digest();
408 }
409 &self.digest
410 }
411
412 /// Consume self and build the ``DataBlob``.
413 ///
414 /// Returns the blob and the computet digest.
415 pub fn build(mut self) -> Result<(DataBlob, [u8; 32]), Error> {
416 if !self.digest_computed {
417 self.compute_digest();
418 }
419
420 let chunk = DataBlob::encode(
421 self.orig_data,
422 self.config,
423 self.compress,
424 )?;
425
426 Ok((chunk, self.digest))
427 }
428
429 /// Create a chunk filled with zeroes
430 pub fn build_zero_chunk(
431 crypt_config: Option<&CryptConfig>,
432 chunk_size: usize,
433 compress: bool,
434 ) -> Result<(DataBlob, [u8; 32]), Error> {
435
436 let mut zero_bytes = Vec::with_capacity(chunk_size);
437 zero_bytes.resize(chunk_size, 0u8);
438 let mut chunk_builder = DataChunkBuilder::new(&zero_bytes).compress(compress);
439 if let Some(ref crypt_config) = crypt_config {
440 chunk_builder = chunk_builder.crypt_config(crypt_config);
441 }
442
443 chunk_builder.build()
444 }
445
446 }