]> git.proxmox.com Git - proxmox-backup.git/blob - pbs-datastore/src/data_blob.rs
update to first proxmox crate split
[proxmox-backup.git] / pbs-datastore / src / data_blob.rs
1 use std::convert::TryInto;
2 use std::io::Write;
3
4 use anyhow::{bail, Error};
5 use openssl::symm::{decrypt_aead, Mode};
6
7 use proxmox_io::{ReadExt, WriteExt};
8
9 use pbs_tools::crypt_config::CryptConfig;
10 use pbs_api_types::CryptMode;
11
12 use super::file_formats::*;
13
/// Largest payload (in bytes) accepted when encoding a blob: 128 MiB.
const MAX_BLOB_SIZE: usize = 128 << 20;
15
16 /// Encoded data chunk with digest and positional information
17 pub struct ChunkInfo {
18 pub chunk: DataBlob,
19 pub digest: [u8; 32],
20 pub chunk_len: u64,
21 pub offset: u64,
22 }
23
/// Data blob binary storage format
///
/// A data blob holds arbitrary binary data (< 128MB) and may be
/// compressed and encrypted (or just signed). Blobs use a simple binary
/// format, both when stored on disk and when transferred over the
/// network.
///
/// Large data files should be stored using index files (".fidx" or
/// ".didx") instead.
pub struct DataBlob {
    /// Tagged, compressed, encrypted data.
    raw_data: Vec<u8>,
}
36
37 impl DataBlob {
38
39 /// accessor to raw_data field
40 pub fn raw_data(&self) -> &[u8] {
41 &self.raw_data
42 }
43
44 /// Returns raw_data size
45 pub fn raw_size(&self) -> u64 {
46 self.raw_data.len() as u64
47 }
48
49 /// Consume self and returns raw_data
50 pub fn into_inner(self) -> Vec<u8> {
51 self.raw_data
52 }
53
54 /// accessor to chunk type (magic number)
55 pub fn magic(&self) -> &[u8; 8] {
56 self.raw_data[0..8].try_into().unwrap()
57 }
58
59 /// accessor to crc32 checksum
60 pub fn crc(&self) -> u32 {
61 let crc_o = proxmox_lang::offsetof!(DataBlobHeader, crc);
62 u32::from_le_bytes(self.raw_data[crc_o..crc_o+4].try_into().unwrap())
63 }
64
65 // set the CRC checksum field
66 pub fn set_crc(&mut self, crc: u32) {
67 let crc_o = proxmox_lang::offsetof!(DataBlobHeader, crc);
68 self.raw_data[crc_o..crc_o+4].copy_from_slice(&crc.to_le_bytes());
69 }
70
71 /// compute the CRC32 checksum
72 pub fn compute_crc(&self) -> u32 {
73 let mut hasher = crc32fast::Hasher::new();
74 let start = header_size(self.magic()); // start after HEAD
75 hasher.update(&self.raw_data[start..]);
76 hasher.finalize()
77 }
78
79 // verify the CRC32 checksum
80 pub fn verify_crc(&self) -> Result<(), Error> {
81 let expected_crc = self.compute_crc();
82 if expected_crc != self.crc() {
83 bail!("Data blob has wrong CRC checksum.");
84 }
85 Ok(())
86 }
87
88 /// Create a DataBlob, optionally compressed and/or encrypted
89 pub fn encode(
90 data: &[u8],
91 config: Option<&CryptConfig>,
92 compress: bool,
93 ) -> Result<Self, Error> {
94
95 if data.len() > MAX_BLOB_SIZE {
96 bail!("data blob too large ({} bytes).", data.len());
97 }
98
99 let mut blob = if let Some(config) = config {
100
101 let compr_data;
102 let (_compress, data, magic) = if compress {
103 compr_data = zstd::block::compress(data, 1)?;
104 // Note: We only use compression if result is shorter
105 if compr_data.len() < data.len() {
106 (true, &compr_data[..], ENCR_COMPR_BLOB_MAGIC_1_0)
107 } else {
108 (false, data, ENCRYPTED_BLOB_MAGIC_1_0)
109 }
110 } else {
111 (false, data, ENCRYPTED_BLOB_MAGIC_1_0)
112 };
113
114 let header_len = std::mem::size_of::<EncryptedDataBlobHeader>();
115 let mut raw_data = Vec::with_capacity(data.len() + header_len);
116
117 let dummy_head = EncryptedDataBlobHeader {
118 head: DataBlobHeader { magic: [0u8; 8], crc: [0; 4] },
119 iv: [0u8; 16],
120 tag: [0u8; 16],
121 };
122 unsafe {
123 raw_data.write_le_value(dummy_head)?;
124 }
125
126 let (iv, tag) = Self::encrypt_to(&config, data, &mut raw_data)?;
127
128 let head = EncryptedDataBlobHeader {
129 head: DataBlobHeader { magic, crc: [0; 4] }, iv, tag,
130 };
131
132 unsafe {
133 (&mut raw_data[0..header_len]).write_le_value(head)?;
134 }
135
136 DataBlob { raw_data }
137 } else {
138
139 let max_data_len = data.len() + std::mem::size_of::<DataBlobHeader>();
140 if compress {
141 let mut comp_data = Vec::with_capacity(max_data_len);
142
143 let head = DataBlobHeader {
144 magic: COMPRESSED_BLOB_MAGIC_1_0,
145 crc: [0; 4],
146 };
147 unsafe {
148 comp_data.write_le_value(head)?;
149 }
150
151 zstd::stream::copy_encode(data, &mut comp_data, 1)?;
152
153 if comp_data.len() < max_data_len {
154 let mut blob = DataBlob { raw_data: comp_data };
155 blob.set_crc(blob.compute_crc());
156 return Ok(blob);
157 }
158 }
159
160 let mut raw_data = Vec::with_capacity(max_data_len);
161
162 let head = DataBlobHeader {
163 magic: UNCOMPRESSED_BLOB_MAGIC_1_0,
164 crc: [0; 4],
165 };
166 unsafe {
167 raw_data.write_le_value(head)?;
168 }
169 raw_data.extend_from_slice(data);
170
171 DataBlob { raw_data }
172 };
173
174 blob.set_crc(blob.compute_crc());
175
176 Ok(blob)
177 }
178
179 /// Get the encryption mode for this blob.
180 pub fn crypt_mode(&self) -> Result<CryptMode, Error> {
181 let magic = self.magic();
182
183 Ok(if magic == &UNCOMPRESSED_BLOB_MAGIC_1_0 || magic == &COMPRESSED_BLOB_MAGIC_1_0 {
184 CryptMode::None
185 } else if magic == &ENCR_COMPR_BLOB_MAGIC_1_0 || magic == &ENCRYPTED_BLOB_MAGIC_1_0 {
186 CryptMode::Encrypt
187 } else {
188 bail!("Invalid blob magic number.");
189 })
190 }
191
192 /// Decode blob data
193 pub fn decode(&self, config: Option<&CryptConfig>, digest: Option<&[u8; 32]>) -> Result<Vec<u8>, Error> {
194
195 let magic = self.magic();
196
197 if magic == &UNCOMPRESSED_BLOB_MAGIC_1_0 {
198 let data_start = std::mem::size_of::<DataBlobHeader>();
199 let data = self.raw_data[data_start..].to_vec();
200 if let Some(digest) = digest {
201 Self::verify_digest(&data, None, digest)?;
202 }
203 Ok(data)
204 } else if magic == &COMPRESSED_BLOB_MAGIC_1_0 {
205 let data_start = std::mem::size_of::<DataBlobHeader>();
206 let mut reader = &self.raw_data[data_start..];
207 let data = zstd::stream::decode_all(&mut reader)?;
208 // zstd::block::decompress is abou 10% slower
209 // let data = zstd::block::decompress(&self.raw_data[data_start..], MAX_BLOB_SIZE)?;
210 if let Some(digest) = digest {
211 Self::verify_digest(&data, None, digest)?;
212 }
213 Ok(data)
214 } else if magic == &ENCR_COMPR_BLOB_MAGIC_1_0 || magic == &ENCRYPTED_BLOB_MAGIC_1_0 {
215 let header_len = std::mem::size_of::<EncryptedDataBlobHeader>();
216 let head = unsafe {
217 (&self.raw_data[..header_len]).read_le_value::<EncryptedDataBlobHeader>()?
218 };
219
220 if let Some(config) = config {
221 let data = if magic == &ENCR_COMPR_BLOB_MAGIC_1_0 {
222 Self::decode_compressed_chunk(config, &self.raw_data[header_len..], &head.iv, &head.tag)?
223 } else {
224 Self::decode_uncompressed_chunk(config, &self.raw_data[header_len..], &head.iv, &head.tag)?
225 };
226 if let Some(digest) = digest {
227 Self::verify_digest(&data, Some(config), digest)?;
228 }
229 Ok(data)
230 } else {
231 bail!("unable to decrypt blob - missing CryptConfig");
232 }
233 } else {
234 bail!("Invalid blob magic number.");
235 }
236 }
237
238 /// Load blob from ``reader``, verify CRC
239 pub fn load_from_reader(reader: &mut dyn std::io::Read) -> Result<Self, Error> {
240
241 let mut data = Vec::with_capacity(1024*1024);
242 reader.read_to_end(&mut data)?;
243
244 let blob = Self::from_raw(data)?;
245
246 blob.verify_crc()?;
247
248 Ok(blob)
249 }
250
251 /// Create Instance from raw data
252 pub fn from_raw(data: Vec<u8>) -> Result<Self, Error> {
253
254 if data.len() < std::mem::size_of::<DataBlobHeader>() {
255 bail!("blob too small ({} bytes).", data.len());
256 }
257
258 let magic = &data[0..8];
259
260 if magic == ENCR_COMPR_BLOB_MAGIC_1_0 || magic == ENCRYPTED_BLOB_MAGIC_1_0 {
261
262 if data.len() < std::mem::size_of::<EncryptedDataBlobHeader>() {
263 bail!("encrypted blob too small ({} bytes).", data.len());
264 }
265
266 let blob = DataBlob { raw_data: data };
267
268 Ok(blob)
269 } else if magic == COMPRESSED_BLOB_MAGIC_1_0 || magic == UNCOMPRESSED_BLOB_MAGIC_1_0 {
270
271 let blob = DataBlob { raw_data: data };
272
273 Ok(blob)
274 } else {
275 bail!("unable to parse raw blob - wrong magic");
276 }
277 }
278
279 /// Returns if chunk is encrypted
280 pub fn is_encrypted(&self) -> bool {
281 let magic = self.magic();
282 magic == &ENCR_COMPR_BLOB_MAGIC_1_0 || magic == &ENCRYPTED_BLOB_MAGIC_1_0
283 }
284
285 /// Verify digest and data length for unencrypted chunks.
286 ///
287 /// To do that, we need to decompress data first. Please note that
288 /// this is not possible for encrypted chunks. This function simply return Ok
289 /// for encrypted chunks.
290 /// Note: This does not call verify_crc, because this is usually done in load
291 pub fn verify_unencrypted(
292 &self,
293 expected_chunk_size: usize,
294 expected_digest: &[u8; 32],
295 ) -> Result<(), Error> {
296
297 let magic = self.magic();
298
299 if magic == &ENCR_COMPR_BLOB_MAGIC_1_0 || magic == &ENCRYPTED_BLOB_MAGIC_1_0 {
300 return Ok(());
301 }
302
303 // verifies digest!
304 let data = self.decode(None, Some(expected_digest))?;
305
306 if expected_chunk_size != data.len() {
307 bail!("detected chunk with wrong length ({} != {})", expected_chunk_size, data.len());
308 }
309
310 Ok(())
311 }
312
313 fn verify_digest(
314 data: &[u8],
315 config: Option<&CryptConfig>,
316 expected_digest: &[u8; 32],
317 ) -> Result<(), Error> {
318
319 let digest = match config {
320 Some(config) => config.compute_digest(data),
321 None => openssl::sha::sha256(data),
322 };
323 if &digest != expected_digest {
324 bail!("detected chunk with wrong digest.");
325 }
326
327 Ok(())
328 }
329
330 /// Benchmark encryption speed
331 pub fn encrypt_benchmark<W: Write>(
332 config: &CryptConfig,
333 data: &[u8],
334 output: W,
335 ) -> Result<(), Error> {
336 let _ = Self::encrypt_to(config, data, output)?;
337 Ok(())
338 }
339
340 // Encrypt data using a random 16 byte IV.
341 //
342 // Writes encrypted data to ``output``, Return the used IV and computed MAC.
343 fn encrypt_to<W: Write>(
344 config: &CryptConfig,
345 data: &[u8],
346 mut output: W,
347 ) -> Result<([u8;16], [u8;16]), Error> {
348
349 let mut iv = [0u8; 16];
350 proxmox::sys::linux::fill_with_random_data(&mut iv)?;
351
352 let mut tag = [0u8; 16];
353
354 let mut c = config.data_crypter(&iv, Mode::Encrypt)?;
355
356 const BUFFER_SIZE: usize = 32*1024;
357
358 let mut encr_buf = [0u8; BUFFER_SIZE];
359 let max_encoder_input = BUFFER_SIZE - config.cipher().block_size();
360
361 let mut start = 0;
362 loop {
363 let mut end = start + max_encoder_input;
364 if end > data.len() { end = data.len(); }
365 if end > start {
366 let count = c.update(&data[start..end], &mut encr_buf)?;
367 output.write_all(&encr_buf[..count])?;
368 start = end;
369 } else {
370 break;
371 }
372 }
373
374 let rest = c.finalize(&mut encr_buf)?;
375 if rest > 0 { output.write_all(&encr_buf[..rest])?; }
376
377 output.flush()?;
378
379 c.get_tag(&mut tag)?;
380
381 Ok((iv, tag))
382 }
383
384 // Decompress and decrypt data, verify MAC.
385 fn decode_compressed_chunk(
386 config: &CryptConfig,
387 data: &[u8],
388 iv: &[u8; 16],
389 tag: &[u8; 16],
390 ) -> Result<Vec<u8>, Error> {
391
392 let dec = Vec::with_capacity(1024*1024);
393
394 let mut decompressor = zstd::stream::write::Decoder::new(dec)?;
395
396 let mut c = config.data_crypter(iv, Mode::Decrypt)?;
397
398 const BUFFER_SIZE: usize = 32*1024;
399
400 let mut decr_buf = [0u8; BUFFER_SIZE];
401 let max_decoder_input = BUFFER_SIZE - config.cipher().block_size();
402
403 let mut start = 0;
404 loop {
405 let mut end = start + max_decoder_input;
406 if end > data.len() { end = data.len(); }
407 if end > start {
408 let count = c.update(&data[start..end], &mut decr_buf)?;
409 decompressor.write_all(&decr_buf[0..count])?;
410 start = end;
411 } else {
412 break;
413 }
414 }
415
416 c.set_tag(tag)?;
417 let rest = c.finalize(&mut decr_buf)?;
418 if rest > 0 { decompressor.write_all(&decr_buf[..rest])?; }
419
420 decompressor.flush()?;
421
422 Ok(decompressor.into_inner())
423 }
424
425 // Decrypt data, verify tag.
426 fn decode_uncompressed_chunk(
427 config: &CryptConfig,
428 data: &[u8],
429 iv: &[u8; 16],
430 tag: &[u8; 16],
431 ) -> Result<Vec<u8>, Error> {
432
433 let decr_data = decrypt_aead(
434 *config.cipher(),
435 config.enc_key(),
436 Some(iv),
437 b"", //??
438 data,
439 tag,
440 )?;
441
442 Ok(decr_data)
443 }
444
445 }
446
447 /// Builder for chunk DataBlobs
448 ///
449 /// Main purpose is to centralize digest computation. Digest
450 /// computation differ for encryped chunk, and this interface ensures that
451 /// we always compute the correct one.
452 pub struct DataChunkBuilder<'a, 'b> {
453 config: Option<&'b CryptConfig>,
454 orig_data: &'a [u8],
455 digest_computed: bool,
456 digest: [u8; 32],
457 compress: bool,
458 }
459
460 impl <'a, 'b> DataChunkBuilder<'a, 'b> {
461
462 /// Create a new builder instance.
463 pub fn new(orig_data: &'a [u8]) -> Self {
464 Self {
465 orig_data,
466 config: None,
467 digest_computed: false,
468 digest: [0u8; 32],
469 compress: true,
470 }
471 }
472
473 /// Set compression flag.
474 ///
475 /// If true, chunk data is compressed using zstd (level 1).
476 pub fn compress(mut self, value: bool) -> Self {
477 self.compress = value;
478 self
479 }
480
481 /// Set encryption Configuration
482 ///
483 /// If set, chunks are encrypted
484 pub fn crypt_config(mut self, value: &'b CryptConfig) -> Self {
485 if self.digest_computed {
486 panic!("unable to set crypt_config after compute_digest().");
487 }
488 self.config = Some(value);
489 self
490 }
491
492 fn compute_digest(&mut self) {
493 if !self.digest_computed {
494 if let Some(ref config) = self.config {
495 self.digest = config.compute_digest(self.orig_data);
496 } else {
497 self.digest = openssl::sha::sha256(self.orig_data);
498 }
499 self.digest_computed = true;
500 }
501 }
502
503 /// Returns the chunk Digest
504 ///
505 /// Note: For encrypted chunks, this needs to be called after
506 /// ``crypt_config``.
507 pub fn digest(&mut self) -> &[u8; 32] {
508 if !self.digest_computed {
509 self.compute_digest();
510 }
511 &self.digest
512 }
513
514 /// Consume self and build the ``DataBlob``.
515 ///
516 /// Returns the blob and the computet digest.
517 pub fn build(mut self) -> Result<(DataBlob, [u8; 32]), Error> {
518 if !self.digest_computed {
519 self.compute_digest();
520 }
521
522 let chunk = DataBlob::encode(self.orig_data, self.config, self.compress)?;
523 Ok((chunk, self.digest))
524 }
525
526 /// Create a chunk filled with zeroes
527 pub fn build_zero_chunk(
528 crypt_config: Option<&CryptConfig>,
529 chunk_size: usize,
530 compress: bool,
531 ) -> Result<(DataBlob, [u8; 32]), Error> {
532 let zero_bytes = vec![0; chunk_size];
533 let mut chunk_builder = DataChunkBuilder::new(&zero_bytes).compress(compress);
534 if let Some(ref crypt_config) = crypt_config {
535 chunk_builder = chunk_builder.crypt_config(crypt_config);
536 }
537
538 chunk_builder.build()
539 }
540
541 }