]> git.proxmox.com Git - proxmox-backup.git/blame - pbs-datastore/src/data_blob.rs
tree-wide: fix needless borrows
[proxmox-backup.git] / pbs-datastore / src / data_blob.rs
CommitLineData
3025b3a5 1use std::convert::TryInto;
ed208076 2use std::io::Write;
3025b3a5 3
f323e906 4use anyhow::{bail, Error};
ed208076 5use openssl::symm::{decrypt_aead, Mode};
f323e906 6
6ef1b649 7use proxmox_io::{ReadExt, WriteExt};
9f83e0f7 8
bbdda58b
DM
9use pbs_tools::crypt_config::CryptConfig;
10use pbs_api_types::CryptMode;
11
4ee8f53d 12use super::file_formats::*;
3b66040d
WB
13
/// Largest payload (in bytes) that `DataBlob::encode` accepts: 128 MiB.
const MAX_BLOB_SIZE: usize = 128 << 20;
4ee8f53d
DM
15
16/// Encoded data chunk with digest and positional information
17pub struct ChunkInfo {
18 pub chunk: DataBlob,
19 pub digest: [u8; 32],
20 pub chunk_len: u64,
21 pub offset: u64,
22}
3025b3a5
DM
23
/// Data blob binary storage format
///
/// A data blob holds arbitrary binary data (at most 128 MiB) which may be
/// compressed and/or encrypted. A simple binary format (a small header
/// followed by the payload) is used to store blobs on disk or to transfer
/// them over the network.
///
/// Please use index files to store large data files (".fidx" or
/// ".didx").
pub struct DataBlob {
    /// Header (magic, CRC, and for encrypted blobs IV and tag) followed by
    /// the possibly compressed and/or encrypted payload.
    raw_data: Vec<u8>,
}
36
37impl DataBlob {
38
39 /// accessor to raw_data field
40 pub fn raw_data(&self) -> &[u8] {
41 &self.raw_data
42 }
43
39f18b30
DM
44 /// Returns raw_data size
45 pub fn raw_size(&self) -> u64 {
46 self.raw_data.len() as u64
47 }
48
cb08ac3e
DM
49 /// Consume self and returns raw_data
50 pub fn into_inner(self) -> Vec<u8> {
51 self.raw_data
52 }
53
3025b3a5
DM
54 /// accessor to chunk type (magic number)
55 pub fn magic(&self) -> &[u8; 8] {
56 self.raw_data[0..8].try_into().unwrap()
57 }
58
b7f4f27d
DM
59 /// accessor to crc32 checksum
60 pub fn crc(&self) -> u32 {
6ef1b649 61 let crc_o = proxmox_lang::offsetof!(DataBlobHeader, crc);
991abfa8 62 u32::from_le_bytes(self.raw_data[crc_o..crc_o+4].try_into().unwrap())
b7f4f27d
DM
63 }
64
65 // set the CRC checksum field
66 pub fn set_crc(&mut self, crc: u32) {
6ef1b649 67 let crc_o = proxmox_lang::offsetof!(DataBlobHeader, crc);
991abfa8 68 self.raw_data[crc_o..crc_o+4].copy_from_slice(&crc.to_le_bytes());
b7f4f27d
DM
69 }
70
71 /// compute the CRC32 checksum
cb08ac3e 72 pub fn compute_crc(&self) -> u32 {
b7f4f27d 73 let mut hasher = crc32fast::Hasher::new();
c638542b 74 let start = header_size(self.magic()); // start after HEAD
991abfa8 75 hasher.update(&self.raw_data[start..]);
b7f4f27d
DM
76 hasher.finalize()
77 }
78
39f18b30 79 // verify the CRC32 checksum
4d431383 80 pub fn verify_crc(&self) -> Result<(), Error> {
b208da83
DM
81 let expected_crc = self.compute_crc();
82 if expected_crc != self.crc() {
83 bail!("Data blob has wrong CRC checksum.");
84 }
85 Ok(())
86 }
87
69ecd8d5 88 /// Create a DataBlob, optionally compressed and/or encrypted
3025b3a5
DM
89 pub fn encode(
90 data: &[u8],
7123ff7d 91 config: Option<&CryptConfig>,
3025b3a5
DM
92 compress: bool,
93 ) -> Result<Self, Error> {
94
781ac11c 95 if data.len() > MAX_BLOB_SIZE {
3025b3a5
DM
96 bail!("data blob too large ({} bytes).", data.len());
97 }
98
f889b158 99 let mut blob = if let Some(config) = config {
3025b3a5 100
0066c6d9
DM
101 let compr_data;
102 let (_compress, data, magic) = if compress {
103 compr_data = zstd::block::compress(data, 1)?;
104 // Note: We only use compression if result is shorter
105 if compr_data.len() < data.len() {
106 (true, &compr_data[..], ENCR_COMPR_BLOB_MAGIC_1_0)
107 } else {
108 (false, data, ENCRYPTED_BLOB_MAGIC_1_0)
109 }
110 } else {
111 (false, data, ENCRYPTED_BLOB_MAGIC_1_0)
112 };
113
114 let header_len = std::mem::size_of::<EncryptedDataBlobHeader>();
115 let mut raw_data = Vec::with_capacity(data.len() + header_len);
116
117 let dummy_head = EncryptedDataBlobHeader {
118 head: DataBlobHeader { magic: [0u8; 8], crc: [0; 4] },
119 iv: [0u8; 16],
120 tag: [0u8; 16],
121 };
5485b579
WB
122 unsafe {
123 raw_data.write_le_value(dummy_head)?;
124 }
0066c6d9 125
9a37bd6c 126 let (iv, tag) = Self::encrypt_to(config, data, &mut raw_data)?;
0066c6d9
DM
127
128 let head = EncryptedDataBlobHeader {
129 head: DataBlobHeader { magic, crc: [0; 4] }, iv, tag,
130 };
131
5485b579
WB
132 unsafe {
133 (&mut raw_data[0..header_len]).write_le_value(head)?;
134 }
0066c6d9 135
f889b158 136 DataBlob { raw_data }
3025b3a5
DM
137 } else {
138
991abfa8 139 let max_data_len = data.len() + std::mem::size_of::<DataBlobHeader>();
3025b3a5 140 if compress {
991abfa8 141 let mut comp_data = Vec::with_capacity(max_data_len);
3025b3a5 142
991abfa8
DM
143 let head = DataBlobHeader {
144 magic: COMPRESSED_BLOB_MAGIC_1_0,
145 crc: [0; 4],
146 };
5485b579
WB
147 unsafe {
148 comp_data.write_le_value(head)?;
149 }
b7f4f27d 150
3025b3a5
DM
151 zstd::stream::copy_encode(data, &mut comp_data, 1)?;
152
991abfa8 153 if comp_data.len() < max_data_len {
eecb2356
DM
154 let mut blob = DataBlob { raw_data: comp_data };
155 blob.set_crc(blob.compute_crc());
156 return Ok(blob);
3025b3a5
DM
157 }
158 }
159
991abfa8 160 let mut raw_data = Vec::with_capacity(max_data_len);
3025b3a5 161
991abfa8
DM
162 let head = DataBlobHeader {
163 magic: UNCOMPRESSED_BLOB_MAGIC_1_0,
164 crc: [0; 4],
165 };
5485b579
WB
166 unsafe {
167 raw_data.write_le_value(head)?;
168 }
3025b3a5
DM
169 raw_data.extend_from_slice(data);
170
f889b158
DM
171 DataBlob { raw_data }
172 };
173
174 blob.set_crc(blob.compute_crc());
175
176 Ok(blob)
3025b3a5
DM
177 }
178
3b66040d
WB
179 /// Get the encryption mode for this blob.
180 pub fn crypt_mode(&self) -> Result<CryptMode, Error> {
181 let magic = self.magic();
182
183 Ok(if magic == &UNCOMPRESSED_BLOB_MAGIC_1_0 || magic == &COMPRESSED_BLOB_MAGIC_1_0 {
184 CryptMode::None
185 } else if magic == &ENCR_COMPR_BLOB_MAGIC_1_0 || magic == &ENCRYPTED_BLOB_MAGIC_1_0 {
186 CryptMode::Encrypt
3b66040d
WB
187 } else {
188 bail!("Invalid blob magic number.");
189 })
190 }
191
3025b3a5 192 /// Decode blob data
8819d1f2 193 pub fn decode(&self, config: Option<&CryptConfig>, digest: Option<&[u8; 32]>) -> Result<Vec<u8>, Error> {
3025b3a5
DM
194
195 let magic = self.magic();
196
197 if magic == &UNCOMPRESSED_BLOB_MAGIC_1_0 {
991abfa8 198 let data_start = std::mem::size_of::<DataBlobHeader>();
8819d1f2
FG
199 let data = self.raw_data[data_start..].to_vec();
200 if let Some(digest) = digest {
201 Self::verify_digest(&data, None, digest)?;
202 }
203 Ok(data)
3025b3a5 204 } else if magic == &COMPRESSED_BLOB_MAGIC_1_0 {
991abfa8 205 let data_start = std::mem::size_of::<DataBlobHeader>();
09a1da25
DM
206 let mut reader = &self.raw_data[data_start..];
207 let data = zstd::stream::decode_all(&mut reader)?;
208 // zstd::block::decompress is abou 10% slower
209 // let data = zstd::block::decompress(&self.raw_data[data_start..], MAX_BLOB_SIZE)?;
8819d1f2
FG
210 if let Some(digest) = digest {
211 Self::verify_digest(&data, None, digest)?;
212 }
62ee2eb4 213 Ok(data)
3025b3a5 214 } else if magic == &ENCR_COMPR_BLOB_MAGIC_1_0 || magic == &ENCRYPTED_BLOB_MAGIC_1_0 {
9f83e0f7 215 let header_len = std::mem::size_of::<EncryptedDataBlobHeader>();
ba01828d
DM
216 let head = unsafe {
217 (&self.raw_data[..header_len]).read_le_value::<EncryptedDataBlobHeader>()?
218 };
9f83e0f7 219
3025b3a5
DM
220 if let Some(config) = config {
221 let data = if magic == &ENCR_COMPR_BLOB_MAGIC_1_0 {
ed208076 222 Self::decode_compressed_chunk(config, &self.raw_data[header_len..], &head.iv, &head.tag)?
3025b3a5 223 } else {
ed208076 224 Self::decode_uncompressed_chunk(config, &self.raw_data[header_len..], &head.iv, &head.tag)?
3025b3a5 225 };
8819d1f2
FG
226 if let Some(digest) = digest {
227 Self::verify_digest(&data, Some(config), digest)?;
228 }
62ee2eb4 229 Ok(data)
3025b3a5
DM
230 } else {
231 bail!("unable to decrypt blob - missing CryptConfig");
232 }
233 } else {
234 bail!("Invalid blob magic number.");
235 }
236 }
a38c5d4d 237
39f18b30
DM
238 /// Load blob from ``reader``, verify CRC
239 pub fn load_from_reader(reader: &mut dyn std::io::Read) -> Result<Self, Error> {
4ee8f53d
DM
240
241 let mut data = Vec::with_capacity(1024*1024);
242 reader.read_to_end(&mut data)?;
243
39f18b30
DM
244 let blob = Self::from_raw(data)?;
245
246 blob.verify_crc()?;
247
248 Ok(blob)
4ee8f53d
DM
249 }
250
a38c5d4d
DM
251 /// Create Instance from raw data
252 pub fn from_raw(data: Vec<u8>) -> Result<Self, Error> {
253
254 if data.len() < std::mem::size_of::<DataBlobHeader>() {
255 bail!("blob too small ({} bytes).", data.len());
256 }
257
258 let magic = &data[0..8];
259
260 if magic == ENCR_COMPR_BLOB_MAGIC_1_0 || magic == ENCRYPTED_BLOB_MAGIC_1_0 {
261
262 if data.len() < std::mem::size_of::<EncryptedDataBlobHeader>() {
263 bail!("encrypted blob too small ({} bytes).", data.len());
264 }
265
266 let blob = DataBlob { raw_data: data };
267
268 Ok(blob)
269 } else if magic == COMPRESSED_BLOB_MAGIC_1_0 || magic == UNCOMPRESSED_BLOB_MAGIC_1_0 {
270
271 let blob = DataBlob { raw_data: data };
272
273 Ok(blob)
274 } else {
275 bail!("unable to parse raw blob - wrong magic");
276 }
277 }
4ee8f53d 278
c0fa14d9
DM
279 /// Returns if chunk is encrypted
280 pub fn is_encrypted(&self) -> bool {
281 let magic = self.magic();
282 magic == &ENCR_COMPR_BLOB_MAGIC_1_0 || magic == &ENCRYPTED_BLOB_MAGIC_1_0
283 }
284
4ee8f53d
DM
285 /// Verify digest and data length for unencrypted chunks.
286 ///
287 /// To do that, we need to decompress data first. Please note that
1090fd44
DM
288 /// this is not possible for encrypted chunks. This function simply return Ok
289 /// for encrypted chunks.
39f18b30 290 /// Note: This does not call verify_crc, because this is usually done in load
4ee8f53d
DM
291 pub fn verify_unencrypted(
292 &self,
293 expected_chunk_size: usize,
294 expected_digest: &[u8; 32],
295 ) -> Result<(), Error> {
296
297 let magic = self.magic();
298
1090fd44
DM
299 if magic == &ENCR_COMPR_BLOB_MAGIC_1_0 || magic == &ENCRYPTED_BLOB_MAGIC_1_0 {
300 return Ok(());
301 }
302
8819d1f2
FG
303 // verifies digest!
304 let data = self.decode(None, Some(expected_digest))?;
4ee8f53d 305
1090fd44
DM
306 if expected_chunk_size != data.len() {
307 bail!("detected chunk with wrong length ({} != {})", expected_chunk_size, data.len());
308 }
8819d1f2
FG
309
310 Ok(())
311 }
312
313 fn verify_digest(
314 data: &[u8],
315 config: Option<&CryptConfig>,
316 expected_digest: &[u8; 32],
317 ) -> Result<(), Error> {
318
319 let digest = match config {
320 Some(config) => config.compute_digest(data),
58d73ddb 321 None => openssl::sha::sha256(data),
8819d1f2 322 };
1090fd44
DM
323 if &digest != expected_digest {
324 bail!("detected chunk with wrong digest.");
4ee8f53d
DM
325 }
326
327 Ok(())
328 }
ed208076
DM
329
330 /// Benchmark encryption speed
331 pub fn encrypt_benchmark<W: Write>(
332 config: &CryptConfig,
333 data: &[u8],
334 output: W,
335 ) -> Result<(), Error> {
336 let _ = Self::encrypt_to(config, data, output)?;
337 Ok(())
338 }
339
340 // Encrypt data using a random 16 byte IV.
341 //
342 // Writes encrypted data to ``output``, Return the used IV and computed MAC.
343 fn encrypt_to<W: Write>(
344 config: &CryptConfig,
345 data: &[u8],
346 mut output: W,
347 ) -> Result<([u8;16], [u8;16]), Error> {
348
349 let mut iv = [0u8; 16];
25877d05 350 proxmox_sys::linux::fill_with_random_data(&mut iv)?;
ed208076
DM
351
352 let mut tag = [0u8; 16];
353
354 let mut c = config.data_crypter(&iv, Mode::Encrypt)?;
355
356 const BUFFER_SIZE: usize = 32*1024;
357
358 let mut encr_buf = [0u8; BUFFER_SIZE];
359 let max_encoder_input = BUFFER_SIZE - config.cipher().block_size();
360
361 let mut start = 0;
362 loop {
363 let mut end = start + max_encoder_input;
364 if end > data.len() { end = data.len(); }
365 if end > start {
366 let count = c.update(&data[start..end], &mut encr_buf)?;
367 output.write_all(&encr_buf[..count])?;
368 start = end;
369 } else {
370 break;
371 }
372 }
373
374 let rest = c.finalize(&mut encr_buf)?;
375 if rest > 0 { output.write_all(&encr_buf[..rest])?; }
376
377 output.flush()?;
378
379 c.get_tag(&mut tag)?;
380
381 Ok((iv, tag))
382 }
383
384 // Decompress and decrypt data, verify MAC.
385 fn decode_compressed_chunk(
386 config: &CryptConfig,
387 data: &[u8],
388 iv: &[u8; 16],
389 tag: &[u8; 16],
390 ) -> Result<Vec<u8>, Error> {
391
392 let dec = Vec::with_capacity(1024*1024);
393
394 let mut decompressor = zstd::stream::write::Decoder::new(dec)?;
395
396 let mut c = config.data_crypter(iv, Mode::Decrypt)?;
397
398 const BUFFER_SIZE: usize = 32*1024;
399
400 let mut decr_buf = [0u8; BUFFER_SIZE];
401 let max_decoder_input = BUFFER_SIZE - config.cipher().block_size();
402
403 let mut start = 0;
404 loop {
405 let mut end = start + max_decoder_input;
406 if end > data.len() { end = data.len(); }
407 if end > start {
408 let count = c.update(&data[start..end], &mut decr_buf)?;
409 decompressor.write_all(&decr_buf[0..count])?;
410 start = end;
411 } else {
412 break;
413 }
414 }
415
416 c.set_tag(tag)?;
417 let rest = c.finalize(&mut decr_buf)?;
418 if rest > 0 { decompressor.write_all(&decr_buf[..rest])?; }
419
420 decompressor.flush()?;
421
422 Ok(decompressor.into_inner())
423 }
424
425 // Decrypt data, verify tag.
426 fn decode_uncompressed_chunk(
427 config: &CryptConfig,
428 data: &[u8],
429 iv: &[u8; 16],
430 tag: &[u8; 16],
431 ) -> Result<Vec<u8>, Error> {
432
433 let decr_data = decrypt_aead(
434 *config.cipher(),
435 config.enc_key(),
436 Some(iv),
437 b"", //??
438 data,
439 tag,
440 )?;
441
442 Ok(decr_data)
443 }
444
4ee8f53d
DM
445}
446
447/// Builder for chunk DataBlobs
448///
449/// Main purpose is to centralize digest computation. Digest
450/// computation differ for encryped chunk, and this interface ensures that
451/// we always compute the correct one.
7123ff7d
DM
452pub struct DataChunkBuilder<'a, 'b> {
453 config: Option<&'b CryptConfig>,
4ee8f53d
DM
454 orig_data: &'a [u8],
455 digest_computed: bool,
456 digest: [u8; 32],
457 compress: bool,
458}
459
7123ff7d 460impl <'a, 'b> DataChunkBuilder<'a, 'b> {
4ee8f53d
DM
461
462 /// Create a new builder instance.
463 pub fn new(orig_data: &'a [u8]) -> Self {
464 Self {
465 orig_data,
466 config: None,
467 digest_computed: false,
468 digest: [0u8; 32],
469 compress: true,
470 }
471 }
472
473 /// Set compression flag.
474 ///
475 /// If true, chunk data is compressed using zstd (level 1).
476 pub fn compress(mut self, value: bool) -> Self {
477 self.compress = value;
478 self
479 }
480
481 /// Set encryption Configuration
482 ///
3638341a
DM
483 /// If set, chunks are encrypted
484 pub fn crypt_config(mut self, value: &'b CryptConfig) -> Self {
4ee8f53d
DM
485 if self.digest_computed {
486 panic!("unable to set crypt_config after compute_digest().");
487 }
3638341a 488 self.config = Some(value);
4ee8f53d
DM
489 self
490 }
491
492 fn compute_digest(&mut self) {
493 if !self.digest_computed {
9a37bd6c 494 if let Some(config) = self.config {
4ee8f53d
DM
495 self.digest = config.compute_digest(self.orig_data);
496 } else {
497 self.digest = openssl::sha::sha256(self.orig_data);
498 }
499 self.digest_computed = true;
500 }
501 }
502
503 /// Returns the chunk Digest
504 ///
505 /// Note: For encrypted chunks, this needs to be called after
506 /// ``crypt_config``.
507 pub fn digest(&mut self) -> &[u8; 32] {
508 if !self.digest_computed {
509 self.compute_digest();
510 }
511 &self.digest
512 }
513
514 /// Consume self and build the ``DataBlob``.
515 ///
516 /// Returns the blob and the computet digest.
517 pub fn build(mut self) -> Result<(DataBlob, [u8; 32]), Error> {
518 if !self.digest_computed {
519 self.compute_digest();
520 }
521
3638341a 522 let chunk = DataBlob::encode(self.orig_data, self.config, self.compress)?;
4ee8f53d
DM
523 Ok((chunk, self.digest))
524 }
2745151d 525
4c9f753c
DM
526 /// Create a chunk filled with zeroes
527 pub fn build_zero_chunk(
528 crypt_config: Option<&CryptConfig>,
529 chunk_size: usize,
530 compress: bool,
531 ) -> Result<(DataBlob, [u8; 32]), Error> {
ea368a06 532 let zero_bytes = vec![0; chunk_size];
4c9f753c 533 let mut chunk_builder = DataChunkBuilder::new(&zero_bytes).compress(compress);
9a37bd6c 534 if let Some(crypt_config) = crypt_config {
3638341a 535 chunk_builder = chunk_builder.crypt_config(crypt_config);
4c9f753c 536 }
2745151d 537
4c9f753c 538 chunk_builder.build()
2745151d
DM
539 }
540
2745151d 541}