// git.proxmox.com Git - proxmox-backup.git/blob - pbs-datastore/src/data_blob.rs
// gc: fix ignoring if `lost+found` can't be accessed
// [proxmox-backup.git] / pbs-datastore / src / data_blob.rs
1 use std::io::Write;
2
3 use anyhow::{bail, Error};
4 use openssl::symm::{decrypt_aead, Mode};
5
6 use proxmox_io::{ReadExt, WriteExt};
7
8 use pbs_api_types::CryptMode;
9 use pbs_tools::crypt_config::CryptConfig;
10
11 use super::file_formats::*;
12
/// Largest payload, in bytes (128 MiB), that `DataBlob::encode` accepts.
const MAX_BLOB_SIZE: usize = 128 * 1024 * 1024;
14
15 /// Encoded data chunk with digest and positional information
16 pub struct ChunkInfo {
17 pub chunk: DataBlob,
18 pub digest: [u8; 32],
19 pub chunk_len: u64,
20 pub offset: u64,
21 }
22
/// Data blob binary storage format
///
/// Data blobs store arbitrary binary data (< 128MB), and can be
/// compressed and encrypted (or just signed). A simple binary format
/// is used to store them on disk or transfer them over the network.
///
/// Please use index files to store large data files (".fidx" or
/// ".didx").
///
pub struct DataBlob {
    /// Complete on-disk/on-wire representation: header (magic, CRC and, for
    /// encrypted blobs, IV and tag) followed by the possibly compressed and/or
    /// encrypted payload.
    raw_data: Vec<u8>,
}
35
36 impl DataBlob {
37 /// accessor to raw_data field
38 pub fn raw_data(&self) -> &[u8] {
39 &self.raw_data
40 }
41
42 /// Returns raw_data size
43 pub fn raw_size(&self) -> u64 {
44 self.raw_data.len() as u64
45 }
46
47 /// Consume self and returns raw_data
48 pub fn into_inner(self) -> Vec<u8> {
49 self.raw_data
50 }
51
52 /// accessor to chunk type (magic number)
53 pub fn magic(&self) -> &[u8; 8] {
54 self.raw_data[0..8].try_into().unwrap()
55 }
56
57 /// accessor to crc32 checksum
58 pub fn crc(&self) -> u32 {
59 let crc_o = proxmox_lang::offsetof!(DataBlobHeader, crc);
60 u32::from_le_bytes(self.raw_data[crc_o..crc_o + 4].try_into().unwrap())
61 }
62
63 // set the CRC checksum field
64 pub fn set_crc(&mut self, crc: u32) {
65 let crc_o = proxmox_lang::offsetof!(DataBlobHeader, crc);
66 self.raw_data[crc_o..crc_o + 4].copy_from_slice(&crc.to_le_bytes());
67 }
68
69 /// compute the CRC32 checksum
70 pub fn compute_crc(&self) -> u32 {
71 let mut hasher = crc32fast::Hasher::new();
72 let start = header_size(self.magic()); // start after HEAD
73 hasher.update(&self.raw_data[start..]);
74 hasher.finalize()
75 }
76
77 // verify the CRC32 checksum
78 pub fn verify_crc(&self) -> Result<(), Error> {
79 let expected_crc = self.compute_crc();
80 if expected_crc != self.crc() {
81 bail!("Data blob has wrong CRC checksum.");
82 }
83 Ok(())
84 }
85
86 /// Create a DataBlob, optionally compressed and/or encrypted
87 pub fn encode(
88 data: &[u8],
89 config: Option<&CryptConfig>,
90 compress: bool,
91 ) -> Result<Self, Error> {
92 if data.len() > MAX_BLOB_SIZE {
93 bail!("data blob too large ({} bytes).", data.len());
94 }
95
96 let mut blob = if let Some(config) = config {
97 let compr_data;
98 let (_compress, data, magic) = if compress {
99 compr_data = zstd::bulk::compress(data, 1)?;
100 // Note: We only use compression if result is shorter
101 if compr_data.len() < data.len() {
102 (true, &compr_data[..], ENCR_COMPR_BLOB_MAGIC_1_0)
103 } else {
104 (false, data, ENCRYPTED_BLOB_MAGIC_1_0)
105 }
106 } else {
107 (false, data, ENCRYPTED_BLOB_MAGIC_1_0)
108 };
109
110 let header_len = std::mem::size_of::<EncryptedDataBlobHeader>();
111 let mut raw_data = Vec::with_capacity(data.len() + header_len);
112
113 let dummy_head = EncryptedDataBlobHeader {
114 head: DataBlobHeader {
115 magic: [0u8; 8],
116 crc: [0; 4],
117 },
118 iv: [0u8; 16],
119 tag: [0u8; 16],
120 };
121 unsafe {
122 raw_data.write_le_value(dummy_head)?;
123 }
124
125 let (iv, tag) = Self::encrypt_to(config, data, &mut raw_data)?;
126
127 let head = EncryptedDataBlobHeader {
128 head: DataBlobHeader { magic, crc: [0; 4] },
129 iv,
130 tag,
131 };
132
133 unsafe {
134 (&mut raw_data[0..header_len]).write_le_value(head)?;
135 }
136
137 DataBlob { raw_data }
138 } else {
139 let max_data_len = data.len() + std::mem::size_of::<DataBlobHeader>();
140 if compress {
141 let mut comp_data = Vec::with_capacity(max_data_len);
142
143 let head = DataBlobHeader {
144 magic: COMPRESSED_BLOB_MAGIC_1_0,
145 crc: [0; 4],
146 };
147 unsafe {
148 comp_data.write_le_value(head)?;
149 }
150
151 zstd::stream::copy_encode(data, &mut comp_data, 1)?;
152
153 if comp_data.len() < max_data_len {
154 let mut blob = DataBlob {
155 raw_data: comp_data,
156 };
157 blob.set_crc(blob.compute_crc());
158 return Ok(blob);
159 }
160 }
161
162 let mut raw_data = Vec::with_capacity(max_data_len);
163
164 let head = DataBlobHeader {
165 magic: UNCOMPRESSED_BLOB_MAGIC_1_0,
166 crc: [0; 4],
167 };
168 unsafe {
169 raw_data.write_le_value(head)?;
170 }
171 raw_data.extend_from_slice(data);
172
173 DataBlob { raw_data }
174 };
175
176 blob.set_crc(blob.compute_crc());
177
178 Ok(blob)
179 }
180
181 /// Get the encryption mode for this blob.
182 pub fn crypt_mode(&self) -> Result<CryptMode, Error> {
183 let magic = self.magic();
184
185 Ok(
186 if magic == &UNCOMPRESSED_BLOB_MAGIC_1_0 || magic == &COMPRESSED_BLOB_MAGIC_1_0 {
187 CryptMode::None
188 } else if magic == &ENCR_COMPR_BLOB_MAGIC_1_0 || magic == &ENCRYPTED_BLOB_MAGIC_1_0 {
189 CryptMode::Encrypt
190 } else {
191 bail!("Invalid blob magic number.");
192 },
193 )
194 }
195
196 /// Decode blob data
197 pub fn decode(
198 &self,
199 config: Option<&CryptConfig>,
200 digest: Option<&[u8; 32]>,
201 ) -> Result<Vec<u8>, Error> {
202 let magic = self.magic();
203
204 if magic == &UNCOMPRESSED_BLOB_MAGIC_1_0 {
205 let data_start = std::mem::size_of::<DataBlobHeader>();
206 let data = self.raw_data[data_start..].to_vec();
207 if let Some(digest) = digest {
208 Self::verify_digest(&data, None, digest)?;
209 }
210 Ok(data)
211 } else if magic == &COMPRESSED_BLOB_MAGIC_1_0 {
212 let data_start = std::mem::size_of::<DataBlobHeader>();
213 let mut reader = &self.raw_data[data_start..];
214 let data = zstd::stream::decode_all(&mut reader)?;
215 // zstd::block::decompress is abou 10% slower
216 // let data = zstd::block::decompress(&self.raw_data[data_start..], MAX_BLOB_SIZE)?;
217 if let Some(digest) = digest {
218 Self::verify_digest(&data, None, digest)?;
219 }
220 Ok(data)
221 } else if magic == &ENCR_COMPR_BLOB_MAGIC_1_0 || magic == &ENCRYPTED_BLOB_MAGIC_1_0 {
222 let header_len = std::mem::size_of::<EncryptedDataBlobHeader>();
223 let head = unsafe {
224 (&self.raw_data[..header_len]).read_le_value::<EncryptedDataBlobHeader>()?
225 };
226
227 if let Some(config) = config {
228 let data = if magic == &ENCR_COMPR_BLOB_MAGIC_1_0 {
229 Self::decode_compressed_chunk(
230 config,
231 &self.raw_data[header_len..],
232 &head.iv,
233 &head.tag,
234 )?
235 } else {
236 Self::decode_uncompressed_chunk(
237 config,
238 &self.raw_data[header_len..],
239 &head.iv,
240 &head.tag,
241 )?
242 };
243 if let Some(digest) = digest {
244 Self::verify_digest(&data, Some(config), digest)?;
245 }
246 Ok(data)
247 } else {
248 bail!("unable to decrypt blob - missing CryptConfig");
249 }
250 } else {
251 bail!("Invalid blob magic number.");
252 }
253 }
254
255 /// Load blob from ``reader``, verify CRC
256 pub fn load_from_reader(reader: &mut dyn std::io::Read) -> Result<Self, Error> {
257 let mut data = Vec::with_capacity(1024 * 1024);
258 reader.read_to_end(&mut data)?;
259
260 let blob = Self::from_raw(data)?;
261
262 blob.verify_crc()?;
263
264 Ok(blob)
265 }
266
267 /// Create Instance from raw data
268 pub fn from_raw(data: Vec<u8>) -> Result<Self, Error> {
269 if data.len() < std::mem::size_of::<DataBlobHeader>() {
270 bail!("blob too small ({} bytes).", data.len());
271 }
272
273 let magic = &data[0..8];
274
275 if magic == ENCR_COMPR_BLOB_MAGIC_1_0 || magic == ENCRYPTED_BLOB_MAGIC_1_0 {
276 if data.len() < std::mem::size_of::<EncryptedDataBlobHeader>() {
277 bail!("encrypted blob too small ({} bytes).", data.len());
278 }
279
280 let blob = DataBlob { raw_data: data };
281
282 Ok(blob)
283 } else if magic == COMPRESSED_BLOB_MAGIC_1_0 || magic == UNCOMPRESSED_BLOB_MAGIC_1_0 {
284 let blob = DataBlob { raw_data: data };
285
286 Ok(blob)
287 } else {
288 bail!("unable to parse raw blob - wrong magic");
289 }
290 }
291
292 /// Returns if chunk is encrypted
293 pub fn is_encrypted(&self) -> bool {
294 let magic = self.magic();
295 magic == &ENCR_COMPR_BLOB_MAGIC_1_0 || magic == &ENCRYPTED_BLOB_MAGIC_1_0
296 }
297
298 /// Returns if chunk is compressed
299 pub fn is_compressed(&self) -> bool {
300 let magic = self.magic();
301 magic == &ENCR_COMPR_BLOB_MAGIC_1_0 || magic == &COMPRESSED_BLOB_MAGIC_1_0
302 }
303
304 /// Verify digest and data length for unencrypted chunks.
305 ///
306 /// To do that, we need to decompress data first. Please note that
307 /// this is not possible for encrypted chunks. This function simply return Ok
308 /// for encrypted chunks.
309 /// Note: This does not call verify_crc, because this is usually done in load
310 pub fn verify_unencrypted(
311 &self,
312 expected_chunk_size: usize,
313 expected_digest: &[u8; 32],
314 ) -> Result<(), Error> {
315 let magic = self.magic();
316
317 if magic == &ENCR_COMPR_BLOB_MAGIC_1_0 || magic == &ENCRYPTED_BLOB_MAGIC_1_0 {
318 return Ok(());
319 }
320
321 // verifies digest!
322 let data = self.decode(None, Some(expected_digest))?;
323
324 if expected_chunk_size != data.len() {
325 bail!(
326 "detected chunk with wrong length ({} != {})",
327 expected_chunk_size,
328 data.len()
329 );
330 }
331
332 Ok(())
333 }
334
335 fn verify_digest(
336 data: &[u8],
337 config: Option<&CryptConfig>,
338 expected_digest: &[u8; 32],
339 ) -> Result<(), Error> {
340 let digest = match config {
341 Some(config) => config.compute_digest(data),
342 None => openssl::sha::sha256(data),
343 };
344 if &digest != expected_digest {
345 bail!("detected chunk with wrong digest.");
346 }
347
348 Ok(())
349 }
350
351 /// Benchmark encryption speed
352 pub fn encrypt_benchmark<W: Write>(
353 config: &CryptConfig,
354 data: &[u8],
355 output: W,
356 ) -> Result<(), Error> {
357 let _ = Self::encrypt_to(config, data, output)?;
358 Ok(())
359 }
360
361 // Encrypt data using a random 16 byte IV.
362 //
363 // Writes encrypted data to ``output``, Return the used IV and computed MAC.
364 fn encrypt_to<W: Write>(
365 config: &CryptConfig,
366 data: &[u8],
367 mut output: W,
368 ) -> Result<([u8; 16], [u8; 16]), Error> {
369 let mut iv = [0u8; 16];
370 proxmox_sys::linux::fill_with_random_data(&mut iv)?;
371
372 let mut tag = [0u8; 16];
373
374 let mut c = config.data_crypter(&iv, Mode::Encrypt)?;
375
376 const BUFFER_SIZE: usize = 32 * 1024;
377
378 let mut encr_buf = [0u8; BUFFER_SIZE];
379 let max_encoder_input = BUFFER_SIZE - config.cipher().block_size();
380
381 let mut start = 0;
382 loop {
383 let mut end = start + max_encoder_input;
384 if end > data.len() {
385 end = data.len();
386 }
387 if end > start {
388 let count = c.update(&data[start..end], &mut encr_buf)?;
389 output.write_all(&encr_buf[..count])?;
390 start = end;
391 } else {
392 break;
393 }
394 }
395
396 let rest = c.finalize(&mut encr_buf)?;
397 if rest > 0 {
398 output.write_all(&encr_buf[..rest])?;
399 }
400
401 output.flush()?;
402
403 c.get_tag(&mut tag)?;
404
405 Ok((iv, tag))
406 }
407
408 // Decompress and decrypt data, verify MAC.
409 fn decode_compressed_chunk(
410 config: &CryptConfig,
411 data: &[u8],
412 iv: &[u8; 16],
413 tag: &[u8; 16],
414 ) -> Result<Vec<u8>, Error> {
415 let dec = Vec::with_capacity(1024 * 1024);
416
417 let mut decompressor = zstd::stream::write::Decoder::new(dec)?;
418
419 let mut c = config.data_crypter(iv, Mode::Decrypt)?;
420
421 const BUFFER_SIZE: usize = 32 * 1024;
422
423 let mut decr_buf = [0u8; BUFFER_SIZE];
424 let max_decoder_input = BUFFER_SIZE - config.cipher().block_size();
425
426 let mut start = 0;
427 loop {
428 let mut end = start + max_decoder_input;
429 if end > data.len() {
430 end = data.len();
431 }
432 if end > start {
433 let count = c.update(&data[start..end], &mut decr_buf)?;
434 decompressor.write_all(&decr_buf[0..count])?;
435 start = end;
436 } else {
437 break;
438 }
439 }
440
441 c.set_tag(tag)?;
442 let rest = c.finalize(&mut decr_buf)?;
443 if rest > 0 {
444 decompressor.write_all(&decr_buf[..rest])?;
445 }
446
447 decompressor.flush()?;
448
449 Ok(decompressor.into_inner())
450 }
451
452 // Decrypt data, verify tag.
453 fn decode_uncompressed_chunk(
454 config: &CryptConfig,
455 data: &[u8],
456 iv: &[u8; 16],
457 tag: &[u8; 16],
458 ) -> Result<Vec<u8>, Error> {
459 let decr_data = decrypt_aead(
460 *config.cipher(),
461 config.enc_key(),
462 Some(iv),
463 b"", //??
464 data,
465 tag,
466 )?;
467
468 Ok(decr_data)
469 }
470 }
471
472 /// Builder for chunk DataBlobs
473 ///
474 /// Main purpose is to centralize digest computation. Digest
475 /// computation differ for encryped chunk, and this interface ensures that
476 /// we always compute the correct one.
477 pub struct DataChunkBuilder<'a, 'b> {
478 config: Option<&'b CryptConfig>,
479 orig_data: &'a [u8],
480 digest_computed: bool,
481 digest: [u8; 32],
482 compress: bool,
483 }
484
485 impl<'a, 'b> DataChunkBuilder<'a, 'b> {
486 /// Create a new builder instance.
487 pub fn new(orig_data: &'a [u8]) -> Self {
488 Self {
489 orig_data,
490 config: None,
491 digest_computed: false,
492 digest: [0u8; 32],
493 compress: true,
494 }
495 }
496
497 /// Set compression flag.
498 ///
499 /// If true, chunk data is compressed using zstd (level 1).
500 pub fn compress(mut self, value: bool) -> Self {
501 self.compress = value;
502 self
503 }
504
505 /// Set encryption Configuration
506 ///
507 /// If set, chunks are encrypted
508 pub fn crypt_config(mut self, value: &'b CryptConfig) -> Self {
509 if self.digest_computed {
510 panic!("unable to set crypt_config after compute_digest().");
511 }
512 self.config = Some(value);
513 self
514 }
515
516 fn compute_digest(&mut self) {
517 if !self.digest_computed {
518 if let Some(config) = self.config {
519 self.digest = config.compute_digest(self.orig_data);
520 } else {
521 self.digest = openssl::sha::sha256(self.orig_data);
522 }
523 self.digest_computed = true;
524 }
525 }
526
527 /// Returns the chunk Digest
528 ///
529 /// Note: For encrypted chunks, this needs to be called after
530 /// ``crypt_config``.
531 pub fn digest(&mut self) -> &[u8; 32] {
532 if !self.digest_computed {
533 self.compute_digest();
534 }
535 &self.digest
536 }
537
538 /// Consume self and build the ``DataBlob``.
539 ///
540 /// Returns the blob and the computet digest.
541 pub fn build(mut self) -> Result<(DataBlob, [u8; 32]), Error> {
542 if !self.digest_computed {
543 self.compute_digest();
544 }
545
546 let chunk = DataBlob::encode(self.orig_data, self.config, self.compress)?;
547 Ok((chunk, self.digest))
548 }
549
550 /// Create a chunk filled with zeroes
551 pub fn build_zero_chunk(
552 crypt_config: Option<&CryptConfig>,
553 chunk_size: usize,
554 compress: bool,
555 ) -> Result<(DataBlob, [u8; 32]), Error> {
556 let zero_bytes = vec![0; chunk_size];
557 let mut chunk_builder = DataChunkBuilder::new(&zero_bytes).compress(compress);
558 if let Some(crypt_config) = crypt_config {
559 chunk_builder = chunk_builder.crypt_config(crypt_config);
560 }
561
562 chunk_builder.build()
563 }
564 }