]> git.proxmox.com Git - proxmox-backup.git/blame - src/backup/data_blob.rs
src/backup/verify.rs: use separate thread to load data
[proxmox-backup.git] / src / backup / data_blob.rs
CommitLineData
3638341a 1use anyhow::{bail, Error};
3025b3a5 2use std::convert::TryInto;
3025b3a5 3
5485b579 4use proxmox::tools::io::{ReadExt, WriteExt};
9f83e0f7 5
4ee8f53d 6use super::file_formats::*;
3b66040d
WB
7use super::{CryptConfig, CryptMode};
8
/// Maximum size in bytes (128 MiB) of the data accepted by `DataBlob::encode`;
/// also used as the output capacity when decompressing unencrypted blobs.
const MAX_BLOB_SIZE: usize = 128 * 1024 * 1024;
4ee8f53d
DM
10
11/// Encoded data chunk with digest and positional information
12pub struct ChunkInfo {
13 pub chunk: DataBlob,
14 pub digest: [u8; 32],
15 pub chunk_len: u64,
16 pub offset: u64,
17}
3025b3a5
DM
18
/// Data blob binary storage format
///
/// Data blobs store arbitrary binary data (< 128MB), and can be
/// compressed and encrypted (or just signed). A simple binary format
/// is used to store them on disk or transfer them over the network.
///
/// Please use index files to store large data files (".fidx" or
/// ".didx").
///
pub struct DataBlob {
    raw_data: Vec<u8>, // tagged, compressed, encrypted data
}
31
32impl DataBlob {
33
34 /// accessor to raw_data field
35 pub fn raw_data(&self) -> &[u8] {
36 &self.raw_data
37 }
38
39f18b30
DM
39 /// Returns raw_data size
40 pub fn raw_size(&self) -> u64 {
41 self.raw_data.len() as u64
42 }
43
cb08ac3e
DM
44 /// Consume self and returns raw_data
45 pub fn into_inner(self) -> Vec<u8> {
46 self.raw_data
47 }
48
3025b3a5
DM
49 /// accessor to chunk type (magic number)
50 pub fn magic(&self) -> &[u8; 8] {
51 self.raw_data[0..8].try_into().unwrap()
52 }
53
b7f4f27d
DM
54 /// accessor to crc32 checksum
55 pub fn crc(&self) -> u32 {
9ea4bce4 56 let crc_o = proxmox::offsetof!(DataBlobHeader, crc);
991abfa8 57 u32::from_le_bytes(self.raw_data[crc_o..crc_o+4].try_into().unwrap())
b7f4f27d
DM
58 }
59
60 // set the CRC checksum field
61 pub fn set_crc(&mut self, crc: u32) {
9ea4bce4 62 let crc_o = proxmox::offsetof!(DataBlobHeader, crc);
991abfa8 63 self.raw_data[crc_o..crc_o+4].copy_from_slice(&crc.to_le_bytes());
b7f4f27d
DM
64 }
65
66 /// compute the CRC32 checksum
cb08ac3e 67 pub fn compute_crc(&self) -> u32 {
b7f4f27d 68 let mut hasher = crc32fast::Hasher::new();
c638542b 69 let start = header_size(self.magic()); // start after HEAD
991abfa8 70 hasher.update(&self.raw_data[start..]);
b7f4f27d
DM
71 hasher.finalize()
72 }
73
39f18b30
DM
74 // verify the CRC32 checksum
75 fn verify_crc(&self) -> Result<(), Error> {
b208da83
DM
76 let expected_crc = self.compute_crc();
77 if expected_crc != self.crc() {
78 bail!("Data blob has wrong CRC checksum.");
79 }
80 Ok(())
81 }
82
69ecd8d5 83 /// Create a DataBlob, optionally compressed and/or encrypted
3025b3a5
DM
84 pub fn encode(
85 data: &[u8],
7123ff7d 86 config: Option<&CryptConfig>,
3025b3a5
DM
87 compress: bool,
88 ) -> Result<Self, Error> {
89
781ac11c 90 if data.len() > MAX_BLOB_SIZE {
3025b3a5
DM
91 bail!("data blob too large ({} bytes).", data.len());
92 }
93
f889b158 94 let mut blob = if let Some(config) = config {
3025b3a5 95
0066c6d9
DM
96 let compr_data;
97 let (_compress, data, magic) = if compress {
98 compr_data = zstd::block::compress(data, 1)?;
99 // Note: We only use compression if result is shorter
100 if compr_data.len() < data.len() {
101 (true, &compr_data[..], ENCR_COMPR_BLOB_MAGIC_1_0)
102 } else {
103 (false, data, ENCRYPTED_BLOB_MAGIC_1_0)
104 }
105 } else {
106 (false, data, ENCRYPTED_BLOB_MAGIC_1_0)
107 };
108
109 let header_len = std::mem::size_of::<EncryptedDataBlobHeader>();
110 let mut raw_data = Vec::with_capacity(data.len() + header_len);
111
112 let dummy_head = EncryptedDataBlobHeader {
113 head: DataBlobHeader { magic: [0u8; 8], crc: [0; 4] },
114 iv: [0u8; 16],
115 tag: [0u8; 16],
116 };
5485b579
WB
117 unsafe {
118 raw_data.write_le_value(dummy_head)?;
119 }
0066c6d9
DM
120
121 let (iv, tag) = config.encrypt_to(data, &mut raw_data)?;
122
123 let head = EncryptedDataBlobHeader {
124 head: DataBlobHeader { magic, crc: [0; 4] }, iv, tag,
125 };
126
5485b579
WB
127 unsafe {
128 (&mut raw_data[0..header_len]).write_le_value(head)?;
129 }
0066c6d9 130
f889b158 131 DataBlob { raw_data }
3025b3a5
DM
132 } else {
133
991abfa8 134 let max_data_len = data.len() + std::mem::size_of::<DataBlobHeader>();
3025b3a5 135 if compress {
991abfa8 136 let mut comp_data = Vec::with_capacity(max_data_len);
3025b3a5 137
991abfa8
DM
138 let head = DataBlobHeader {
139 magic: COMPRESSED_BLOB_MAGIC_1_0,
140 crc: [0; 4],
141 };
5485b579
WB
142 unsafe {
143 comp_data.write_le_value(head)?;
144 }
b7f4f27d 145
3025b3a5
DM
146 zstd::stream::copy_encode(data, &mut comp_data, 1)?;
147
991abfa8 148 if comp_data.len() < max_data_len {
eecb2356
DM
149 let mut blob = DataBlob { raw_data: comp_data };
150 blob.set_crc(blob.compute_crc());
151 return Ok(blob);
3025b3a5
DM
152 }
153 }
154
991abfa8 155 let mut raw_data = Vec::with_capacity(max_data_len);
3025b3a5 156
991abfa8
DM
157 let head = DataBlobHeader {
158 magic: UNCOMPRESSED_BLOB_MAGIC_1_0,
159 crc: [0; 4],
160 };
5485b579
WB
161 unsafe {
162 raw_data.write_le_value(head)?;
163 }
3025b3a5
DM
164 raw_data.extend_from_slice(data);
165
f889b158
DM
166 DataBlob { raw_data }
167 };
168
169 blob.set_crc(blob.compute_crc());
170
171 Ok(blob)
3025b3a5
DM
172 }
173
3b66040d
WB
174 /// Get the encryption mode for this blob.
175 pub fn crypt_mode(&self) -> Result<CryptMode, Error> {
176 let magic = self.magic();
177
178 Ok(if magic == &UNCOMPRESSED_BLOB_MAGIC_1_0 || magic == &COMPRESSED_BLOB_MAGIC_1_0 {
179 CryptMode::None
180 } else if magic == &ENCR_COMPR_BLOB_MAGIC_1_0 || magic == &ENCRYPTED_BLOB_MAGIC_1_0 {
181 CryptMode::Encrypt
3b66040d
WB
182 } else {
183 bail!("Invalid blob magic number.");
184 })
185 }
186
3025b3a5 187 /// Decode blob data
8819d1f2 188 pub fn decode(&self, config: Option<&CryptConfig>, digest: Option<&[u8; 32]>) -> Result<Vec<u8>, Error> {
3025b3a5
DM
189
190 let magic = self.magic();
191
192 if magic == &UNCOMPRESSED_BLOB_MAGIC_1_0 {
991abfa8 193 let data_start = std::mem::size_of::<DataBlobHeader>();
8819d1f2
FG
194 let data = self.raw_data[data_start..].to_vec();
195 if let Some(digest) = digest {
196 Self::verify_digest(&data, None, digest)?;
197 }
198 Ok(data)
3025b3a5 199 } else if magic == &COMPRESSED_BLOB_MAGIC_1_0 {
991abfa8 200 let data_start = std::mem::size_of::<DataBlobHeader>();
781ac11c 201 let data = zstd::block::decompress(&self.raw_data[data_start..], MAX_BLOB_SIZE)?;
8819d1f2
FG
202 if let Some(digest) = digest {
203 Self::verify_digest(&data, None, digest)?;
204 }
62ee2eb4 205 Ok(data)
3025b3a5 206 } else if magic == &ENCR_COMPR_BLOB_MAGIC_1_0 || magic == &ENCRYPTED_BLOB_MAGIC_1_0 {
9f83e0f7 207 let header_len = std::mem::size_of::<EncryptedDataBlobHeader>();
ba01828d
DM
208 let head = unsafe {
209 (&self.raw_data[..header_len]).read_le_value::<EncryptedDataBlobHeader>()?
210 };
9f83e0f7 211
3025b3a5
DM
212 if let Some(config) = config {
213 let data = if magic == &ENCR_COMPR_BLOB_MAGIC_1_0 {
9f83e0f7 214 config.decode_compressed_chunk(&self.raw_data[header_len..], &head.iv, &head.tag)?
3025b3a5 215 } else {
9f83e0f7 216 config.decode_uncompressed_chunk(&self.raw_data[header_len..], &head.iv, &head.tag)?
3025b3a5 217 };
8819d1f2
FG
218 if let Some(digest) = digest {
219 Self::verify_digest(&data, Some(config), digest)?;
220 }
62ee2eb4 221 Ok(data)
3025b3a5
DM
222 } else {
223 bail!("unable to decrypt blob - missing CryptConfig");
224 }
225 } else {
226 bail!("Invalid blob magic number.");
227 }
228 }
a38c5d4d 229
39f18b30
DM
230 /// Load blob from ``reader``, verify CRC
231 pub fn load_from_reader(reader: &mut dyn std::io::Read) -> Result<Self, Error> {
4ee8f53d
DM
232
233 let mut data = Vec::with_capacity(1024*1024);
234 reader.read_to_end(&mut data)?;
235
39f18b30
DM
236 let blob = Self::from_raw(data)?;
237
238 blob.verify_crc()?;
239
240 Ok(blob)
4ee8f53d
DM
241 }
242
a38c5d4d
DM
243 /// Create Instance from raw data
244 pub fn from_raw(data: Vec<u8>) -> Result<Self, Error> {
245
246 if data.len() < std::mem::size_of::<DataBlobHeader>() {
247 bail!("blob too small ({} bytes).", data.len());
248 }
249
250 let magic = &data[0..8];
251
252 if magic == ENCR_COMPR_BLOB_MAGIC_1_0 || magic == ENCRYPTED_BLOB_MAGIC_1_0 {
253
254 if data.len() < std::mem::size_of::<EncryptedDataBlobHeader>() {
255 bail!("encrypted blob too small ({} bytes).", data.len());
256 }
257
258 let blob = DataBlob { raw_data: data };
259
260 Ok(blob)
261 } else if magic == COMPRESSED_BLOB_MAGIC_1_0 || magic == UNCOMPRESSED_BLOB_MAGIC_1_0 {
262
263 let blob = DataBlob { raw_data: data };
264
265 Ok(blob)
266 } else {
267 bail!("unable to parse raw blob - wrong magic");
268 }
269 }
4ee8f53d
DM
270
271 /// Verify digest and data length for unencrypted chunks.
272 ///
273 /// To do that, we need to decompress data first. Please note that
1090fd44
DM
274 /// this is not possible for encrypted chunks. This function simply return Ok
275 /// for encrypted chunks.
39f18b30 276 /// Note: This does not call verify_crc, because this is usually done in load
4ee8f53d
DM
277 pub fn verify_unencrypted(
278 &self,
279 expected_chunk_size: usize,
280 expected_digest: &[u8; 32],
281 ) -> Result<(), Error> {
282
283 let magic = self.magic();
284
1090fd44
DM
285 if magic == &ENCR_COMPR_BLOB_MAGIC_1_0 || magic == &ENCRYPTED_BLOB_MAGIC_1_0 {
286 return Ok(());
287 }
288
8819d1f2
FG
289 // verifies digest!
290 let data = self.decode(None, Some(expected_digest))?;
4ee8f53d 291
1090fd44
DM
292 if expected_chunk_size != data.len() {
293 bail!("detected chunk with wrong length ({} != {})", expected_chunk_size, data.len());
294 }
8819d1f2
FG
295
296 Ok(())
297 }
298
299 fn verify_digest(
300 data: &[u8],
301 config: Option<&CryptConfig>,
302 expected_digest: &[u8; 32],
303 ) -> Result<(), Error> {
304
305 let digest = match config {
306 Some(config) => config.compute_digest(data),
307 None => openssl::sha::sha256(&data),
308 };
1090fd44
DM
309 if &digest != expected_digest {
310 bail!("detected chunk with wrong digest.");
4ee8f53d
DM
311 }
312
313 Ok(())
314 }
315}
316
317/// Builder for chunk DataBlobs
318///
319/// Main purpose is to centralize digest computation. Digest
320/// computation differ for encryped chunk, and this interface ensures that
321/// we always compute the correct one.
7123ff7d
DM
322pub struct DataChunkBuilder<'a, 'b> {
323 config: Option<&'b CryptConfig>,
4ee8f53d
DM
324 orig_data: &'a [u8],
325 digest_computed: bool,
326 digest: [u8; 32],
327 compress: bool,
328}
329
7123ff7d 330impl <'a, 'b> DataChunkBuilder<'a, 'b> {
4ee8f53d
DM
331
332 /// Create a new builder instance.
333 pub fn new(orig_data: &'a [u8]) -> Self {
334 Self {
335 orig_data,
336 config: None,
337 digest_computed: false,
338 digest: [0u8; 32],
339 compress: true,
340 }
341 }
342
343 /// Set compression flag.
344 ///
345 /// If true, chunk data is compressed using zstd (level 1).
346 pub fn compress(mut self, value: bool) -> Self {
347 self.compress = value;
348 self
349 }
350
351 /// Set encryption Configuration
352 ///
3638341a
DM
353 /// If set, chunks are encrypted
354 pub fn crypt_config(mut self, value: &'b CryptConfig) -> Self {
4ee8f53d
DM
355 if self.digest_computed {
356 panic!("unable to set crypt_config after compute_digest().");
357 }
3638341a 358 self.config = Some(value);
4ee8f53d
DM
359 self
360 }
361
362 fn compute_digest(&mut self) {
363 if !self.digest_computed {
364 if let Some(ref config) = self.config {
365 self.digest = config.compute_digest(self.orig_data);
366 } else {
367 self.digest = openssl::sha::sha256(self.orig_data);
368 }
369 self.digest_computed = true;
370 }
371 }
372
373 /// Returns the chunk Digest
374 ///
375 /// Note: For encrypted chunks, this needs to be called after
376 /// ``crypt_config``.
377 pub fn digest(&mut self) -> &[u8; 32] {
378 if !self.digest_computed {
379 self.compute_digest();
380 }
381 &self.digest
382 }
383
384 /// Consume self and build the ``DataBlob``.
385 ///
386 /// Returns the blob and the computet digest.
387 pub fn build(mut self) -> Result<(DataBlob, [u8; 32]), Error> {
388 if !self.digest_computed {
389 self.compute_digest();
390 }
391
3638341a 392 let chunk = DataBlob::encode(self.orig_data, self.config, self.compress)?;
4ee8f53d
DM
393 Ok((chunk, self.digest))
394 }
2745151d 395
4c9f753c
DM
396 /// Create a chunk filled with zeroes
397 pub fn build_zero_chunk(
398 crypt_config: Option<&CryptConfig>,
399 chunk_size: usize,
400 compress: bool,
401 ) -> Result<(DataBlob, [u8; 32]), Error> {
402
403 let mut zero_bytes = Vec::with_capacity(chunk_size);
404 zero_bytes.resize(chunk_size, 0u8);
405 let mut chunk_builder = DataChunkBuilder::new(&zero_bytes).compress(compress);
406 if let Some(ref crypt_config) = crypt_config {
3638341a 407 chunk_builder = chunk_builder.crypt_config(crypt_config);
4c9f753c 408 }
2745151d 409
4c9f753c 410 chunk_builder.build()
2745151d
DM
411 }
412
2745151d 413}