]> git.proxmox.com Git - proxmox-backup.git/blame - src/backup/data_blob.rs
switch from failure to anyhow
[proxmox-backup.git] / src / backup / data_blob.rs
CommitLineData
f7d4e4b5 1use anyhow::{bail, Error};
3025b3a5 2use std::convert::TryInto;
3025b3a5 3
5485b579 4use proxmox::tools::io::{ReadExt, WriteExt};
9f83e0f7 5
781ac11c
DM
/// Largest amount of input data (in bytes) accepted for a single blob: 128 MiB.
///
/// `DataBlob::encode` and `DataBlob::create_signed` reject larger inputs.
const MAX_BLOB_SIZE: usize = 128 * 1024 * 1024;
7
4ee8f53d
DM
8use super::file_formats::*;
9use super::CryptConfig;
10
11/// Encoded data chunk with digest and positional information
12pub struct ChunkInfo {
13 pub chunk: DataBlob,
14 pub digest: [u8; 32],
15 pub chunk_len: u64,
16 pub offset: u64,
17}
3025b3a5
DM
18
/// Data blob binary storage format
///
/// Data blobs store arbitrary binary data (up to 128 MiB), and can be
/// compressed and encrypted (or just signed). A simple binary format
/// is used to store them on disk or transfer them over the network.
///
/// Please use index files to store large data files (".fidx" or
/// ".didx").
pub struct DataBlob {
    /// Complete encoded blob: header (magic, CRC, optional iv/tag)
    /// followed by the possibly compressed and/or encrypted payload.
    raw_data: Vec<u8>,
}
31
32impl DataBlob {
33
34 /// accessor to raw_data field
35 pub fn raw_data(&self) -> &[u8] {
36 &self.raw_data
37 }
38
cb08ac3e
DM
39 /// Consume self and returns raw_data
40 pub fn into_inner(self) -> Vec<u8> {
41 self.raw_data
42 }
43
3025b3a5
DM
44 /// accessor to chunk type (magic number)
45 pub fn magic(&self) -> &[u8; 8] {
46 self.raw_data[0..8].try_into().unwrap()
47 }
48
b7f4f27d
DM
49 /// accessor to crc32 checksum
50 pub fn crc(&self) -> u32 {
9ea4bce4 51 let crc_o = proxmox::offsetof!(DataBlobHeader, crc);
991abfa8 52 u32::from_le_bytes(self.raw_data[crc_o..crc_o+4].try_into().unwrap())
b7f4f27d
DM
53 }
54
55 // set the CRC checksum field
56 pub fn set_crc(&mut self, crc: u32) {
9ea4bce4 57 let crc_o = proxmox::offsetof!(DataBlobHeader, crc);
991abfa8 58 self.raw_data[crc_o..crc_o+4].copy_from_slice(&crc.to_le_bytes());
b7f4f27d
DM
59 }
60
61 /// compute the CRC32 checksum
cb08ac3e 62 pub fn compute_crc(&self) -> u32 {
b7f4f27d 63 let mut hasher = crc32fast::Hasher::new();
c638542b 64 let start = header_size(self.magic()); // start after HEAD
991abfa8 65 hasher.update(&self.raw_data[start..]);
b7f4f27d
DM
66 hasher.finalize()
67 }
68
b208da83
DM
69 /// verify the CRC32 checksum
70 pub fn verify_crc(&self) -> Result<(), Error> {
71 let expected_crc = self.compute_crc();
72 if expected_crc != self.crc() {
73 bail!("Data blob has wrong CRC checksum.");
74 }
75 Ok(())
76 }
77
69ecd8d5 78 /// Create a DataBlob, optionally compressed and/or encrypted
3025b3a5
DM
79 pub fn encode(
80 data: &[u8],
7123ff7d 81 config: Option<&CryptConfig>,
3025b3a5
DM
82 compress: bool,
83 ) -> Result<Self, Error> {
84
781ac11c 85 if data.len() > MAX_BLOB_SIZE {
3025b3a5
DM
86 bail!("data blob too large ({} bytes).", data.len());
87 }
88
f889b158 89 let mut blob = if let Some(config) = config {
3025b3a5 90
0066c6d9
DM
91 let compr_data;
92 let (_compress, data, magic) = if compress {
93 compr_data = zstd::block::compress(data, 1)?;
94 // Note: We only use compression if result is shorter
95 if compr_data.len() < data.len() {
96 (true, &compr_data[..], ENCR_COMPR_BLOB_MAGIC_1_0)
97 } else {
98 (false, data, ENCRYPTED_BLOB_MAGIC_1_0)
99 }
100 } else {
101 (false, data, ENCRYPTED_BLOB_MAGIC_1_0)
102 };
103
104 let header_len = std::mem::size_of::<EncryptedDataBlobHeader>();
105 let mut raw_data = Vec::with_capacity(data.len() + header_len);
106
107 let dummy_head = EncryptedDataBlobHeader {
108 head: DataBlobHeader { magic: [0u8; 8], crc: [0; 4] },
109 iv: [0u8; 16],
110 tag: [0u8; 16],
111 };
5485b579
WB
112 unsafe {
113 raw_data.write_le_value(dummy_head)?;
114 }
0066c6d9
DM
115
116 let (iv, tag) = config.encrypt_to(data, &mut raw_data)?;
117
118 let head = EncryptedDataBlobHeader {
119 head: DataBlobHeader { magic, crc: [0; 4] }, iv, tag,
120 };
121
5485b579
WB
122 unsafe {
123 (&mut raw_data[0..header_len]).write_le_value(head)?;
124 }
0066c6d9 125
f889b158 126 DataBlob { raw_data }
3025b3a5
DM
127 } else {
128
991abfa8 129 let max_data_len = data.len() + std::mem::size_of::<DataBlobHeader>();
3025b3a5 130 if compress {
991abfa8 131 let mut comp_data = Vec::with_capacity(max_data_len);
3025b3a5 132
991abfa8
DM
133 let head = DataBlobHeader {
134 magic: COMPRESSED_BLOB_MAGIC_1_0,
135 crc: [0; 4],
136 };
5485b579
WB
137 unsafe {
138 comp_data.write_le_value(head)?;
139 }
b7f4f27d 140
3025b3a5
DM
141 zstd::stream::copy_encode(data, &mut comp_data, 1)?;
142
991abfa8 143 if comp_data.len() < max_data_len {
eecb2356
DM
144 let mut blob = DataBlob { raw_data: comp_data };
145 blob.set_crc(blob.compute_crc());
146 return Ok(blob);
3025b3a5
DM
147 }
148 }
149
991abfa8 150 let mut raw_data = Vec::with_capacity(max_data_len);
3025b3a5 151
991abfa8
DM
152 let head = DataBlobHeader {
153 magic: UNCOMPRESSED_BLOB_MAGIC_1_0,
154 crc: [0; 4],
155 };
5485b579
WB
156 unsafe {
157 raw_data.write_le_value(head)?;
158 }
3025b3a5
DM
159 raw_data.extend_from_slice(data);
160
f889b158
DM
161 DataBlob { raw_data }
162 };
163
164 blob.set_crc(blob.compute_crc());
165
166 Ok(blob)
3025b3a5
DM
167 }
168
169 /// Decode blob data
7123ff7d 170 pub fn decode(self, config: Option<&CryptConfig>) -> Result<Vec<u8>, Error> {
3025b3a5
DM
171
172 let magic = self.magic();
173
174 if magic == &UNCOMPRESSED_BLOB_MAGIC_1_0 {
991abfa8 175 let data_start = std::mem::size_of::<DataBlobHeader>();
62ee2eb4 176 Ok(self.raw_data[data_start..].to_vec())
3025b3a5 177 } else if magic == &COMPRESSED_BLOB_MAGIC_1_0 {
991abfa8 178 let data_start = std::mem::size_of::<DataBlobHeader>();
781ac11c 179 let data = zstd::block::decompress(&self.raw_data[data_start..], MAX_BLOB_SIZE)?;
62ee2eb4 180 Ok(data)
3025b3a5 181 } else if magic == &ENCR_COMPR_BLOB_MAGIC_1_0 || magic == &ENCRYPTED_BLOB_MAGIC_1_0 {
9f83e0f7 182 let header_len = std::mem::size_of::<EncryptedDataBlobHeader>();
ba01828d
DM
183 let head = unsafe {
184 (&self.raw_data[..header_len]).read_le_value::<EncryptedDataBlobHeader>()?
185 };
9f83e0f7 186
3025b3a5
DM
187 if let Some(config) = config {
188 let data = if magic == &ENCR_COMPR_BLOB_MAGIC_1_0 {
9f83e0f7 189 config.decode_compressed_chunk(&self.raw_data[header_len..], &head.iv, &head.tag)?
3025b3a5 190 } else {
9f83e0f7 191 config.decode_uncompressed_chunk(&self.raw_data[header_len..], &head.iv, &head.tag)?
3025b3a5 192 };
62ee2eb4 193 Ok(data)
3025b3a5
DM
194 } else {
195 bail!("unable to decrypt blob - missing CryptConfig");
196 }
69ecd8d5
DM
197 } else if magic == &AUTH_COMPR_BLOB_MAGIC_1_0 || magic == &AUTHENTICATED_BLOB_MAGIC_1_0 {
198 let header_len = std::mem::size_of::<AuthenticatedDataBlobHeader>();
199 let head = unsafe {
200 (&self.raw_data[..header_len]).read_le_value::<AuthenticatedDataBlobHeader>()?
201 };
202
203 let data_start = std::mem::size_of::<AuthenticatedDataBlobHeader>();
204
205 // Note: only verify if we have a crypt config
206 if let Some(config) = config {
207 let signature = config.compute_auth_tag(&self.raw_data[data_start..]);
208 if signature != head.tag {
209 bail!("verifying blob signature failed");
210 }
211 }
212
213 if magic == &AUTH_COMPR_BLOB_MAGIC_1_0 {
214 let data = zstd::block::decompress(&self.raw_data[data_start..], 16*1024*1024)?;
62ee2eb4 215 Ok(data)
69ecd8d5 216 } else {
62ee2eb4 217 Ok(self.raw_data[data_start..].to_vec())
69ecd8d5 218 }
3025b3a5
DM
219 } else {
220 bail!("Invalid blob magic number.");
221 }
222 }
a38c5d4d 223
69ecd8d5
DM
224 /// Create a signed DataBlob, optionally compressed
225 pub fn create_signed(
226 data: &[u8],
7123ff7d 227 config: &CryptConfig,
69ecd8d5
DM
228 compress: bool,
229 ) -> Result<Self, Error> {
230
781ac11c 231 if data.len() > MAX_BLOB_SIZE {
69ecd8d5
DM
232 bail!("data blob too large ({} bytes).", data.len());
233 }
234
235 let compr_data;
236 let (_compress, data, magic) = if compress {
237 compr_data = zstd::block::compress(data, 1)?;
238 // Note: We only use compression if result is shorter
239 if compr_data.len() < data.len() {
240 (true, &compr_data[..], AUTH_COMPR_BLOB_MAGIC_1_0)
241 } else {
242 (false, data, AUTHENTICATED_BLOB_MAGIC_1_0)
243 }
244 } else {
245 (false, data, AUTHENTICATED_BLOB_MAGIC_1_0)
246 };
247
248 let header_len = std::mem::size_of::<AuthenticatedDataBlobHeader>();
249 let mut raw_data = Vec::with_capacity(data.len() + header_len);
250
251 let head = AuthenticatedDataBlobHeader {
252 head: DataBlobHeader { magic, crc: [0; 4] },
253 tag: config.compute_auth_tag(data),
254 };
255 unsafe {
256 raw_data.write_le_value(head)?;
257 }
258 raw_data.extend_from_slice(data);
259
f889b158
DM
260 let mut blob = DataBlob { raw_data };
261 blob.set_crc(blob.compute_crc());
262
62ee2eb4 263 Ok(blob)
69ecd8d5
DM
264 }
265
4ee8f53d
DM
266 /// Load blob from ``reader``
267 pub fn load(reader: &mut dyn std::io::Read) -> Result<Self, Error> {
268
269 let mut data = Vec::with_capacity(1024*1024);
270 reader.read_to_end(&mut data)?;
271
272 Self::from_raw(data)
273 }
274
a38c5d4d
DM
275 /// Create Instance from raw data
276 pub fn from_raw(data: Vec<u8>) -> Result<Self, Error> {
277
278 if data.len() < std::mem::size_of::<DataBlobHeader>() {
279 bail!("blob too small ({} bytes).", data.len());
280 }
281
282 let magic = &data[0..8];
283
284 if magic == ENCR_COMPR_BLOB_MAGIC_1_0 || magic == ENCRYPTED_BLOB_MAGIC_1_0 {
285
286 if data.len() < std::mem::size_of::<EncryptedDataBlobHeader>() {
287 bail!("encrypted blob too small ({} bytes).", data.len());
288 }
289
290 let blob = DataBlob { raw_data: data };
291
292 Ok(blob)
293 } else if magic == COMPRESSED_BLOB_MAGIC_1_0 || magic == UNCOMPRESSED_BLOB_MAGIC_1_0 {
294
295 let blob = DataBlob { raw_data: data };
296
69ecd8d5
DM
297 Ok(blob)
298 } else if magic == AUTH_COMPR_BLOB_MAGIC_1_0 || magic == AUTHENTICATED_BLOB_MAGIC_1_0 {
299 if data.len() < std::mem::size_of::<AuthenticatedDataBlobHeader>() {
300 bail!("authenticated blob too small ({} bytes).", data.len());
301 }
302
303 let blob = DataBlob { raw_data: data };
304
a38c5d4d
DM
305 Ok(blob)
306 } else {
307 bail!("unable to parse raw blob - wrong magic");
308 }
309 }
4ee8f53d
DM
310
311 /// Verify digest and data length for unencrypted chunks.
312 ///
313 /// To do that, we need to decompress data first. Please note that
314 /// this is noth possible for encrypted chunks.
315 pub fn verify_unencrypted(
316 &self,
317 expected_chunk_size: usize,
318 expected_digest: &[u8; 32],
319 ) -> Result<(), Error> {
320
321 let magic = self.magic();
322
323 let verify_raw_data = |data: &[u8]| {
324 if expected_chunk_size != data.len() {
325 bail!("detected chunk with wrong length ({} != {})", expected_chunk_size, data.len());
326 }
327 let digest = openssl::sha::sha256(data);
328 if &digest != expected_digest {
329 bail!("detected chunk with wrong digest.");
330 }
331 Ok(())
332 };
333
334 if magic == &COMPRESSED_BLOB_MAGIC_1_0 {
335 let data = zstd::block::decompress(&self.raw_data[12..], 16*1024*1024)?;
336 verify_raw_data(&data)?;
337 } else if magic == &UNCOMPRESSED_BLOB_MAGIC_1_0 {
338 verify_raw_data(&self.raw_data[12..])?;
339 }
340
341 Ok(())
342 }
343}
344
345/// Builder for chunk DataBlobs
346///
347/// Main purpose is to centralize digest computation. Digest
348/// computation differ for encryped chunk, and this interface ensures that
349/// we always compute the correct one.
7123ff7d
DM
350pub struct DataChunkBuilder<'a, 'b> {
351 config: Option<&'b CryptConfig>,
4ee8f53d
DM
352 orig_data: &'a [u8],
353 digest_computed: bool,
354 digest: [u8; 32],
355 compress: bool,
356}
357
7123ff7d 358impl <'a, 'b> DataChunkBuilder<'a, 'b> {
4ee8f53d
DM
359
360 /// Create a new builder instance.
361 pub fn new(orig_data: &'a [u8]) -> Self {
362 Self {
363 orig_data,
364 config: None,
365 digest_computed: false,
366 digest: [0u8; 32],
367 compress: true,
368 }
369 }
370
371 /// Set compression flag.
372 ///
373 /// If true, chunk data is compressed using zstd (level 1).
374 pub fn compress(mut self, value: bool) -> Self {
375 self.compress = value;
376 self
377 }
378
379 /// Set encryption Configuration
380 ///
381 /// If set, chunks are encrypted.
7123ff7d 382 pub fn crypt_config(mut self, value: &'b CryptConfig) -> Self {
4ee8f53d
DM
383 if self.digest_computed {
384 panic!("unable to set crypt_config after compute_digest().");
385 }
386 self.config = Some(value);
387 self
388 }
389
390 fn compute_digest(&mut self) {
391 if !self.digest_computed {
392 if let Some(ref config) = self.config {
393 self.digest = config.compute_digest(self.orig_data);
394 } else {
395 self.digest = openssl::sha::sha256(self.orig_data);
396 }
397 self.digest_computed = true;
398 }
399 }
400
401 /// Returns the chunk Digest
402 ///
403 /// Note: For encrypted chunks, this needs to be called after
404 /// ``crypt_config``.
405 pub fn digest(&mut self) -> &[u8; 32] {
406 if !self.digest_computed {
407 self.compute_digest();
408 }
409 &self.digest
410 }
411
412 /// Consume self and build the ``DataBlob``.
413 ///
414 /// Returns the blob and the computet digest.
415 pub fn build(mut self) -> Result<(DataBlob, [u8; 32]), Error> {
416 if !self.digest_computed {
417 self.compute_digest();
418 }
419
420 let chunk = DataBlob::encode(
421 self.orig_data,
422 self.config,
423 self.compress,
424 )?;
425
426 Ok((chunk, self.digest))
427 }
2745151d 428
4c9f753c
DM
429 /// Create a chunk filled with zeroes
430 pub fn build_zero_chunk(
431 crypt_config: Option<&CryptConfig>,
432 chunk_size: usize,
433 compress: bool,
434 ) -> Result<(DataBlob, [u8; 32]), Error> {
435
436 let mut zero_bytes = Vec::with_capacity(chunk_size);
437 zero_bytes.resize(chunk_size, 0u8);
438 let mut chunk_builder = DataChunkBuilder::new(&zero_bytes).compress(compress);
439 if let Some(ref crypt_config) = crypt_config {
440 chunk_builder = chunk_builder.crypt_config(crypt_config);
441 }
2745151d 442
4c9f753c 443 chunk_builder.build()
2745151d
DM
444 }
445
2745151d 446}