]> git.proxmox.com Git - proxmox-backup.git/blob - src/backup/data_blob.rs
12e1984242eaac8eafb12fcd2b0c0f4b473fe3df
[proxmox-backup.git] / src / backup / data_blob.rs
1 use failure::*;
2 use std::convert::TryInto;
3 use std::sync::Arc;
4
5 use proxmox::tools::io::{ReadExt, WriteExt};
6
7 const MAX_BLOB_SIZE: usize = 128*1024*1024;
8
9 use super::file_formats::*;
10 use super::CryptConfig;
11
/// Encoded data chunk with digest and positional information
pub struct ChunkInfo {
    /// The encoded chunk blob.
    pub chunk: DataBlob,
    /// 32-byte digest associated with the chunk.
    pub digest: [u8; 32],
    /// Chunk length in bytes (presumably of the unencoded data - TODO confirm against callers).
    pub chunk_len: u64,
    /// Position of the chunk (presumably its offset inside the source stream - TODO confirm).
    pub offset: u64,
}
19
/// Data blob binary storage format
///
/// Data blobs store arbitrary binary data (up to 128 MiB), and can be
/// compressed and encrypted (or just signed). A simple binary format
/// is used to store them on disk or transfer them over the network.
///
/// Please use index files to store large data files (".fidx" or
/// ".didx").
///
pub struct DataBlob {
    raw_data: Vec<u8>, // tagged, compressed, encrypted data
}
32
33 impl DataBlob {
34
35 /// accessor to raw_data field
36 pub fn raw_data(&self) -> &[u8] {
37 &self.raw_data
38 }
39
40 /// Consume self and returns raw_data
41 pub fn into_inner(self) -> Vec<u8> {
42 self.raw_data
43 }
44
45 /// accessor to chunk type (magic number)
46 pub fn magic(&self) -> &[u8; 8] {
47 self.raw_data[0..8].try_into().unwrap()
48 }
49
50 /// accessor to crc32 checksum
51 pub fn crc(&self) -> u32 {
52 let crc_o = proxmox::tools::offsetof!(DataBlobHeader, crc);
53 u32::from_le_bytes(self.raw_data[crc_o..crc_o+4].try_into().unwrap())
54 }
55
56 // set the CRC checksum field
57 pub fn set_crc(&mut self, crc: u32) {
58 let crc_o = proxmox::tools::offsetof!(DataBlobHeader, crc);
59 self.raw_data[crc_o..crc_o+4].copy_from_slice(&crc.to_le_bytes());
60 }
61
62 /// compute the CRC32 checksum
63 pub fn compute_crc(&self) -> u32 {
64 let mut hasher = crc32fast::Hasher::new();
65 let start = header_size(self.magic()); // start after HEAD
66 hasher.update(&self.raw_data[start..]);
67 hasher.finalize()
68 }
69
70 /// verify the CRC32 checksum
71 pub fn verify_crc(&self) -> Result<(), Error> {
72 let expected_crc = self.compute_crc();
73 if expected_crc != self.crc() {
74 bail!("Data blob has wrong CRC checksum.");
75 }
76 Ok(())
77 }
78
79 /// Create a DataBlob, optionally compressed and/or encrypted
80 pub fn encode(
81 data: &[u8],
82 config: Option<Arc<CryptConfig>>,
83 compress: bool,
84 ) -> Result<Self, Error> {
85
86 if data.len() > MAX_BLOB_SIZE {
87 bail!("data blob too large ({} bytes).", data.len());
88 }
89
90 let mut blob = if let Some(config) = config {
91
92 let compr_data;
93 let (_compress, data, magic) = if compress {
94 compr_data = zstd::block::compress(data, 1)?;
95 // Note: We only use compression if result is shorter
96 if compr_data.len() < data.len() {
97 (true, &compr_data[..], ENCR_COMPR_BLOB_MAGIC_1_0)
98 } else {
99 (false, data, ENCRYPTED_BLOB_MAGIC_1_0)
100 }
101 } else {
102 (false, data, ENCRYPTED_BLOB_MAGIC_1_0)
103 };
104
105 let header_len = std::mem::size_of::<EncryptedDataBlobHeader>();
106 let mut raw_data = Vec::with_capacity(data.len() + header_len);
107
108 let dummy_head = EncryptedDataBlobHeader {
109 head: DataBlobHeader { magic: [0u8; 8], crc: [0; 4] },
110 iv: [0u8; 16],
111 tag: [0u8; 16],
112 };
113 unsafe {
114 raw_data.write_le_value(dummy_head)?;
115 }
116
117 let (iv, tag) = config.encrypt_to(data, &mut raw_data)?;
118
119 let head = EncryptedDataBlobHeader {
120 head: DataBlobHeader { magic, crc: [0; 4] }, iv, tag,
121 };
122
123 unsafe {
124 (&mut raw_data[0..header_len]).write_le_value(head)?;
125 }
126
127 DataBlob { raw_data }
128 } else {
129
130 let max_data_len = data.len() + std::mem::size_of::<DataBlobHeader>();
131 if compress {
132 let mut comp_data = Vec::with_capacity(max_data_len);
133
134 let head = DataBlobHeader {
135 magic: COMPRESSED_BLOB_MAGIC_1_0,
136 crc: [0; 4],
137 };
138 unsafe {
139 comp_data.write_le_value(head)?;
140 }
141
142 zstd::stream::copy_encode(data, &mut comp_data, 1)?;
143
144 if comp_data.len() < max_data_len {
145 let mut blob = DataBlob { raw_data: comp_data };
146 blob.set_crc(blob.compute_crc());
147 return Ok(blob);
148 }
149 }
150
151 let mut raw_data = Vec::with_capacity(max_data_len);
152
153 let head = DataBlobHeader {
154 magic: UNCOMPRESSED_BLOB_MAGIC_1_0,
155 crc: [0; 4],
156 };
157 unsafe {
158 raw_data.write_le_value(head)?;
159 }
160 raw_data.extend_from_slice(data);
161
162 DataBlob { raw_data }
163 };
164
165 blob.set_crc(blob.compute_crc());
166
167 Ok(blob)
168 }
169
170 /// Decode blob data
171 pub fn decode(self, config: Option<Arc<CryptConfig>>) -> Result<Vec<u8>, Error> {
172
173 let magic = self.magic();
174
175 if magic == &UNCOMPRESSED_BLOB_MAGIC_1_0 {
176 let data_start = std::mem::size_of::<DataBlobHeader>();
177 return Ok(self.raw_data[data_start..].to_vec());
178 } else if magic == &COMPRESSED_BLOB_MAGIC_1_0 {
179 let data_start = std::mem::size_of::<DataBlobHeader>();
180 let data = zstd::block::decompress(&self.raw_data[data_start..], MAX_BLOB_SIZE)?;
181 return Ok(data);
182 } else if magic == &ENCR_COMPR_BLOB_MAGIC_1_0 || magic == &ENCRYPTED_BLOB_MAGIC_1_0 {
183 let header_len = std::mem::size_of::<EncryptedDataBlobHeader>();
184 let head = unsafe {
185 (&self.raw_data[..header_len]).read_le_value::<EncryptedDataBlobHeader>()?
186 };
187
188 if let Some(config) = config {
189 let data = if magic == &ENCR_COMPR_BLOB_MAGIC_1_0 {
190 config.decode_compressed_chunk(&self.raw_data[header_len..], &head.iv, &head.tag)?
191 } else {
192 config.decode_uncompressed_chunk(&self.raw_data[header_len..], &head.iv, &head.tag)?
193 };
194 return Ok(data);
195 } else {
196 bail!("unable to decrypt blob - missing CryptConfig");
197 }
198 } else if magic == &AUTH_COMPR_BLOB_MAGIC_1_0 || magic == &AUTHENTICATED_BLOB_MAGIC_1_0 {
199 let header_len = std::mem::size_of::<AuthenticatedDataBlobHeader>();
200 let head = unsafe {
201 (&self.raw_data[..header_len]).read_le_value::<AuthenticatedDataBlobHeader>()?
202 };
203
204 let data_start = std::mem::size_of::<AuthenticatedDataBlobHeader>();
205
206 // Note: only verify if we have a crypt config
207 if let Some(config) = config {
208 let signature = config.compute_auth_tag(&self.raw_data[data_start..]);
209 if signature != head.tag {
210 bail!("verifying blob signature failed");
211 }
212 }
213
214 if magic == &AUTH_COMPR_BLOB_MAGIC_1_0 {
215 let data = zstd::block::decompress(&self.raw_data[data_start..], 16*1024*1024)?;
216 return Ok(data);
217 } else {
218 return Ok(self.raw_data[data_start..].to_vec());
219 }
220 } else {
221 bail!("Invalid blob magic number.");
222 }
223 }
224
225 /// Create a signed DataBlob, optionally compressed
226 pub fn create_signed(
227 data: &[u8],
228 config: Arc<CryptConfig>,
229 compress: bool,
230 ) -> Result<Self, Error> {
231
232 if data.len() > MAX_BLOB_SIZE {
233 bail!("data blob too large ({} bytes).", data.len());
234 }
235
236 let compr_data;
237 let (_compress, data, magic) = if compress {
238 compr_data = zstd::block::compress(data, 1)?;
239 // Note: We only use compression if result is shorter
240 if compr_data.len() < data.len() {
241 (true, &compr_data[..], AUTH_COMPR_BLOB_MAGIC_1_0)
242 } else {
243 (false, data, AUTHENTICATED_BLOB_MAGIC_1_0)
244 }
245 } else {
246 (false, data, AUTHENTICATED_BLOB_MAGIC_1_0)
247 };
248
249 let header_len = std::mem::size_of::<AuthenticatedDataBlobHeader>();
250 let mut raw_data = Vec::with_capacity(data.len() + header_len);
251
252 let head = AuthenticatedDataBlobHeader {
253 head: DataBlobHeader { magic, crc: [0; 4] },
254 tag: config.compute_auth_tag(data),
255 };
256 unsafe {
257 raw_data.write_le_value(head)?;
258 }
259 raw_data.extend_from_slice(data);
260
261 let mut blob = DataBlob { raw_data };
262 blob.set_crc(blob.compute_crc());
263
264 return Ok(blob);
265 }
266
267 /// Load blob from ``reader``
268 pub fn load(reader: &mut dyn std::io::Read) -> Result<Self, Error> {
269
270 let mut data = Vec::with_capacity(1024*1024);
271 reader.read_to_end(&mut data)?;
272
273 Self::from_raw(data)
274 }
275
276 /// Create Instance from raw data
277 pub fn from_raw(data: Vec<u8>) -> Result<Self, Error> {
278
279 if data.len() < std::mem::size_of::<DataBlobHeader>() {
280 bail!("blob too small ({} bytes).", data.len());
281 }
282
283 let magic = &data[0..8];
284
285 if magic == ENCR_COMPR_BLOB_MAGIC_1_0 || magic == ENCRYPTED_BLOB_MAGIC_1_0 {
286
287 if data.len() < std::mem::size_of::<EncryptedDataBlobHeader>() {
288 bail!("encrypted blob too small ({} bytes).", data.len());
289 }
290
291 let blob = DataBlob { raw_data: data };
292
293 Ok(blob)
294 } else if magic == COMPRESSED_BLOB_MAGIC_1_0 || magic == UNCOMPRESSED_BLOB_MAGIC_1_0 {
295
296 let blob = DataBlob { raw_data: data };
297
298 Ok(blob)
299 } else if magic == AUTH_COMPR_BLOB_MAGIC_1_0 || magic == AUTHENTICATED_BLOB_MAGIC_1_0 {
300 if data.len() < std::mem::size_of::<AuthenticatedDataBlobHeader>() {
301 bail!("authenticated blob too small ({} bytes).", data.len());
302 }
303
304 let blob = DataBlob { raw_data: data };
305
306 Ok(blob)
307 } else {
308 bail!("unable to parse raw blob - wrong magic");
309 }
310 }
311
312 /// Verify digest and data length for unencrypted chunks.
313 ///
314 /// To do that, we need to decompress data first. Please note that
315 /// this is noth possible for encrypted chunks.
316 pub fn verify_unencrypted(
317 &self,
318 expected_chunk_size: usize,
319 expected_digest: &[u8; 32],
320 ) -> Result<(), Error> {
321
322 let magic = self.magic();
323
324 let verify_raw_data = |data: &[u8]| {
325 if expected_chunk_size != data.len() {
326 bail!("detected chunk with wrong length ({} != {})", expected_chunk_size, data.len());
327 }
328 let digest = openssl::sha::sha256(data);
329 if &digest != expected_digest {
330 bail!("detected chunk with wrong digest.");
331 }
332 Ok(())
333 };
334
335 if magic == &COMPRESSED_BLOB_MAGIC_1_0 {
336 let data = zstd::block::decompress(&self.raw_data[12..], 16*1024*1024)?;
337 verify_raw_data(&data)?;
338 } else if magic == &UNCOMPRESSED_BLOB_MAGIC_1_0 {
339 verify_raw_data(&self.raw_data[12..])?;
340 }
341
342 Ok(())
343 }
344 }
345
/// Builder for chunk DataBlobs
///
/// Main purpose is to centralize digest computation. Digest
/// computation differs for encrypted chunks, and this interface ensures that
/// we always compute the correct one.
pub struct DataChunkBuilder<'a> {
    // Encryption configuration; `None` means the chunk is not encrypted.
    config: Option<Arc<CryptConfig>>,
    // Unencoded chunk data the blob is built from.
    orig_data: &'a [u8],
    // True once `digest` holds a computed value.
    digest_computed: bool,
    // Cached chunk digest (all zero until computed).
    digest: [u8; 32],
    // Whether the chunk data should be compressed.
    compress: bool,
}
358
359 impl <'a> DataChunkBuilder<'a> {
360
361 /// Create a new builder instance.
362 pub fn new(orig_data: &'a [u8]) -> Self {
363 Self {
364 orig_data,
365 config: None,
366 digest_computed: false,
367 digest: [0u8; 32],
368 compress: true,
369 }
370 }
371
372 /// Set compression flag.
373 ///
374 /// If true, chunk data is compressed using zstd (level 1).
375 pub fn compress(mut self, value: bool) -> Self {
376 self.compress = value;
377 self
378 }
379
380 /// Set encryption Configuration
381 ///
382 /// If set, chunks are encrypted.
383 pub fn crypt_config(mut self, value: Arc<CryptConfig>) -> Self {
384 if self.digest_computed {
385 panic!("unable to set crypt_config after compute_digest().");
386 }
387 self.config = Some(value);
388 self
389 }
390
391 fn compute_digest(&mut self) {
392 if !self.digest_computed {
393 if let Some(ref config) = self.config {
394 self.digest = config.compute_digest(self.orig_data);
395 } else {
396 self.digest = openssl::sha::sha256(self.orig_data);
397 }
398 self.digest_computed = true;
399 }
400 }
401
402 /// Returns the chunk Digest
403 ///
404 /// Note: For encrypted chunks, this needs to be called after
405 /// ``crypt_config``.
406 pub fn digest(&mut self) -> &[u8; 32] {
407 if !self.digest_computed {
408 self.compute_digest();
409 }
410 &self.digest
411 }
412
413 /// Consume self and build the ``DataBlob``.
414 ///
415 /// Returns the blob and the computet digest.
416 pub fn build(mut self) -> Result<(DataBlob, [u8; 32]), Error> {
417 if !self.digest_computed {
418 self.compute_digest();
419 }
420
421 let chunk = DataBlob::encode(
422 self.orig_data,
423 self.config,
424 self.compress,
425 )?;
426
427 Ok((chunk, self.digest))
428 }
429 }