]> git.proxmox.com Git - proxmox-backup.git/blame - src/backup/data_chunk.rs
src/backup/backup_info.rs: also list .blob files
[proxmox-backup.git] / src / backup / data_chunk.rs
CommitLineData
b595cb9d
DM
1use failure::*;
2use std::convert::TryInto;
ba01828d 3use proxmox::tools::io::ops::ReadExtOps;
9f83e0f7 4use crate::tools::write::WriteUtilOps;
b595cb9d
DM
5
6use super::*;
7
bd0e3c7c
DM
8/// Data chunk with positional information
9pub struct ChunkInfo {
10 pub chunk: DataChunk,
11 pub chunk_len: u64,
12 pub offset: u64,
13}
b595cb9d
DM
14
/// Data chunk binary storage format
///
/// Data chunks are identified by a unique digest, and can be
/// compressed and encrypted. A simple binary format is used to store
/// them on disk or transfer them over the network.
///
/// Please use the ``DataChunkBuilder`` to create new instances.
pub struct DataChunk {
    /// Digest identifying this chunk.
    digest: [u8; 32],
    /// Tagged (header with magic number), optionally compressed and/or
    /// encrypted data.
    raw_data: Vec<u8>,
}
26
27impl DataChunk {
28
29 /// accessor to raw_data field
30 pub fn raw_data(&self) -> &[u8] {
31 &self.raw_data
32 }
33
34 /// accessor to chunk digest field
35 pub fn digest(&self) -> &[u8; 32] {
36 &self.digest
37 }
38
39 /// accessor to chunk type (magic number)
40 pub fn magic(&self) -> &[u8; 8] {
41 self.raw_data[0..8].try_into().unwrap()
42 }
43
b7f4f27d
DM
44 /// accessor to crc32 checksum
45 pub fn crc(&self) -> u32 {
991abfa8
DM
46 let crc_o = proxmox::tools::offsetof!(DataChunkHeader, crc);
47 u32::from_le_bytes(self.raw_data[crc_o..crc_o+4].try_into().unwrap())
b7f4f27d 48 }
b595cb9d 49
b7f4f27d
DM
50 // set the CRC checksum field
51 pub fn set_crc(&mut self, crc: u32) {
991abfa8
DM
52 let crc_o = proxmox::tools::offsetof!(DataChunkHeader, crc);
53 self.raw_data[crc_o..crc_o+4].copy_from_slice(&crc.to_le_bytes());
b7f4f27d
DM
54 }
55
56 /// compute the CRC32 checksum
6f083b7a 57 pub fn compute_crc(&self) -> u32 {
b7f4f27d 58 let mut hasher = crc32fast::Hasher::new();
991abfa8
DM
59 let start = std::mem::size_of::<DataChunkHeader>(); // start after HEAD
60 hasher.update(&self.raw_data[start..]);
b7f4f27d
DM
61 hasher.finalize()
62 }
b595cb9d 63
991abfa8 64 fn encode(
b595cb9d
DM
65 data: &[u8],
66 config: Option<&CryptConfig>,
67 digest: [u8;32],
68 compress: bool,
69 ) -> Result<Self, Error> {
70
71 if let Some(config) = config {
72
0066c6d9
DM
73 let compr_data;
74 let (_compress, data, magic) = if compress {
75 compr_data = zstd::block::compress(data, 1)?;
76 // Note: We only use compression if result is shorter
77 if compr_data.len() < data.len() {
78 (true, &compr_data[..], ENCR_COMPR_CHUNK_MAGIC_1_0)
79 } else {
80 (false, data, ENCRYPTED_CHUNK_MAGIC_1_0)
81 }
82 } else {
83 (false, data, ENCRYPTED_CHUNK_MAGIC_1_0)
84 };
85
86 let header_len = std::mem::size_of::<EncryptedDataChunkHeader>();
87 let mut raw_data = Vec::with_capacity(data.len() + header_len);
88
89 let dummy_head = EncryptedDataChunkHeader {
90 head: DataChunkHeader { magic: [0u8; 8], crc: [0; 4] },
91 iv: [0u8; 16],
92 tag: [0u8; 16],
93 };
94 raw_data.write_value(&dummy_head)?;
95
96 let (iv, tag) = config.encrypt_to(data, &mut raw_data)?;
97
98 let head = EncryptedDataChunkHeader {
99 head: DataChunkHeader { magic, crc: [0; 4] }, iv, tag,
100 };
101
102 (&mut raw_data[0..header_len]).write_value(&head)?;
103
104 return Ok(DataChunk { digest, raw_data });
b595cb9d
DM
105 } else {
106
991abfa8 107 let max_data_len = data.len() + std::mem::size_of::<DataChunkHeader>();
b595cb9d 108 if compress {
991abfa8
DM
109 let mut comp_data = Vec::with_capacity(max_data_len);
110
111 let head = DataChunkHeader {
112 magic: COMPRESSED_CHUNK_MAGIC_1_0,
113 crc: [0; 4],
114 };
115 comp_data.write_value(&head)?;
b595cb9d 116
b595cb9d
DM
117 zstd::stream::copy_encode(data, &mut comp_data, 1)?;
118
991abfa8 119 if comp_data.len() < max_data_len {
dba72d50
DM
120 let chunk = DataChunk { digest, raw_data: comp_data };
121 return Ok(chunk);
122 }
123 }
b595cb9d 124
991abfa8 125 let mut raw_data = Vec::with_capacity(max_data_len);
b595cb9d 126
991abfa8
DM
127 let head = DataChunkHeader {
128 magic: UNCOMPRESSED_CHUNK_MAGIC_1_0,
129 crc: [0; 4],
130 };
131 raw_data.write_value(&head)?;
dba72d50 132 raw_data.extend_from_slice(data);
b595cb9d 133
dba72d50
DM
134 let chunk = DataChunk { digest, raw_data };
135 return Ok(chunk);
b595cb9d
DM
136 }
137 }
138
139 /// Decode chunk data
51929e45 140 pub fn decode(self, config: Option<&CryptConfig>) -> Result<Vec<u8>, Error> {
b595cb9d
DM
141
142 let magic = self.magic();
143
144 if magic == &UNCOMPRESSED_CHUNK_MAGIC_1_0 {
991abfa8
DM
145 let data_start = std::mem::size_of::<DataChunkHeader>();
146 return Ok(self.raw_data[data_start..].to_vec());
b595cb9d 147 } else if magic == &COMPRESSED_CHUNK_MAGIC_1_0 {
991abfa8
DM
148 let data_start = std::mem::size_of::<DataChunkHeader>();
149 let data = zstd::block::decompress(&self.raw_data[data_start..], 16*1024*1024)?;
b595cb9d 150 return Ok(data);
b595cb9d 151 } else if magic == &ENCR_COMPR_CHUNK_MAGIC_1_0 || magic == &ENCRYPTED_CHUNK_MAGIC_1_0 {
9f83e0f7 152 let header_len = std::mem::size_of::<EncryptedDataChunkHeader>();
ba01828d
DM
153 let head = unsafe {
154 (&self.raw_data[..header_len]).read_le_value::<EncryptedDataChunkHeader>()?
155 };
9f83e0f7 156
51929e45 157 if let Some(config) = config {
077a8cae 158 let data = if magic == &ENCR_COMPR_CHUNK_MAGIC_1_0 {
9f83e0f7 159 config.decode_compressed_chunk(&self.raw_data[header_len..], &head.iv, &head.tag)?
077a8cae 160 } else {
9f83e0f7 161 config.decode_uncompressed_chunk(&self.raw_data[header_len..], &head.iv, &head.tag)?
077a8cae 162 };
51929e45
DM
163 return Ok(data);
164 } else {
165 bail!("unable to decrypt chunk - missing CryptConfig");
166 }
b595cb9d
DM
167 } else {
168 bail!("Invalid chunk magic number.");
169 }
170 }
171
172 /// Load chunk data from ``reader``
173 ///
174 /// Please note that it is impossible to compute the digest for
175 /// encrypted chunks, so we need to trust and use the provided
176 /// ``digest``.
177 pub fn load(reader: &mut dyn std::io::Read, digest: [u8; 32]) -> Result<Self, Error> {
178
179 let mut data = Vec::with_capacity(1024*1024);
180 reader.read_to_end(&mut data)?;
181
bd0e3c7c
DM
182 Self::from_raw(data, digest)
183 }
184
185 /// Create Instance from raw data
186 pub fn from_raw(data: Vec<u8>, digest: [u8;32]) -> Result<Self, Error> {
187
991abfa8 188 if data.len() < std::mem::size_of::<DataChunkHeader>() {
b595cb9d
DM
189 bail!("chunk too small ({} bytes).", data.len());
190 }
191
192 let magic = &data[0..8];
193
194 if magic == ENCR_COMPR_CHUNK_MAGIC_1_0 || magic == ENCRYPTED_CHUNK_MAGIC_1_0 {
195
991abfa8 196 if data.len() < std::mem::size_of::<EncryptedDataChunkHeader>() {
b595cb9d
DM
197 bail!("encrypted chunk too small ({} bytes).", data.len());
198 }
199
200 let chunk = DataChunk { digest: digest, raw_data: data };
201
202 Ok(chunk)
203 } else if magic == COMPRESSED_CHUNK_MAGIC_1_0 || magic == UNCOMPRESSED_CHUNK_MAGIC_1_0 {
204
205 let chunk = DataChunk { digest: digest, raw_data: data };
206
207 Ok(chunk)
208 } else {
bd0e3c7c 209 bail!("unable to parse raw chunk - wrong magic");
b595cb9d
DM
210 }
211 }
fa148dbd
DM
212
213 /// Verify digest and data length for unencrypted chunks.
214 ///
215 /// To do that, we need to decompress data first. Please note that
216 /// this is noth possible for encrypted chunks.
217 pub fn verify_unencrypted(&self, expected_chunk_size: usize) -> Result<(), Error> {
218
991abfa8 219 let magic = self.magic();
fa148dbd
DM
220
221 let verify_raw_data = |data: &[u8]| {
222 if expected_chunk_size != data.len() {
223 bail!("detected chunk with wrong length ({} != {})", expected_chunk_size, data.len());
224 }
225 let digest = openssl::sha::sha256(data);
226 if digest != self.digest {
227 bail!("detected chunk with wrong digest.");
228 }
229 Ok(())
230 };
231
991abfa8 232 if magic == &COMPRESSED_CHUNK_MAGIC_1_0 {
6f083b7a
DM
233 let data = zstd::block::decompress(&self.raw_data[12..], 16*1024*1024)?;
234 verify_raw_data(&data)?;
991abfa8 235 } else if magic == &UNCOMPRESSED_CHUNK_MAGIC_1_0 {
b7f4f27d 236 verify_raw_data(&self.raw_data[12..])?;
fa148dbd
DM
237 }
238
239 Ok(())
240 }
b595cb9d
DM
241}
242
243/// Builder for DataChunk
244///
245/// Main purpose is to centralize digest computation. Digest
246/// computation differ for encryped chunk, and this interface ensures that
247/// we always compute the correct one.
248pub struct DataChunkBuilder<'a, 'b> {
249 config: Option<&'b CryptConfig>,
250 orig_data: &'a [u8],
251 digest_computed: bool,
252 digest: [u8; 32],
253 compress: bool,
254}
255
256impl <'a, 'b> DataChunkBuilder<'a, 'b> {
257
258 /// Create a new builder instance.
259 pub fn new(orig_data: &'a [u8]) -> Self {
260 Self {
261 orig_data,
262 config: None,
263 digest_computed: false,
264 digest: [0u8; 32],
bd0e3c7c 265 compress: true,
b595cb9d
DM
266 }
267 }
268
269 /// Set compression flag.
270 ///
271 /// If true, chunk data is compressed using zstd (level 1).
272 pub fn compress(mut self, value: bool) -> Self {
273 self.compress = value;
274 self
275 }
276
277 /// Set encryption Configuration
278 ///
279 /// If set, chunks are encrypted.
280 pub fn crypt_config(mut self, value: &'b CryptConfig) -> Self {
281 if self.digest_computed {
282 panic!("unable to set crypt_config after compute_digest().");
283 }
284 self.config = Some(value);
285 self
286 }
287
288 fn compute_digest(&mut self) {
289 if !self.digest_computed {
290 if let Some(config) = self.config {
291 self.digest = config.compute_digest(self.orig_data);
292 } else {
293 self.digest = openssl::sha::sha256(self.orig_data);
294 }
295 self.digest_computed = true;
296 }
297 }
298
299 /// Returns the chunk Digest
300 ///
301 /// Note: For encrypted chunks, this needs to be called after
302 /// ``crypt_config``.
303 pub fn digest(&mut self) -> &[u8; 32] {
304 if !self.digest_computed {
305 self.compute_digest();
306 }
307 &self.digest
308 }
309
310 /// Consume self and build the ``DataChunk``.
311 pub fn build(mut self) -> Result<DataChunk, Error> {
312 if !self.digest_computed {
313 self.compute_digest();
314 }
315
991abfa8
DM
316 let chunk = DataChunk::encode(
317 self.orig_data,
318 self.config,
319 self.digest,
320 self.compress,
321 )?;
b595cb9d
DM
322
323 Ok(chunk)
324 }
325}