]> git.proxmox.com Git - proxmox-backup.git/blame - src/backup/verify.rs
src/backup/verify.rs: cleanup use clause
[proxmox-backup.git] / src / backup / verify.rs
CommitLineData
2aaae970 1use std::collections::HashSet;
6b809ff5
DM
2use std::sync::{Arc, Mutex};
3use std::sync::atomic::{Ordering, AtomicUsize};
4use std::time::Instant;
2aaae970 5
3b2046d2 6use anyhow::{bail, format_err, Error};
c2009e53 7
ee7a308d
DM
8use crate::{
9 server::WorkerTask,
10 api2::types::*,
11 backup::{
12 DataStore,
13 DataBlob,
14 BackupGroup,
15 BackupDir,
16 BackupInfo,
17 IndexFile,
18 CryptMode,
19 FileInfo,
20 ArchiveType,
21 archive_type,
22 },
c2009e53
DM
23};
24
6b809ff5 25fn verify_blob(datastore: Arc<DataStore>, backup_dir: &BackupDir, info: &FileInfo) -> Result<(), Error> {
c2009e53 26
39f18b30 27 let blob = datastore.load_blob(backup_dir, &info.filename)?;
c2009e53 28
2aaae970 29 let raw_size = blob.raw_size();
c2009e53
DM
30 if raw_size != info.size {
31 bail!("wrong size ({} != {})", info.size, raw_size);
32 }
33
39f18b30 34 let csum = openssl::sha::sha256(blob.raw_data());
c2009e53
DM
35 if csum != info.csum {
36 bail!("wrong index checksum");
37 }
38
8819d1f2
FG
39 match blob.crypt_mode()? {
40 CryptMode::Encrypt => Ok(()),
41 CryptMode::None => {
42 // digest already verified above
43 blob.decode(None, None)?;
44 Ok(())
45 },
46 CryptMode::SignOnly => bail!("Invalid CryptMode for blob"),
c2009e53 47 }
c2009e53
DM
48}
49
0f3b7efa
SR
50fn rename_corrupted_chunk(
51 datastore: Arc<DataStore>,
52 digest: &[u8;32],
53 worker: Arc<WorkerTask>,
54) {
55 let (path, digest_str) = datastore.chunk_path(digest);
56
57 let mut counter = 0;
58 let mut new_path = path.clone();
aadcc281 59 loop {
0f3b7efa 60 new_path.set_file_name(format!("{}.{}.bad", digest_str, counter));
aadcc281 61 if new_path.exists() && counter < 9 { counter += 1; } else { break; }
0f3b7efa
SR
62 }
63
64 match std::fs::rename(&path, &new_path) {
65 Ok(_) => {
66 worker.log(format!("corrupted chunk renamed to {:?}", &new_path));
67 },
68 Err(err) => {
69 match err.kind() {
70 std::io::ErrorKind::NotFound => { /* ignored */ },
71 _ => worker.log(format!("could not rename corrupted chunk {:?} - {}", &path, err))
72 }
73 }
74 };
75}
76
6b809ff5
DM
77// We use a separate thread to read/load chunks, so that we can do
78// load and verify in parallel to increase performance.
79fn chunk_reader_thread(
80 datastore: Arc<DataStore>,
81 index: Box<dyn IndexFile + Send>,
82 verified_chunks: Arc<Mutex<HashSet<[u8;32]>>>,
83 corrupt_chunks: Arc<Mutex<HashSet<[u8;32]>>>,
84 errors: Arc<AtomicUsize>,
85 worker: Arc<WorkerTask>,
86) -> std::sync::mpsc::Receiver<(DataBlob, [u8;32], u64)> {
87
88 let (sender, receiver) = std::sync::mpsc::sync_channel(3); // buffer up to 3 chunks
89
90 std::thread::spawn(move|| {
91 for pos in 0..index.index_count() {
92 let info = index.chunk_info(pos).unwrap();
93 let size = info.range.end - info.range.start;
94
95 if verified_chunks.lock().unwrap().contains(&info.digest) {
96 continue; // already verified
97 }
98
99 if corrupt_chunks.lock().unwrap().contains(&info.digest) {
100 let digest_str = proxmox::tools::digest_to_hex(&info.digest);
101 worker.log(format!("chunk {} was marked as corrupt", digest_str));
102 errors.fetch_add(1, Ordering::SeqCst);
103 continue;
104 }
105
106 match datastore.load_chunk(&info.digest) {
107 Err(err) => {
108 corrupt_chunks.lock().unwrap().insert(info.digest);
109 worker.log(format!("can't verify chunk, load failed - {}", err));
110 errors.fetch_add(1, Ordering::SeqCst);
0f3b7efa 111 rename_corrupted_chunk(datastore.clone(), &info.digest, worker.clone());
6b809ff5
DM
112 continue;
113 }
114 Ok(chunk) => {
115 if sender.send((chunk, info.digest, size)).is_err() {
116 break; // receiver gone - simply stop
117 }
118 }
119 }
120 }
121 });
122
123 receiver
124}
125
fdaab0df 126fn verify_index_chunks(
6b809ff5
DM
127 datastore: Arc<DataStore>,
128 index: Box<dyn IndexFile + Send>,
129 verified_chunks: Arc<Mutex<HashSet<[u8;32]>>>,
130 corrupt_chunks: Arc<Mutex<HashSet<[u8; 32]>>>,
9a38fa29 131 crypt_mode: CryptMode,
6b809ff5 132 worker: Arc<WorkerTask>,
fdaab0df
DM
133) -> Result<(), Error> {
134
6b809ff5 135 let errors = Arc::new(AtomicUsize::new(0));
fdaab0df 136
6b809ff5 137 let start_time = Instant::now();
fdaab0df 138
6b809ff5 139 let chunk_channel = chunk_reader_thread(
0f3b7efa 140 datastore.clone(),
6b809ff5
DM
141 index,
142 verified_chunks.clone(),
143 corrupt_chunks.clone(),
144 errors.clone(),
145 worker.clone(),
146 );
7ae571e7 147
6b809ff5
DM
148 let mut read_bytes = 0;
149 let mut decoded_bytes = 0;
7ae571e7 150
6b809ff5 151 loop {
2aaae970 152
6b809ff5 153 worker.fail_on_abort()?;
deef6369 154 crate::tools::fail_on_shutdown()?;
6b809ff5
DM
155
156 let (chunk, digest, size) = match chunk_channel.recv() {
157 Ok(tuple) => tuple,
158 Err(std::sync::mpsc::RecvError) => break,
9a38fa29
FG
159 };
160
6b809ff5
DM
161 read_bytes += chunk.raw_size();
162 decoded_bytes += size;
163
9a38fa29
FG
164 let chunk_crypt_mode = match chunk.crypt_mode() {
165 Err(err) => {
6b809ff5 166 corrupt_chunks.lock().unwrap().insert(digest);
9a38fa29 167 worker.log(format!("can't verify chunk, unknown CryptMode - {}", err));
6b809ff5 168 errors.fetch_add(1, Ordering::SeqCst);
9a38fa29
FG
169 continue;
170 },
171 Ok(mode) => mode,
172 };
173
174 if chunk_crypt_mode != crypt_mode {
175 worker.log(format!(
176 "chunk CryptMode {:?} does not match index CryptMode {:?}",
177 chunk_crypt_mode,
178 crypt_mode
179 ));
6b809ff5 180 errors.fetch_add(1, Ordering::SeqCst);
9a38fa29
FG
181 }
182
6b809ff5
DM
183 if let Err(err) = chunk.verify_unencrypted(size as usize, &digest) {
184 corrupt_chunks.lock().unwrap().insert(digest);
7ae571e7 185 worker.log(format!("{}", err));
6b809ff5 186 errors.fetch_add(1, Ordering::SeqCst);
0f3b7efa 187 rename_corrupted_chunk(datastore.clone(), &digest, worker.clone());
7ae571e7 188 } else {
6b809ff5 189 verified_chunks.lock().unwrap().insert(digest);
2aaae970 190 }
fdaab0df
DM
191 }
192
6b809ff5
DM
193 let elapsed = start_time.elapsed().as_secs_f64();
194
195 let read_bytes_mib = (read_bytes as f64)/(1024.0*1024.0);
196 let decoded_bytes_mib = (decoded_bytes as f64)/(1024.0*1024.0);
197
198 let read_speed = read_bytes_mib/elapsed;
199 let decode_speed = decoded_bytes_mib/elapsed;
200
201 let error_count = errors.load(Ordering::SeqCst);
202
7c77e2f9 203 worker.log(format!(" verified {:.2}/{:.2} MiB in {:.2} seconds, speed {:.2}/{:.2} MiB/s ({} errors)",
6b809ff5
DM
204 read_bytes_mib, decoded_bytes_mib, elapsed, read_speed, decode_speed, error_count));
205
206 if errors.load(Ordering::SeqCst) > 0 {
f66f537d
DC
207 bail!("chunks could not be verified");
208 }
209
fdaab0df
DM
210 Ok(())
211}
212
2aaae970 213fn verify_fixed_index(
6b809ff5 214 datastore: Arc<DataStore>,
2aaae970
DM
215 backup_dir: &BackupDir,
216 info: &FileInfo,
6b809ff5
DM
217 verified_chunks: Arc<Mutex<HashSet<[u8;32]>>>,
218 corrupt_chunks: Arc<Mutex<HashSet<[u8;32]>>>,
219 worker: Arc<WorkerTask>,
2aaae970 220) -> Result<(), Error> {
c2009e53
DM
221
222 let mut path = backup_dir.relative_path();
223 path.push(&info.filename);
224
225 let index = datastore.open_fixed_reader(&path)?;
226
227 let (csum, size) = index.compute_csum();
228 if size != info.size {
229 bail!("wrong size ({} != {})", info.size, size);
230 }
231
232 if csum != info.csum {
233 bail!("wrong index checksum");
234 }
235
9a38fa29 236 verify_index_chunks(datastore, Box::new(index), verified_chunks, corrupt_chunks, info.chunk_crypt_mode(), worker)
c2009e53
DM
237}
238
2aaae970 239fn verify_dynamic_index(
6b809ff5 240 datastore: Arc<DataStore>,
2aaae970
DM
241 backup_dir: &BackupDir,
242 info: &FileInfo,
6b809ff5
DM
243 verified_chunks: Arc<Mutex<HashSet<[u8;32]>>>,
244 corrupt_chunks: Arc<Mutex<HashSet<[u8;32]>>>,
245 worker: Arc<WorkerTask>,
2aaae970
DM
246) -> Result<(), Error> {
247
c2009e53
DM
248 let mut path = backup_dir.relative_path();
249 path.push(&info.filename);
250
251 let index = datastore.open_dynamic_reader(&path)?;
252
253 let (csum, size) = index.compute_csum();
254 if size != info.size {
255 bail!("wrong size ({} != {})", info.size, size);
256 }
257
258 if csum != info.csum {
259 bail!("wrong index checksum");
260 }
261
9a38fa29 262 verify_index_chunks(datastore, Box::new(index), verified_chunks, corrupt_chunks, info.chunk_crypt_mode(), worker)
c2009e53
DM
263}
264
265/// Verify a single backup snapshot
266///
267/// This checks all archives inside a backup snapshot.
268/// Errors are logged to the worker log.
269///
8ea00f6e
DM
270/// Returns
271/// - Ok(true) if verify is successful
272/// - Ok(false) if there were verification errors
273/// - Err(_) if task was aborted
2aaae970 274pub fn verify_backup_dir(
6b809ff5 275 datastore: Arc<DataStore>,
2aaae970 276 backup_dir: &BackupDir,
6b809ff5
DM
277 verified_chunks: Arc<Mutex<HashSet<[u8;32]>>>,
278 corrupt_chunks: Arc<Mutex<HashSet<[u8;32]>>>,
279 worker: Arc<WorkerTask>
2aaae970 280) -> Result<bool, Error> {
c2009e53 281
3b2046d2 282 let mut manifest = match datastore.load_manifest(&backup_dir) {
ff86ef00 283 Ok((manifest, _)) => manifest,
c2009e53
DM
284 Err(err) => {
285 worker.log(format!("verify {}:{} - manifest load error: {}", datastore.name(), backup_dir, err));
8ea00f6e 286 return Ok(false);
c2009e53
DM
287 }
288 };
289
290 worker.log(format!("verify {}:{}", datastore.name(), backup_dir));
291
292 let mut error_count = 0;
293
d10332a1 294 let mut verify_result = VerifyState::Ok;
c2009e53
DM
295 for info in manifest.files() {
296 let result = proxmox::try_block!({
297 worker.log(format!(" check {}", info.filename));
298 match archive_type(&info.filename)? {
d8594d87
DC
299 ArchiveType::FixedIndex =>
300 verify_fixed_index(
6b809ff5 301 datastore.clone(),
d8594d87
DC
302 &backup_dir,
303 info,
6b809ff5
DM
304 verified_chunks.clone(),
305 corrupt_chunks.clone(),
306 worker.clone(),
d8594d87
DC
307 ),
308 ArchiveType::DynamicIndex =>
309 verify_dynamic_index(
6b809ff5 310 datastore.clone(),
d8594d87
DC
311 &backup_dir,
312 info,
6b809ff5
DM
313 verified_chunks.clone(),
314 corrupt_chunks.clone(),
315 worker.clone(),
d8594d87 316 ),
6b809ff5 317 ArchiveType::Blob => verify_blob(datastore.clone(), &backup_dir, info),
c2009e53
DM
318 }
319 });
8ea00f6e
DM
320
321 worker.fail_on_abort()?;
deef6369 322 crate::tools::fail_on_shutdown()?;
8ea00f6e 323
c2009e53
DM
324 if let Err(err) = result {
325 worker.log(format!("verify {}:{}/{} failed: {}", datastore.name(), backup_dir, info.filename, err));
326 error_count += 1;
d10332a1 327 verify_result = VerifyState::Failed;
c2009e53 328 }
3b2046d2 329
c2009e53
DM
330 }
331
3b2046d2 332 let verify_state = SnapshotVerifyState {
d10332a1 333 state: verify_result,
3b2046d2
TL
334 upid: worker.upid().clone(),
335 };
336 manifest.unprotected["verify_state"] = serde_json::to_value(verify_state)?;
337 datastore.store_manifest(&backup_dir, serde_json::to_value(manifest)?)
338 .map_err(|err| format_err!("unable to store manifest blob - {}", err))?;
339
8ea00f6e 340 Ok(error_count == 0)
c2009e53
DM
341}
342
8ea00f6e
DM
343/// Verify all backups inside a backup group
344///
345/// Errors are logged to the worker log.
346///
347/// Returns
63d9aca9 348/// - Ok((count, failed_dirs)) where failed_dirs had verification errors
8ea00f6e 349/// - Err(_) if task was aborted
4f09d310
DM
350pub fn verify_backup_group(
351 datastore: Arc<DataStore>,
352 group: &BackupGroup,
353 verified_chunks: Arc<Mutex<HashSet<[u8;32]>>>,
354 corrupt_chunks: Arc<Mutex<HashSet<[u8;32]>>>,
63d9aca9 355 progress: Option<(usize, usize)>, // (done, snapshot_count)
4f09d310 356 worker: Arc<WorkerTask>,
63d9aca9 357) -> Result<(usize, Vec<String>), Error> {
c2009e53 358
adfdc369 359 let mut errors = Vec::new();
c2009e53
DM
360 let mut list = match group.list_backups(&datastore.base_path()) {
361 Ok(list) => list,
362 Err(err) => {
363 worker.log(format!("verify group {}:{} - unable to list backups: {}", datastore.name(), group, err));
63d9aca9 364 return Ok((0, errors));
c2009e53
DM
365 }
366 };
367
368 worker.log(format!("verify group {}:{}", datastore.name(), group));
369
63d9aca9
DM
370 let (done, snapshot_count) = progress.unwrap_or((0, list.len()));
371
372 let mut count = 0;
c2009e53
DM
373 BackupInfo::sort_list(&mut list, false); // newest first
374 for info in list {
63d9aca9 375 count += 1;
6b809ff5 376 if !verify_backup_dir(datastore.clone(), &info.backup_dir, verified_chunks.clone(), corrupt_chunks.clone(), worker.clone())?{
adfdc369 377 errors.push(info.backup_dir.to_string());
c2009e53 378 }
63d9aca9
DM
379 if snapshot_count != 0 {
380 let pos = done + count;
381 let percentage = ((pos as f64) * 100.0)/(snapshot_count as f64);
382 worker.log(format!("percentage done: {:.2}% ({} of {} snapshots)", percentage, pos, snapshot_count));
383 }
c2009e53
DM
384 }
385
63d9aca9 386 Ok((count, errors))
c2009e53
DM
387}
388
8ea00f6e
DM
389/// Verify all backups inside a datastore
390///
391/// Errors are logged to the worker log.
392///
393/// Returns
adfdc369 394/// - Ok(failed_dirs) where failed_dirs had verification errors
8ea00f6e 395/// - Err(_) if task was aborted
6b809ff5 396pub fn verify_all_backups(datastore: Arc<DataStore>, worker: Arc<WorkerTask>) -> Result<Vec<String>, Error> {
adfdc369
DC
397
398 let mut errors = Vec::new();
c2009e53 399
4264c502 400 let mut list = match BackupGroup::list_groups(&datastore.base_path()) {
5656888c
DM
401 Ok(list) => list
402 .into_iter()
403 .filter(|group| !(group.backup_type() == "host" && group.backup_id() == "benchmark"))
404 .collect::<Vec<BackupGroup>>(),
c2009e53
DM
405 Err(err) => {
406 worker.log(format!("verify datastore {} - unable to list backups: {}", datastore.name(), err));
adfdc369 407 return Ok(errors);
c2009e53
DM
408 }
409 };
410
4264c502
DM
411 list.sort_unstable();
412
63d9aca9
DM
413 let mut snapshot_count = 0;
414 for group in list.iter() {
415 snapshot_count += group.list_backups(&datastore.base_path())?.len();
416 }
417
4f09d310
DM
418 // start with 16384 chunks (up to 65GB)
419 let verified_chunks = Arc::new(Mutex::new(HashSet::with_capacity(1024*16)));
420
421 // start with 64 chunks since we assume there are few corrupt ones
422 let corrupt_chunks = Arc::new(Mutex::new(HashSet::with_capacity(64)));
423
63d9aca9 424 worker.log(format!("verify datastore {} ({} snapshots)", datastore.name(), snapshot_count));
c2009e53 425
63d9aca9 426 let mut done = 0;
c2009e53 427 for group in list {
63d9aca9 428 let (count, mut group_errors) = verify_backup_group(
4f09d310
DM
429 datastore.clone(),
430 &group,
431 verified_chunks.clone(),
432 corrupt_chunks.clone(),
63d9aca9 433 Some((done, snapshot_count)),
4f09d310
DM
434 worker.clone(),
435 )?;
adfdc369 436 errors.append(&mut group_errors);
63d9aca9
DM
437
438 done += count;
c2009e53
DM
439 }
440
adfdc369 441 Ok(errors)
c2009e53 442}