diff --git a/src/backup/verify.rs b/src/backup/verify.rs
index fd48d907..e0e28ee9 100644
--- a/src/backup/verify.rs
+++ b/src/backup/verify.rs
@@ -2,25 +2,30 @@ use std::collections::HashSet;
 use std::sync::{Arc, Mutex};
 use std::sync::atomic::{Ordering, AtomicUsize};
 use std::time::Instant;
+use nix::dir::Dir;
 
 use anyhow::{bail, format_err, Error};
 
 use crate::{
-    server::WorkerTask,
     api2::types::*,
-    tools::ParallelHandler,
     backup::{
         DataStore,
         DataBlob,
         BackupGroup,
         BackupDir,
         BackupInfo,
+        BackupManifest,
         IndexFile,
         CryptMode,
         FileInfo,
         ArchiveType,
         archive_type,
     },
+    server::UPID,
+    task::TaskState,
+    task_log,
+    tools::ParallelHandler,
+    tools::fs::lock_dir_noblock_shared,
 };
 
 fn verify_blob(datastore: Arc<DataStore>, backup_dir: &BackupDir, info: &FileInfo) -> Result<(), Error> {
@@ -51,7 +56,7 @@ fn verify_blob(datastore: Arc<DataStore>, backup_dir: &BackupDir, info: &FileIn
 
 fn rename_corrupted_chunk(
     datastore: Arc<DataStore>,
     digest: &[u8;32],
-    worker: Arc<WorkerTask>,
+    worker: &dyn TaskState,
 ) {
     let (path, digest_str) = datastore.chunk_path(digest);
@@ -64,12 +69,12 @@ fn rename_corrupted_chunk(
 
     match std::fs::rename(&path, &new_path) {
         Ok(_) => {
-            worker.log(format!("corrupted chunk renamed to {:?}", &new_path));
+            task_log!(worker, "corrupted chunk renamed to {:?}", &new_path);
         },
         Err(err) => {
             match err.kind() {
                 std::io::ErrorKind::NotFound => { /* ignored */ },
-                _ => worker.log(format!("could not rename corrupted chunk {:?} - {}", &path, err))
+                _ => task_log!(worker, "could not rename corrupted chunk {:?} - {}", &path, err)
             }
         }
     };
@@ -81,10 +86,10 @@ fn verify_index_chunks(
     verified_chunks: Arc<Mutex<HashSet<[u8;32]>>>,
     corrupt_chunks: Arc<Mutex<HashSet<[u8;32]>>>,
     crypt_mode: CryptMode,
-    worker: Arc<WorkerTask>,
+    worker: Arc<dyn TaskState + Send + Sync>,
 ) -> Result<(), Error> {
 
-    let errors = AtomicUsize::new(0);
+    let errors = Arc::new(AtomicUsize::new(0));
 
     let start_time = Instant::now();
 
@@ -95,7 +100,7 @@ fn verify_index_chunks(
     let datastore2 = Arc::clone(&datastore);
     let corrupt_chunks2 = Arc::clone(&corrupt_chunks);
     let verified_chunks2 = Arc::clone(&verified_chunks);
-    let errors2 = &errors;
+    let errors2 = Arc::clone(&errors);
 
     let decoder_pool = ParallelHandler::new(
         "verify chunk decoder", 4,
@@ -103,7 +108,7 @@ fn verify_index_chunks(
             let chunk_crypt_mode = match chunk.crypt_mode() {
                 Err(err) => {
                     corrupt_chunks2.lock().unwrap().insert(digest);
-                    worker2.log(format!("can't verify chunk, unknown CryptMode - {}", err));
+                    task_log!(worker2, "can't verify chunk, unknown CryptMode - {}", err);
                     errors2.fetch_add(1, Ordering::SeqCst);
                     return Ok(());
                 },
@@ -111,19 +116,20 @@ fn verify_index_chunks(
             };
 
             if chunk_crypt_mode != crypt_mode {
-                worker2.log(format!(
+                task_log!(
+                    worker2,
                     "chunk CryptMode {:?} does not match index CryptMode {:?}",
                     chunk_crypt_mode,
                     crypt_mode
-                ));
+                );
                 errors2.fetch_add(1, Ordering::SeqCst);
             }
 
             if let Err(err) = chunk.verify_unencrypted(size as usize, &digest) {
                 corrupt_chunks2.lock().unwrap().insert(digest);
-                worker2.log(format!("{}", err));
+                task_log!(worker2, "{}", err);
                 errors2.fetch_add(1, Ordering::SeqCst);
-                rename_corrupted_chunk(datastore2.clone(), &digest, worker2.clone());
+                rename_corrupted_chunk(datastore2.clone(), &digest, &worker2);
             } else {
                 verified_chunks2.lock().unwrap().insert(digest);
             }
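
Note on the worker parameter: this patch replaces the concrete Arc<WorkerTask>
with the TaskState trait plus the task_log! macro, so chunk verification can be
driven by any task-like context (a dedicated verify worker, or a backup task
verifying what it just wrote) instead of being tied to WorkerTask. Since
rename_corrupted_chunk only logs, it now borrows a plain &dyn TaskState rather
than cloning an Arc. A minimal sketch of the trait and macro, assuming the
definitions in src/task.rs look roughly like this (the exact signatures are an
assumption inferred from the call sites in this file):

    pub trait TaskState {
        /// fails once the task has been asked to abort
        fn check_abort(&self) -> Result<(), anyhow::Error>;
        /// sink used by the task_log! macro
        fn log(&self, level: log::Level, message: &std::fmt::Arguments);
    }

    #[macro_export]
    macro_rules! task_log {
        ($task:expr, $($fmt:tt)+) => {{
            $crate::task::TaskState::log(&*$task, ::log::Level::Info, &format_args!($($fmt)+))
        }};
    }
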
@@ -134,7 +140,7 @@ fn verify_index_chunks(
 
     for pos in 0..index.index_count() {
 
-        worker.fail_on_abort()?;
+        worker.check_abort()?;
         crate::tools::fail_on_shutdown()?;
 
         let info = index.chunk_info(pos).unwrap();
@@ -146,7 +152,7 @@ fn verify_index_chunks(
 
         if corrupt_chunks.lock().unwrap().contains(&info.digest) {
             let digest_str = proxmox::tools::digest_to_hex(&info.digest);
-            worker.log(format!("chunk {} was marked as corrupt", digest_str));
+            task_log!(worker, "chunk {} was marked as corrupt", digest_str);
             errors.fetch_add(1, Ordering::SeqCst);
             continue;
         }
@@ -154,9 +160,9 @@ fn verify_index_chunks(
         match datastore.load_chunk(&info.digest) {
             Err(err) => {
                 corrupt_chunks.lock().unwrap().insert(info.digest);
-                worker.log(format!("can't verify chunk, load failed - {}", err));
+                task_log!(worker, "can't verify chunk, load failed - {}", err);
                 errors.fetch_add(1, Ordering::SeqCst);
-                rename_corrupted_chunk(datastore.clone(), &info.digest, worker.clone());
+                rename_corrupted_chunk(datastore.clone(), &info.digest, &worker);
                 continue;
             }
             Ok(chunk) => {
@@ -179,8 +185,16 @@ fn verify_index_chunks(
     let error_count = errors.load(Ordering::SeqCst);
 
-    worker.log(format!("  verified {:.2}/{:.2} MiB in {:.2} seconds, speed {:.2}/{:.2} MiB/s ({} errors)",
-               read_bytes_mib, decoded_bytes_mib, elapsed, read_speed, decode_speed, error_count));
+    task_log!(
+        worker,
+        "  verified {:.2}/{:.2} MiB in {:.2} seconds, speed {:.2}/{:.2} MiB/s ({} errors)",
+        read_bytes_mib,
+        decoded_bytes_mib,
+        elapsed,
+        read_speed,
+        decode_speed,
+        error_count,
+    );
 
     if errors.load(Ordering::SeqCst) > 0 {
         bail!("chunks could not be verified");
     }
@@ -195,7 +209,7 @@ fn verify_fixed_index(
     info: &FileInfo,
     verified_chunks: Arc<Mutex<HashSet<[u8;32]>>>,
     corrupt_chunks: Arc<Mutex<HashSet<[u8;32]>>>,
-    worker: Arc<WorkerTask>,
+    worker: Arc<dyn TaskState + Send + Sync>,
 ) -> Result<(), Error> {
 
     let mut path = backup_dir.relative_path();
@@ -212,7 +226,14 @@ fn verify_fixed_index(
         bail!("wrong index checksum");
     }
 
-    verify_index_chunks(datastore, Box::new(index), verified_chunks, corrupt_chunks, info.chunk_crypt_mode(), worker)
+    verify_index_chunks(
+        datastore,
+        Box::new(index),
+        verified_chunks,
+        corrupt_chunks,
+        info.chunk_crypt_mode(),
+        worker,
+    )
 }
 
 fn verify_dynamic_index(
@@ -221,7 +242,7 @@ fn verify_dynamic_index(
     info: &FileInfo,
     verified_chunks: Arc<Mutex<HashSet<[u8;32]>>>,
     corrupt_chunks: Arc<Mutex<HashSet<[u8;32]>>>,
-    worker: Arc<WorkerTask>,
+    worker: Arc<dyn TaskState + Send + Sync>,
 ) -> Result<(), Error> {
 
     let mut path = backup_dir.relative_path();
@@ -238,7 +259,14 @@ fn verify_dynamic_index(
         bail!("wrong index checksum");
     }
 
-    verify_index_chunks(datastore, Box::new(index), verified_chunks, corrupt_chunks, info.chunk_crypt_mode(), worker)
+    verify_index_chunks(
+        datastore,
+        Box::new(index),
+        verified_chunks,
+        corrupt_chunks,
+        info.chunk_crypt_mode(),
+        worker,
+    )
 }
 
 /// Verify a single backup snapshot
@@ -255,25 +283,83 @@ pub fn verify_backup_dir(
     backup_dir: &BackupDir,
     verified_chunks: Arc<Mutex<HashSet<[u8;32]>>>,
     corrupt_chunks: Arc<Mutex<HashSet<[u8;32]>>>,
-    worker: Arc<WorkerTask>
+    worker: Arc<dyn TaskState + Send + Sync>,
+    upid: UPID,
+    filter: Option<&dyn Fn(&BackupManifest) -> bool>,
 ) -> Result<bool, Error> {
+    let snap_lock = lock_dir_noblock_shared(
+        &datastore.snapshot_path(&backup_dir),
+        "snapshot",
+        "locked by another operation");
+    match snap_lock {
+        Ok(snap_lock) => verify_backup_dir_with_lock(
+            datastore,
+            backup_dir,
+            verified_chunks,
+            corrupt_chunks,
+            worker,
+            upid,
+            filter,
+            snap_lock
+        ),
+        Err(err) => {
+            task_log!(
+                worker,
+                "SKIPPED: verify {}:{} - could not acquire snapshot lock: {}",
+                datastore.name(),
+                backup_dir,
+                err,
+            );
+            Ok(true)
+        }
+    }
+}
 
-    let mut manifest = match datastore.load_manifest(&backup_dir) {
+/// See verify_backup_dir
+pub fn verify_backup_dir_with_lock(
+    datastore: Arc<DataStore>,
+    backup_dir: &BackupDir,
+    verified_chunks: Arc<Mutex<HashSet<[u8;32]>>>,
+    corrupt_chunks: Arc<Mutex<HashSet<[u8;32]>>>,
+    worker: Arc<dyn TaskState + Send + Sync>,
+    upid: UPID,
+    filter: Option<&dyn Fn(&BackupManifest) -> bool>,
+    _snap_lock: Dir,
+) -> Result<bool, Error> {
+    let manifest = match datastore.load_manifest(&backup_dir) {
         Ok((manifest, _)) => manifest,
         Err(err) => {
-            worker.log(format!("verify {}:{} - manifest load error: {}", datastore.name(), backup_dir, err));
+            task_log!(
+                worker,
+                "verify {}:{} - manifest load error: {}",
+                datastore.name(),
+                backup_dir,
+                err,
+            );
             return Ok(false);
         }
     };
 
-    worker.log(format!("verify {}:{}", datastore.name(), backup_dir));
+    if let Some(filter) = filter {
+        if filter(&manifest) == false {
+            task_log!(
+                worker,
+                "SKIPPED: verify {}:{} (recently verified)",
+                datastore.name(),
+                backup_dir,
+            );
+            return Ok(true);
+        }
+    }
+
+    task_log!(worker, "verify {}:{}", datastore.name(), backup_dir);
 
     let mut error_count = 0;
 
     let mut verify_result = VerifyState::Ok;
     for info in manifest.files() {
         let result = proxmox::try_block!({
-            worker.log(format!("  check {}", info.filename));
+            task_log!(worker, "  check {}", info.filename);
 
             match archive_type(&info.filename)? {
                 ArchiveType::FixedIndex => verify_fixed_index(
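
Note on locking: verify_backup_dir now takes a shared (non-exclusive) flock on
the snapshot directory and holds it, via the _snap_lock parameter, for the
whole verification; operations that remove snapshots are expected to take the
same lock exclusively, so a snapshot can no longer disappear mid-verify. If
the lock is busy, the snapshot is skipped and reported as OK rather than
failing the task. A rough sketch of what the helper from src/tools/fs.rs
boils down to; the function body and error wording here are assumptions, only
the name and call shape come from this patch:

    use std::os::unix::io::AsRawFd;

    use anyhow::{format_err, Error};
    use nix::dir::Dir;
    use nix::fcntl::{flock, FlockArg, OFlag};
    use nix::sys::stat::Mode;

    fn lock_dir_noblock_shared(
        path: &std::path::Path,
        what: &str,
        would_block_msg: &str,
    ) -> Result<Dir, Error> {
        // open the directory itself; the lock lives on this fd and is
        // released automatically when the returned Dir is dropped
        let dir = Dir::open(path, OFlag::O_RDONLY, Mode::empty())
            .map_err(|err| format_err!("unable to open {} directory {:?} - {}", what, path, err))?;
        // shared + non-blocking: concurrent verifies may overlap, but we
        // fail immediately instead of waiting if the lock is held exclusively
        flock(dir.as_raw_fd(), FlockArg::LockSharedNonblock)
            .map_err(|err| format_err!("unable to lock {} directory {:?} - {} ({})", what, path, would_block_msg, err))?;
        Ok(dir)
    }
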
@@ -297,11 +383,18 @@ pub fn verify_backup_dir(
             }
         });
 
-        worker.fail_on_abort()?;
+        worker.check_abort()?;
         crate::tools::fail_on_shutdown()?;
 
         if let Err(err) = result {
-            worker.log(format!("verify {}:{}/{} failed: {}", datastore.name(), backup_dir, info.filename, err));
+            task_log!(
+                worker,
+                "verify {}:{}/{} failed: {}",
+                datastore.name(),
+                backup_dir,
+                info.filename,
+                err,
+            );
             error_count += 1;
             verify_result = VerifyState::Failed;
         }
@@ -310,11 +403,12 @@ pub fn verify_backup_dir(
 
     let verify_state = SnapshotVerifyState {
         state: verify_result,
-        upid: worker.upid().clone(),
+        upid,
     };
-    manifest.unprotected["verify_state"] = serde_json::to_value(verify_state)?;
-    datastore.store_manifest(&backup_dir, serde_json::to_value(manifest)?)
-        .map_err(|err| format_err!("unable to store manifest blob - {}", err))?;
+    let verify_state = serde_json::to_value(verify_state)?;
+    datastore.update_manifest(&backup_dir, |manifest| {
+        manifest.unprotected["verify_state"] = verify_state;
+    }).map_err(|err| format_err!("unable to update manifest blob - {}", err))?;
 
     Ok(error_count == 0)
 }
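
Note on the manifest update: the old code loaded the manifest at the start of
verification, patched its unprotected section at the end, and wrote the whole
blob back, so anything another writer changed in between was silently
overwritten. update_manifest moves the read-modify-write into DataStore, where
it can run right before storing, under the datastore's own synchronization. A
hypothetical shape of that method (the real one lives in
src/backup/datastore.rs; lock_manifest is an assumed internal helper, not
shown in this patch):

    impl DataStore {
        pub fn update_manifest(
            &self,
            backup_dir: &BackupDir,
            update_fn: impl FnOnce(&mut BackupManifest),
        ) -> Result<(), Error> {
            let _guard = self.lock_manifest(backup_dir)?;   // assumed helper
            let (mut manifest, _raw_size) = self.load_manifest(backup_dir)?;
            update_fn(&mut manifest);                       // caller's mutation
            self.store_manifest(backup_dir, serde_json::to_value(manifest)?)
        }
    }
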
@@ -332,19 +426,27 @@ pub fn verify_backup_group(
     verified_chunks: Arc<Mutex<HashSet<[u8;32]>>>,
     corrupt_chunks: Arc<Mutex<HashSet<[u8;32]>>>,
     progress: Option<(usize, usize)>, // (done, snapshot_count)
-    worker: Arc<WorkerTask>,
+    worker: Arc<dyn TaskState + Send + Sync>,
+    upid: &UPID,
+    filter: Option<&dyn Fn(&BackupManifest) -> bool>,
 ) -> Result<(usize, Vec<String>), Error> {
 
     let mut errors = Vec::new();
     let mut list = match group.list_backups(&datastore.base_path()) {
         Ok(list) => list,
         Err(err) => {
-            worker.log(format!("verify group {}:{} - unable to list backups: {}", datastore.name(), group, err));
+            task_log!(
+                worker,
+                "verify group {}:{} - unable to list backups: {}",
+                datastore.name(),
+                group,
+                err,
+            );
             return Ok((0, errors));
         }
     };
 
-    worker.log(format!("verify group {}:{}", datastore.name(), group));
+    task_log!(worker, "verify group {}:{}", datastore.name(), group);
 
     let (done, snapshot_count) = progress.unwrap_or((0, list.len()));
 
@@ -352,37 +454,88 @@ pub fn verify_backup_group(
     BackupInfo::sort_list(&mut list, false); // newest first
     for info in list {
         count += 1;
-        if !verify_backup_dir(datastore.clone(), &info.backup_dir, verified_chunks.clone(), corrupt_chunks.clone(), worker.clone())?{
+
+        if !verify_backup_dir(
+            datastore.clone(),
+            &info.backup_dir,
+            verified_chunks.clone(),
+            corrupt_chunks.clone(),
+            worker.clone(),
+            upid.clone(),
+            filter,
+        )? {
             errors.push(info.backup_dir.to_string());
         }
         if snapshot_count != 0 {
             let pos = done + count;
             let percentage = ((pos as f64) * 100.0)/(snapshot_count as f64);
-            worker.log(format!("percentage done: {:.2}% ({} of {} snapshots)", percentage, pos, snapshot_count));
+            task_log!(
+                worker,
+                "percentage done: {:.2}% ({} of {} snapshots)",
+                percentage,
+                pos,
+                snapshot_count,
+            );
         }
     }
 
     Ok((count, errors))
 }
 
-/// Verify all backups inside a datastore
+/// Verify all (owned) backups inside a datastore
 ///
 /// Errors are logged to the worker log.
 ///
 /// Returns
 /// - Ok(failed_dirs) where failed_dirs had verification errors
 /// - Err(_) if task was aborted
-pub fn verify_all_backups(datastore: Arc<DataStore>, worker: Arc<WorkerTask>) -> Result<Vec<String>, Error> {
-
+pub fn verify_all_backups(
+    datastore: Arc<DataStore>,
+    worker: Arc<dyn TaskState + Send + Sync>,
+    upid: &UPID,
+    owner: Option<Authid>,
+    filter: Option<&dyn Fn(&BackupManifest) -> bool>,
+) -> Result<Vec<String>, Error> {
     let mut errors = Vec::new();
 
+    if let Some(owner) = &owner {
+        task_log!(
+            worker,
+            "verify datastore {} - limiting to backups owned by {}",
+            datastore.name(),
+            owner
+        );
+    }
+
+    let filter_by_owner = |group: &BackupGroup| {
+        if let Some(owner) = &owner {
+            match datastore.get_owner(group) {
+                Ok(ref group_owner) => {
+                    group_owner == owner
+                        || (group_owner.is_token()
+                            && !owner.is_token()
+                            && group_owner.user() == owner.user())
+                },
+                Err(_) => false,
+            }
+        } else {
+            true
+        }
+    };
+
     let mut list = match BackupGroup::list_groups(&datastore.base_path()) {
         Ok(list) => list
             .into_iter()
             .filter(|group| !(group.backup_type() == "host" && group.backup_id() == "benchmark"))
+            .filter(filter_by_owner)
             .collect::<Vec<BackupGroup>>(),
         Err(err) => {
-            worker.log(format!("verify datastore {} - unable to list backups: {}", datastore.name(), err));
+            task_log!(
+                worker,
+                "verify datastore {} - unable to list backups: {}",
+                datastore.name(),
+                err,
+            );
             return Ok(errors);
         }
     };
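
Note on owner filtering: the filter_by_owner closure above compares Authids.
Assuming the usual conventions for this type ("alice@pbs" names a user,
"alice@pbs!app1" an API token belonging to that user), a group owned by one of
the user's tokens still counts as owned by that user, while the reverse does
not hold and one token never matches another. Restated as a standalone
predicate:

    // accepted: exact owner match, or a token-owned group requested by
    // the user the token belongs to
    fn owner_matches(group_owner: &Authid, owner: &Authid) -> bool {
        group_owner == owner
            || (group_owner.is_token()
                && !owner.is_token()
                && group_owner.user() == owner.user())
    }

Groups whose owner cannot be read (the Err(_) arm) are treated as not owned
and skipped instead of failing the whole task.
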
@@ -400,7 +553,7 @@ pub fn verify_all_backups(datastore: Arc<DataStore>, worker: Arc<WorkerTask>) -
     // start with 64 chunks since we assume there are few corrupt ones
     let corrupt_chunks = Arc::new(Mutex::new(HashSet::with_capacity(64)));
 
-    worker.log(format!("verify datastore {} ({} snapshots)", datastore.name(), snapshot_count));
+    task_log!(worker, "verify datastore {} ({} snapshots)", datastore.name(), snapshot_count);
 
     let mut done = 0;
     for group in list {
@@ -411,6 +564,8 @@ pub fn verify_all_backups(datastore: Arc<DataStore>, worker: Arc<WorkerTask>) -
             corrupt_chunks.clone(),
             Some((done, snapshot_count)),
             worker.clone(),
+            upid,
+            filter,
         )?;
         errors.append(&mut group_errors);
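
Note on the new entry point: a hypothetical caller, to show how the extended
signature fits together. The datastore name, the worker variable (an
Arc<WorkerTask>, which coerces to Arc<dyn TaskState + Send + Sync>) and the
skip-if-already-verified policy are illustrative assumptions, not code from
this patch:

    let datastore = DataStore::lookup_datastore("store1")?; // assumed store name

    // skip snapshots whose recorded verify_state is already "ok"
    let filter = |manifest: &BackupManifest| {
        manifest.unprotected["verify_state"]["state"] != "ok"
    };

    let failed_dirs = verify_all_backups(
        datastore,
        worker.clone(),
        worker.upid(),   // &UPID, recorded in each snapshot's verify_state
        None,            // no owner restriction: verify all groups
        Some(&filter),
    )?;
    if !failed_dirs.is_empty() {
        task_log!(worker, "failed to verify the following snapshots: {:?}", failed_dirs);
    }
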