]> git.proxmox.com Git - proxmox-backup.git/blame - src/backup/verify.rs
verify: introduce & use new Datastore.Verify privilege
[proxmox-backup.git] / src / backup / verify.rs
CommitLineData
2aaae970 1use std::collections::HashSet;
6b809ff5
DM
2use std::sync::{Arc, Mutex};
3use std::sync::atomic::{Ordering, AtomicUsize};
4use std::time::Instant;
bcc28804 5use nix::dir::Dir;
2aaae970 6
3b2046d2 7use anyhow::{bail, format_err, Error};
c2009e53 8
ee7a308d 9use crate::{
ee7a308d
DM
10 api2::types::*,
11 backup::{
12 DataStore,
13 DataBlob,
14 BackupGroup,
15 BackupDir,
16 BackupInfo,
d771a608 17 BackupManifest,
ee7a308d
DM
18 IndexFile,
19 CryptMode,
20 FileInfo,
21 ArchiveType,
22 archive_type,
23 },
f6b1d1cc
WB
24 server::UPID,
25 task::TaskState,
26 task_log,
27 tools::ParallelHandler,
bfa54f2e 28 tools::fs::lock_dir_noblock_shared,
c2009e53
DM
29};
30
6b809ff5 31fn verify_blob(datastore: Arc<DataStore>, backup_dir: &BackupDir, info: &FileInfo) -> Result<(), Error> {
c2009e53 32
39f18b30 33 let blob = datastore.load_blob(backup_dir, &info.filename)?;
c2009e53 34
2aaae970 35 let raw_size = blob.raw_size();
c2009e53
DM
36 if raw_size != info.size {
37 bail!("wrong size ({} != {})", info.size, raw_size);
38 }
39
39f18b30 40 let csum = openssl::sha::sha256(blob.raw_data());
c2009e53
DM
41 if csum != info.csum {
42 bail!("wrong index checksum");
43 }
44
8819d1f2
FG
45 match blob.crypt_mode()? {
46 CryptMode::Encrypt => Ok(()),
47 CryptMode::None => {
48 // digest already verified above
49 blob.decode(None, None)?;
50 Ok(())
51 },
52 CryptMode::SignOnly => bail!("Invalid CryptMode for blob"),
c2009e53 53 }
c2009e53
DM
54}
55
0f3b7efa
SR
56fn rename_corrupted_chunk(
57 datastore: Arc<DataStore>,
58 digest: &[u8;32],
f6b1d1cc 59 worker: &dyn TaskState,
0f3b7efa
SR
60) {
61 let (path, digest_str) = datastore.chunk_path(digest);
62
63 let mut counter = 0;
64 let mut new_path = path.clone();
aadcc281 65 loop {
0f3b7efa 66 new_path.set_file_name(format!("{}.{}.bad", digest_str, counter));
aadcc281 67 if new_path.exists() && counter < 9 { counter += 1; } else { break; }
0f3b7efa
SR
68 }
69
70 match std::fs::rename(&path, &new_path) {
71 Ok(_) => {
f6b1d1cc 72 task_log!(worker, "corrupted chunk renamed to {:?}", &new_path);
0f3b7efa
SR
73 },
74 Err(err) => {
75 match err.kind() {
76 std::io::ErrorKind::NotFound => { /* ignored */ },
f6b1d1cc 77 _ => task_log!(worker, "could not rename corrupted chunk {:?} - {}", &path, err)
0f3b7efa
SR
78 }
79 }
80 };
81}
82
fdaab0df 83fn verify_index_chunks(
6b809ff5
DM
84 datastore: Arc<DataStore>,
85 index: Box<dyn IndexFile + Send>,
86 verified_chunks: Arc<Mutex<HashSet<[u8;32]>>>,
87 corrupt_chunks: Arc<Mutex<HashSet<[u8; 32]>>>,
9a38fa29 88 crypt_mode: CryptMode,
f6b1d1cc 89 worker: Arc<dyn TaskState + Send + Sync>,
fdaab0df
DM
90) -> Result<(), Error> {
91
a71bc08f 92 let errors = Arc::new(AtomicUsize::new(0));
fdaab0df 93
6b809ff5 94 let start_time = Instant::now();
fdaab0df 95
6b809ff5
DM
96 let mut read_bytes = 0;
97 let mut decoded_bytes = 0;
7ae571e7 98
f21508b9
DM
99 let worker2 = Arc::clone(&worker);
100 let datastore2 = Arc::clone(&datastore);
101 let corrupt_chunks2 = Arc::clone(&corrupt_chunks);
102 let verified_chunks2 = Arc::clone(&verified_chunks);
a71bc08f 103 let errors2 = Arc::clone(&errors);
f21508b9
DM
104
105 let decoder_pool = ParallelHandler::new(
106 "verify chunk decoder", 4,
107 move |(chunk, digest, size): (DataBlob, [u8;32], u64)| {
108 let chunk_crypt_mode = match chunk.crypt_mode() {
109 Err(err) => {
110 corrupt_chunks2.lock().unwrap().insert(digest);
f6b1d1cc 111 task_log!(worker2, "can't verify chunk, unknown CryptMode - {}", err);
f21508b9
DM
112 errors2.fetch_add(1, Ordering::SeqCst);
113 return Ok(());
114 },
115 Ok(mode) => mode,
116 };
117
118 if chunk_crypt_mode != crypt_mode {
f6b1d1cc
WB
119 task_log!(
120 worker2,
f21508b9
DM
121 "chunk CryptMode {:?} does not match index CryptMode {:?}",
122 chunk_crypt_mode,
123 crypt_mode
f6b1d1cc 124 );
f21508b9
DM
125 errors2.fetch_add(1, Ordering::SeqCst);
126 }
127
128 if let Err(err) = chunk.verify_unencrypted(size as usize, &digest) {
129 corrupt_chunks2.lock().unwrap().insert(digest);
f6b1d1cc 130 task_log!(worker2, "{}", err);
f21508b9 131 errors2.fetch_add(1, Ordering::SeqCst);
f6b1d1cc 132 rename_corrupted_chunk(datastore2.clone(), &digest, &worker2);
f21508b9
DM
133 } else {
134 verified_chunks2.lock().unwrap().insert(digest);
135 }
136
137 Ok(())
138 }
139 );
140
141 for pos in 0..index.index_count() {
2aaae970 142
f6b1d1cc 143 worker.check_abort()?;
deef6369 144 crate::tools::fail_on_shutdown()?;
6b809ff5 145
f21508b9
DM
146 let info = index.chunk_info(pos).unwrap();
147 let size = info.size();
9a38fa29 148
f21508b9
DM
149 if verified_chunks.lock().unwrap().contains(&info.digest) {
150 continue; // already verified
151 }
6b809ff5 152
f21508b9
DM
153 if corrupt_chunks.lock().unwrap().contains(&info.digest) {
154 let digest_str = proxmox::tools::digest_to_hex(&info.digest);
f6b1d1cc 155 task_log!(worker, "chunk {} was marked as corrupt", digest_str);
6b809ff5 156 errors.fetch_add(1, Ordering::SeqCst);
f21508b9 157 continue;
9a38fa29
FG
158 }
159
f21508b9
DM
160 match datastore.load_chunk(&info.digest) {
161 Err(err) => {
162 corrupt_chunks.lock().unwrap().insert(info.digest);
f6b1d1cc 163 task_log!(worker, "can't verify chunk, load failed - {}", err);
f21508b9 164 errors.fetch_add(1, Ordering::SeqCst);
f6b1d1cc 165 rename_corrupted_chunk(datastore.clone(), &info.digest, &worker);
f21508b9
DM
166 continue;
167 }
168 Ok(chunk) => {
169 read_bytes += chunk.raw_size();
170 decoder_pool.send((chunk, info.digest, size))?;
171 decoded_bytes += size;
172 }
2aaae970 173 }
fdaab0df
DM
174 }
175
f21508b9
DM
176 decoder_pool.complete()?;
177
6b809ff5
DM
178 let elapsed = start_time.elapsed().as_secs_f64();
179
180 let read_bytes_mib = (read_bytes as f64)/(1024.0*1024.0);
181 let decoded_bytes_mib = (decoded_bytes as f64)/(1024.0*1024.0);
182
183 let read_speed = read_bytes_mib/elapsed;
184 let decode_speed = decoded_bytes_mib/elapsed;
185
186 let error_count = errors.load(Ordering::SeqCst);
187
f6b1d1cc
WB
188 task_log!(
189 worker,
190 " verified {:.2}/{:.2} MiB in {:.2} seconds, speed {:.2}/{:.2} MiB/s ({} errors)",
191 read_bytes_mib,
192 decoded_bytes_mib,
193 elapsed,
194 read_speed,
195 decode_speed,
196 error_count,
197 );
6b809ff5
DM
198
199 if errors.load(Ordering::SeqCst) > 0 {
f66f537d
DC
200 bail!("chunks could not be verified");
201 }
202
fdaab0df
DM
203 Ok(())
204}
205
2aaae970 206fn verify_fixed_index(
6b809ff5 207 datastore: Arc<DataStore>,
2aaae970
DM
208 backup_dir: &BackupDir,
209 info: &FileInfo,
6b809ff5
DM
210 verified_chunks: Arc<Mutex<HashSet<[u8;32]>>>,
211 corrupt_chunks: Arc<Mutex<HashSet<[u8;32]>>>,
f6b1d1cc 212 worker: Arc<dyn TaskState + Send + Sync>,
2aaae970 213) -> Result<(), Error> {
c2009e53
DM
214
215 let mut path = backup_dir.relative_path();
216 path.push(&info.filename);
217
218 let index = datastore.open_fixed_reader(&path)?;
219
220 let (csum, size) = index.compute_csum();
221 if size != info.size {
222 bail!("wrong size ({} != {})", info.size, size);
223 }
224
225 if csum != info.csum {
226 bail!("wrong index checksum");
227 }
228
f6b1d1cc
WB
229 verify_index_chunks(
230 datastore,
231 Box::new(index),
232 verified_chunks,
233 corrupt_chunks,
234 info.chunk_crypt_mode(),
235 worker,
236 )
c2009e53
DM
237}
238
2aaae970 239fn verify_dynamic_index(
6b809ff5 240 datastore: Arc<DataStore>,
2aaae970
DM
241 backup_dir: &BackupDir,
242 info: &FileInfo,
6b809ff5
DM
243 verified_chunks: Arc<Mutex<HashSet<[u8;32]>>>,
244 corrupt_chunks: Arc<Mutex<HashSet<[u8;32]>>>,
f6b1d1cc 245 worker: Arc<dyn TaskState + Send + Sync>,
2aaae970
DM
246) -> Result<(), Error> {
247
c2009e53
DM
248 let mut path = backup_dir.relative_path();
249 path.push(&info.filename);
250
251 let index = datastore.open_dynamic_reader(&path)?;
252
253 let (csum, size) = index.compute_csum();
254 if size != info.size {
255 bail!("wrong size ({} != {})", info.size, size);
256 }
257
258 if csum != info.csum {
259 bail!("wrong index checksum");
260 }
261
f6b1d1cc
WB
262 verify_index_chunks(
263 datastore,
264 Box::new(index),
265 verified_chunks,
266 corrupt_chunks,
267 info.chunk_crypt_mode(),
268 worker,
269 )
c2009e53
DM
270}
271
272/// Verify a single backup snapshot
273///
274/// This checks all archives inside a backup snapshot.
275/// Errors are logged to the worker log.
276///
8ea00f6e
DM
277/// Returns
278/// - Ok(true) if verify is successful
279/// - Ok(false) if there were verification errors
280/// - Err(_) if task was aborted
2aaae970 281pub fn verify_backup_dir(
6b809ff5 282 datastore: Arc<DataStore>,
2aaae970 283 backup_dir: &BackupDir,
6b809ff5
DM
284 verified_chunks: Arc<Mutex<HashSet<[u8;32]>>>,
285 corrupt_chunks: Arc<Mutex<HashSet<[u8;32]>>>,
f6b1d1cc
WB
286 worker: Arc<dyn TaskState + Send + Sync>,
287 upid: UPID,
d771a608 288 filter: Option<&dyn Fn(&BackupManifest) -> bool>,
2aaae970 289) -> Result<bool, Error> {
bcc28804 290 let snap_lock = lock_dir_noblock_shared(
bfa54f2e
SR
291 &datastore.snapshot_path(&backup_dir),
292 "snapshot",
293 "locked by another operation");
bcc28804
SR
294 match snap_lock {
295 Ok(snap_lock) => verify_backup_dir_with_lock(
296 datastore,
bfa54f2e 297 backup_dir,
bcc28804
SR
298 verified_chunks,
299 corrupt_chunks,
300 worker,
301 upid,
d771a608 302 filter,
bcc28804
SR
303 snap_lock
304 ),
305 Err(err) => {
306 task_log!(
307 worker,
308 "SKIPPED: verify {}:{} - could not acquire snapshot lock: {}",
309 datastore.name(),
310 backup_dir,
311 err,
312 );
313 Ok(true)
314 }
bfa54f2e 315 }
bcc28804 316}
bfa54f2e 317
bcc28804
SR
318/// See verify_backup_dir
319pub fn verify_backup_dir_with_lock(
320 datastore: Arc<DataStore>,
321 backup_dir: &BackupDir,
322 verified_chunks: Arc<Mutex<HashSet<[u8;32]>>>,
323 corrupt_chunks: Arc<Mutex<HashSet<[u8;32]>>>,
324 worker: Arc<dyn TaskState + Send + Sync>,
325 upid: UPID,
d771a608 326 filter: Option<&dyn Fn(&BackupManifest) -> bool>,
bcc28804
SR
327 _snap_lock: Dir,
328) -> Result<bool, Error> {
1a374fcf 329 let manifest = match datastore.load_manifest(&backup_dir) {
ff86ef00 330 Ok((manifest, _)) => manifest,
c2009e53 331 Err(err) => {
f6b1d1cc
WB
332 task_log!(
333 worker,
334 "verify {}:{} - manifest load error: {}",
335 datastore.name(),
336 backup_dir,
337 err,
338 );
8ea00f6e 339 return Ok(false);
c2009e53
DM
340 }
341 };
342
d771a608
DM
343 if let Some(filter) = filter {
344 if filter(&manifest) == false {
345 task_log!(
346 worker,
347 "SKIPPED: verify {}:{} (recently verified)",
348 datastore.name(),
349 backup_dir,
350 );
351 return Ok(true);
352 }
353 }
354
f6b1d1cc 355 task_log!(worker, "verify {}:{}", datastore.name(), backup_dir);
c2009e53
DM
356
357 let mut error_count = 0;
358
d10332a1 359 let mut verify_result = VerifyState::Ok;
c2009e53
DM
360 for info in manifest.files() {
361 let result = proxmox::try_block!({
f6b1d1cc 362 task_log!(worker, " check {}", info.filename);
c2009e53 363 match archive_type(&info.filename)? {
d8594d87
DC
364 ArchiveType::FixedIndex =>
365 verify_fixed_index(
6b809ff5 366 datastore.clone(),
d8594d87
DC
367 &backup_dir,
368 info,
6b809ff5
DM
369 verified_chunks.clone(),
370 corrupt_chunks.clone(),
371 worker.clone(),
d8594d87
DC
372 ),
373 ArchiveType::DynamicIndex =>
374 verify_dynamic_index(
6b809ff5 375 datastore.clone(),
d8594d87
DC
376 &backup_dir,
377 info,
6b809ff5
DM
378 verified_chunks.clone(),
379 corrupt_chunks.clone(),
380 worker.clone(),
d8594d87 381 ),
6b809ff5 382 ArchiveType::Blob => verify_blob(datastore.clone(), &backup_dir, info),
c2009e53
DM
383 }
384 });
8ea00f6e 385
f6b1d1cc 386 worker.check_abort()?;
deef6369 387 crate::tools::fail_on_shutdown()?;
8ea00f6e 388
c2009e53 389 if let Err(err) = result {
f6b1d1cc
WB
390 task_log!(
391 worker,
392 "verify {}:{}/{} failed: {}",
393 datastore.name(),
394 backup_dir,
395 info.filename,
396 err,
397 );
c2009e53 398 error_count += 1;
d10332a1 399 verify_result = VerifyState::Failed;
c2009e53 400 }
3b2046d2 401
c2009e53
DM
402 }
403
3b2046d2 404 let verify_state = SnapshotVerifyState {
d10332a1 405 state: verify_result,
f6b1d1cc 406 upid,
3b2046d2 407 };
1a374fcf
SR
408 let verify_state = serde_json::to_value(verify_state)?;
409 datastore.update_manifest(&backup_dir, |manifest| {
410 manifest.unprotected["verify_state"] = verify_state;
411 }).map_err(|err| format_err!("unable to update manifest blob - {}", err))?;
3b2046d2 412
8ea00f6e 413 Ok(error_count == 0)
c2009e53
DM
414}
415
8ea00f6e
DM
416/// Verify all backups inside a backup group
417///
418/// Errors are logged to the worker log.
419///
420/// Returns
63d9aca9 421/// - Ok((count, failed_dirs)) where failed_dirs had verification errors
8ea00f6e 422/// - Err(_) if task was aborted
328df3b5 423pub fn verify_backup_group(
4f09d310
DM
424 datastore: Arc<DataStore>,
425 group: &BackupGroup,
426 verified_chunks: Arc<Mutex<HashSet<[u8;32]>>>,
427 corrupt_chunks: Arc<Mutex<HashSet<[u8;32]>>>,
63d9aca9 428 progress: Option<(usize, usize)>, // (done, snapshot_count)
f6b1d1cc
WB
429 worker: Arc<dyn TaskState + Send + Sync>,
430 upid: &UPID,
d771a608 431 filter: Option<&dyn Fn(&BackupManifest) -> bool>,
63d9aca9 432) -> Result<(usize, Vec<String>), Error> {
c2009e53 433
adfdc369 434 let mut errors = Vec::new();
c2009e53
DM
435 let mut list = match group.list_backups(&datastore.base_path()) {
436 Ok(list) => list,
437 Err(err) => {
f6b1d1cc
WB
438 task_log!(
439 worker,
440 "verify group {}:{} - unable to list backups: {}",
441 datastore.name(),
442 group,
443 err,
444 );
63d9aca9 445 return Ok((0, errors));
c2009e53
DM
446 }
447 };
448
f6b1d1cc 449 task_log!(worker, "verify group {}:{}", datastore.name(), group);
c2009e53 450
63d9aca9
DM
451 let (done, snapshot_count) = progress.unwrap_or((0, list.len()));
452
453 let mut count = 0;
c2009e53
DM
454 BackupInfo::sort_list(&mut list, false); // newest first
455 for info in list {
63d9aca9 456 count += 1;
a4915dfc 457
f6b1d1cc
WB
458 if !verify_backup_dir(
459 datastore.clone(),
460 &info.backup_dir,
461 verified_chunks.clone(),
462 corrupt_chunks.clone(),
463 worker.clone(),
464 upid.clone(),
d771a608 465 filter,
f6b1d1cc 466 )? {
adfdc369 467 errors.push(info.backup_dir.to_string());
c2009e53 468 }
63d9aca9
DM
469 if snapshot_count != 0 {
470 let pos = done + count;
471 let percentage = ((pos as f64) * 100.0)/(snapshot_count as f64);
f6b1d1cc
WB
472 task_log!(
473 worker,
474 "percentage done: {:.2}% ({} of {} snapshots)",
475 percentage,
476 pos,
477 snapshot_count,
478 );
63d9aca9 479 }
c2009e53
DM
480 }
481
63d9aca9 482 Ok((count, errors))
c2009e53
DM
483}
484
09f6a240 485/// Verify all (owned) backups inside a datastore
8ea00f6e
DM
486///
487/// Errors are logged to the worker log.
488///
489/// Returns
adfdc369 490/// - Ok(failed_dirs) where failed_dirs had verification errors
8ea00f6e 491/// - Err(_) if task was aborted
328df3b5 492pub fn verify_all_backups(
f6b1d1cc
WB
493 datastore: Arc<DataStore>,
494 worker: Arc<dyn TaskState + Send + Sync>,
495 upid: &UPID,
09f6a240 496 owner: Option<Authid>,
d771a608 497 filter: Option<&dyn Fn(&BackupManifest) -> bool>,
f6b1d1cc 498) -> Result<Vec<String>, Error> {
adfdc369 499 let mut errors = Vec::new();
c2009e53 500
09f6a240
FG
501 if let Some(owner) = &owner {
502 task_log!(
503 worker,
504 "verify datastore {} - limiting to backups owned by {}",
505 datastore.name(),
506 owner
507 );
508 }
509
510 let filter_by_owner = |group: &BackupGroup| {
511 if let Some(owner) = &owner {
512 match datastore.get_owner(group) {
513 Ok(ref group_owner) => {
514 group_owner == owner
515 || (group_owner.is_token()
516 && !owner.is_token()
517 && group_owner.user() == owner.user())
518 },
519 Err(_) => false,
520 }
521 } else {
522 true
523 }
524 };
525
4264c502 526 let mut list = match BackupGroup::list_groups(&datastore.base_path()) {
5656888c
DM
527 Ok(list) => list
528 .into_iter()
529 .filter(|group| !(group.backup_type() == "host" && group.backup_id() == "benchmark"))
09f6a240 530 .filter(filter_by_owner)
5656888c 531 .collect::<Vec<BackupGroup>>(),
c2009e53 532 Err(err) => {
f6b1d1cc
WB
533 task_log!(
534 worker,
535 "verify datastore {} - unable to list backups: {}",
536 datastore.name(),
537 err,
538 );
adfdc369 539 return Ok(errors);
c2009e53
DM
540 }
541 };
542
4264c502
DM
543 list.sort_unstable();
544
63d9aca9
DM
545 let mut snapshot_count = 0;
546 for group in list.iter() {
547 snapshot_count += group.list_backups(&datastore.base_path())?.len();
548 }
549
4f09d310
DM
550 // start with 16384 chunks (up to 65GB)
551 let verified_chunks = Arc::new(Mutex::new(HashSet::with_capacity(1024*16)));
552
553 // start with 64 chunks since we assume there are few corrupt ones
554 let corrupt_chunks = Arc::new(Mutex::new(HashSet::with_capacity(64)));
555
f6b1d1cc 556 task_log!(worker, "verify datastore {} ({} snapshots)", datastore.name(), snapshot_count);
c2009e53 557
63d9aca9 558 let mut done = 0;
c2009e53 559 for group in list {
63d9aca9 560 let (count, mut group_errors) = verify_backup_group(
4f09d310
DM
561 datastore.clone(),
562 &group,
563 verified_chunks.clone(),
564 corrupt_chunks.clone(),
63d9aca9 565 Some((done, snapshot_count)),
4f09d310 566 worker.clone(),
f6b1d1cc 567 upid,
a4915dfc 568 filter,
4f09d310 569 )?;
adfdc369 570 errors.append(&mut group_errors);
63d9aca9
DM
571
572 done += count;
c2009e53
DM
573 }
574
adfdc369 575 Ok(errors)
c2009e53 576}