]>
Commit | Line | Data |
---|---|---|
c894909e | 1 | use nix::dir::Dir; |
2aaae970 | 2 | use std::collections::HashSet; |
c894909e | 3 | use std::sync::atomic::{AtomicUsize, Ordering}; |
6b809ff5 | 4 | use std::sync::{Arc, Mutex}; |
6b809ff5 | 5 | use std::time::Instant; |
2aaae970 | 6 | |
3b2046d2 | 7 | use anyhow::{bail, format_err, Error}; |
c2009e53 | 8 | |
25877d05 | 9 | use proxmox_sys::{task_log, WorkerTaskContext}; |
d5790a9f | 10 | |
133d718f WB |
11 | use pbs_api_types::{ |
12 | Authid, BackupNamespace, BackupType, CryptMode, SnapshotVerifyState, VerifyState, UPID, | |
13 | }; | |
9531d2c5 | 14 | use pbs_datastore::backup_info::{BackupDir, BackupGroup, BackupInfo}; |
b2065dc7 WB |
15 | use pbs_datastore::index::IndexFile; |
16 | use pbs_datastore::manifest::{archive_type, ArchiveType, BackupManifest, FileInfo}; | |
9531d2c5 | 17 | use pbs_datastore::{DataBlob, DataStore, StoreProgress}; |
25877d05 | 18 | use proxmox_sys::fs::lock_dir_noblock_shared; |
770a36e5 | 19 | |
260147bd | 20 | use crate::tools::parallel_handler::ParallelHandler; |
c2009e53 | 21 | |
59229bd7 TL |
22 | use crate::backup::hierarchy::ListAccessibleBackupGroups; |
23 | ||
9c26a3d6 FG |
24 | /// A VerifyWorker encapsulates a task worker, datastore and information about which chunks have |
25 | /// already been verified or detected as corrupt. | |
26 | pub struct VerifyWorker { | |
b446fa14 | 27 | worker: Arc<dyn WorkerTaskContext>, |
9c26a3d6 | 28 | datastore: Arc<DataStore>, |
c894909e TL |
29 | verified_chunks: Arc<Mutex<HashSet<[u8; 32]>>>, |
30 | corrupt_chunks: Arc<Mutex<HashSet<[u8; 32]>>>, | |
9c26a3d6 FG |
31 | } |
32 | ||
33 | impl VerifyWorker { | |
34 | /// Creates a new VerifyWorker for a given task worker and datastore. | |
b446fa14 | 35 | pub fn new(worker: Arc<dyn WorkerTaskContext>, datastore: Arc<DataStore>) -> Self { |
9c26a3d6 FG |
36 | Self { |
37 | worker, | |
38 | datastore, | |
39 | // start with 16k chunks == up to 64G data | |
c894909e | 40 | verified_chunks: Arc::new(Mutex::new(HashSet::with_capacity(16 * 1024))), |
9c26a3d6 FG |
41 | // start with 64 chunks since we assume there are few corrupt ones |
42 | corrupt_chunks: Arc::new(Mutex::new(HashSet::with_capacity(64))), | |
43 | } | |
44 | } | |
45 | } | |
46 | ||
0f198b82 | 47 | fn verify_blob(backup_dir: &BackupDir, info: &FileInfo) -> Result<(), Error> { |
1eef52c2 | 48 | let blob = backup_dir.load_blob(&info.filename)?; |
c2009e53 | 49 | |
2aaae970 | 50 | let raw_size = blob.raw_size(); |
c2009e53 DM |
51 | if raw_size != info.size { |
52 | bail!("wrong size ({} != {})", info.size, raw_size); | |
53 | } | |
54 | ||
39f18b30 | 55 | let csum = openssl::sha::sha256(blob.raw_data()); |
c2009e53 DM |
56 | if csum != info.csum { |
57 | bail!("wrong index checksum"); | |
58 | } | |
59 | ||
8819d1f2 FG |
60 | match blob.crypt_mode()? { |
61 | CryptMode::Encrypt => Ok(()), | |
62 | CryptMode::None => { | |
63 | // digest already verified above | |
64 | blob.decode(None, None)?; | |
65 | Ok(()) | |
9531d2c5 | 66 | } |
8819d1f2 | 67 | CryptMode::SignOnly => bail!("Invalid CryptMode for blob"), |
c2009e53 | 68 | } |
c2009e53 DM |
69 | } |
70 | ||
0f3b7efa SR |
71 | fn rename_corrupted_chunk( |
72 | datastore: Arc<DataStore>, | |
9531d2c5 | 73 | digest: &[u8; 32], |
c8449217 | 74 | worker: &dyn WorkerTaskContext, |
0f3b7efa SR |
75 | ) { |
76 | let (path, digest_str) = datastore.chunk_path(digest); | |
77 | ||
78 | let mut counter = 0; | |
79 | let mut new_path = path.clone(); | |
aadcc281 | 80 | loop { |
0f3b7efa | 81 | new_path.set_file_name(format!("{}.{}.bad", digest_str, counter)); |
c894909e TL |
82 | if new_path.exists() && counter < 9 { |
83 | counter += 1; | |
84 | } else { | |
85 | break; | |
86 | } | |
0f3b7efa SR |
87 | } |
88 | ||
89 | match std::fs::rename(&path, &new_path) { | |
90 | Ok(_) => { | |
f6b1d1cc | 91 | task_log!(worker, "corrupted chunk renamed to {:?}", &new_path); |
9531d2c5 | 92 | } |
0f3b7efa SR |
93 | Err(err) => { |
94 | match err.kind() { | |
9531d2c5 TL |
95 | std::io::ErrorKind::NotFound => { /* ignored */ } |
96 | _ => task_log!( | |
97 | worker, | |
98 | "could not rename corrupted chunk {:?} - {}", | |
99 | &path, | |
100 | err | |
101 | ), | |
0f3b7efa SR |
102 | } |
103 | } | |
104 | }; | |
105 | } | |
106 | ||
fdaab0df | 107 | fn verify_index_chunks( |
9c26a3d6 | 108 | verify_worker: &VerifyWorker, |
6b809ff5 | 109 | index: Box<dyn IndexFile + Send>, |
9a38fa29 | 110 | crypt_mode: CryptMode, |
fdaab0df | 111 | ) -> Result<(), Error> { |
a71bc08f | 112 | let errors = Arc::new(AtomicUsize::new(0)); |
fdaab0df | 113 | |
6b809ff5 | 114 | let start_time = Instant::now(); |
fdaab0df | 115 | |
6b809ff5 DM |
116 | let mut read_bytes = 0; |
117 | let mut decoded_bytes = 0; | |
7ae571e7 | 118 | |
9c26a3d6 FG |
119 | let worker2 = Arc::clone(&verify_worker.worker); |
120 | let datastore2 = Arc::clone(&verify_worker.datastore); | |
121 | let corrupt_chunks2 = Arc::clone(&verify_worker.corrupt_chunks); | |
122 | let verified_chunks2 = Arc::clone(&verify_worker.verified_chunks); | |
a71bc08f | 123 | let errors2 = Arc::clone(&errors); |
f21508b9 DM |
124 | |
125 | let decoder_pool = ParallelHandler::new( | |
c894909e TL |
126 | "verify chunk decoder", |
127 | 4, | |
128 | move |(chunk, digest, size): (DataBlob, [u8; 32], u64)| { | |
f21508b9 DM |
129 | let chunk_crypt_mode = match chunk.crypt_mode() { |
130 | Err(err) => { | |
131 | corrupt_chunks2.lock().unwrap().insert(digest); | |
f6b1d1cc | 132 | task_log!(worker2, "can't verify chunk, unknown CryptMode - {}", err); |
f21508b9 DM |
133 | errors2.fetch_add(1, Ordering::SeqCst); |
134 | return Ok(()); | |
9531d2c5 | 135 | } |
f21508b9 DM |
136 | Ok(mode) => mode, |
137 | }; | |
138 | ||
139 | if chunk_crypt_mode != crypt_mode { | |
f6b1d1cc WB |
140 | task_log!( |
141 | worker2, | |
f21508b9 DM |
142 | "chunk CryptMode {:?} does not match index CryptMode {:?}", |
143 | chunk_crypt_mode, | |
144 | crypt_mode | |
f6b1d1cc | 145 | ); |
f21508b9 DM |
146 | errors2.fetch_add(1, Ordering::SeqCst); |
147 | } | |
148 | ||
149 | if let Err(err) = chunk.verify_unencrypted(size as usize, &digest) { | |
150 | corrupt_chunks2.lock().unwrap().insert(digest); | |
f6b1d1cc | 151 | task_log!(worker2, "{}", err); |
f21508b9 | 152 | errors2.fetch_add(1, Ordering::SeqCst); |
f6b1d1cc | 153 | rename_corrupted_chunk(datastore2.clone(), &digest, &worker2); |
f21508b9 DM |
154 | } else { |
155 | verified_chunks2.lock().unwrap().insert(digest); | |
156 | } | |
157 | ||
158 | Ok(()) | |
9531d2c5 | 159 | }, |
f21508b9 DM |
160 | ); |
161 | ||
7b2d3a5f | 162 | let skip_chunk = |digest: &[u8; 32]| -> bool { |
9531d2c5 TL |
163 | if verify_worker |
164 | .verified_chunks | |
165 | .lock() | |
166 | .unwrap() | |
167 | .contains(digest) | |
168 | { | |
7b2d3a5f | 169 | true |
9531d2c5 TL |
170 | } else if verify_worker |
171 | .corrupt_chunks | |
172 | .lock() | |
173 | .unwrap() | |
174 | .contains(digest) | |
175 | { | |
25877d05 | 176 | let digest_str = hex::encode(digest); |
9531d2c5 TL |
177 | task_log!( |
178 | verify_worker.worker, | |
179 | "chunk {} was marked as corrupt", | |
180 | digest_str | |
181 | ); | |
7b2d3a5f TL |
182 | errors.fetch_add(1, Ordering::SeqCst); |
183 | true | |
184 | } else { | |
185 | false | |
186 | } | |
187 | }; | |
188 | ||
4921a411 | 189 | let check_abort = |pos: usize| -> Result<(), Error> { |
2e1b63fb TL |
190 | if pos & 1023 == 0 { |
191 | verify_worker.worker.check_abort()?; | |
0fd55b08 | 192 | verify_worker.worker.fail_on_shutdown()?; |
2e1b63fb | 193 | } |
4921a411 DC |
194 | Ok(()) |
195 | }; | |
6b809ff5 | 196 | |
4921a411 DC |
197 | let chunk_list = |
198 | verify_worker | |
199 | .datastore | |
200 | .get_chunks_in_order(&index, skip_chunk, check_abort)?; | |
7f394c80 DC |
201 | |
202 | for (pos, _) in chunk_list { | |
203 | verify_worker.worker.check_abort()?; | |
0fd55b08 | 204 | verify_worker.worker.fail_on_shutdown()?; |
7f394c80 DC |
205 | |
206 | let info = index.chunk_info(pos).unwrap(); | |
207 | ||
26af61de | 208 | // we must always recheck this here, the parallel worker below alter it! |
7b2d3a5f TL |
209 | if skip_chunk(&info.digest) { |
210 | continue; // already verified or marked corrupt | |
26af61de TL |
211 | } |
212 | ||
9c26a3d6 | 213 | match verify_worker.datastore.load_chunk(&info.digest) { |
f21508b9 | 214 | Err(err) => { |
9531d2c5 TL |
215 | verify_worker |
216 | .corrupt_chunks | |
217 | .lock() | |
218 | .unwrap() | |
219 | .insert(info.digest); | |
220 | task_log!( | |
221 | verify_worker.worker, | |
222 | "can't verify chunk, load failed - {}", | |
223 | err | |
224 | ); | |
f21508b9 | 225 | errors.fetch_add(1, Ordering::SeqCst); |
c894909e TL |
226 | rename_corrupted_chunk( |
227 | verify_worker.datastore.clone(), | |
228 | &info.digest, | |
229 | &verify_worker.worker, | |
230 | ); | |
f21508b9 DM |
231 | } |
232 | Ok(chunk) => { | |
7f394c80 | 233 | let size = info.size(); |
f21508b9 DM |
234 | read_bytes += chunk.raw_size(); |
235 | decoder_pool.send((chunk, info.digest, size))?; | |
236 | decoded_bytes += size; | |
237 | } | |
2aaae970 | 238 | } |
fdaab0df DM |
239 | } |
240 | ||
f21508b9 DM |
241 | decoder_pool.complete()?; |
242 | ||
6b809ff5 DM |
243 | let elapsed = start_time.elapsed().as_secs_f64(); |
244 | ||
c894909e TL |
245 | let read_bytes_mib = (read_bytes as f64) / (1024.0 * 1024.0); |
246 | let decoded_bytes_mib = (decoded_bytes as f64) / (1024.0 * 1024.0); | |
6b809ff5 | 247 | |
c894909e TL |
248 | let read_speed = read_bytes_mib / elapsed; |
249 | let decode_speed = decoded_bytes_mib / elapsed; | |
6b809ff5 DM |
250 | |
251 | let error_count = errors.load(Ordering::SeqCst); | |
252 | ||
f6b1d1cc | 253 | task_log!( |
9c26a3d6 | 254 | verify_worker.worker, |
f6b1d1cc WB |
255 | " verified {:.2}/{:.2} MiB in {:.2} seconds, speed {:.2}/{:.2} MiB/s ({} errors)", |
256 | read_bytes_mib, | |
257 | decoded_bytes_mib, | |
258 | elapsed, | |
259 | read_speed, | |
260 | decode_speed, | |
261 | error_count, | |
262 | ); | |
6b809ff5 DM |
263 | |
264 | if errors.load(Ordering::SeqCst) > 0 { | |
f66f537d DC |
265 | bail!("chunks could not be verified"); |
266 | } | |
267 | ||
fdaab0df DM |
268 | Ok(()) |
269 | } | |
270 | ||
2aaae970 | 271 | fn verify_fixed_index( |
9c26a3d6 | 272 | verify_worker: &VerifyWorker, |
2aaae970 DM |
273 | backup_dir: &BackupDir, |
274 | info: &FileInfo, | |
2aaae970 | 275 | ) -> Result<(), Error> { |
c2009e53 DM |
276 | let mut path = backup_dir.relative_path(); |
277 | path.push(&info.filename); | |
278 | ||
9c26a3d6 | 279 | let index = verify_worker.datastore.open_fixed_reader(&path)?; |
c2009e53 DM |
280 | |
281 | let (csum, size) = index.compute_csum(); | |
282 | if size != info.size { | |
283 | bail!("wrong size ({} != {})", info.size, size); | |
284 | } | |
285 | ||
286 | if csum != info.csum { | |
287 | bail!("wrong index checksum"); | |
288 | } | |
289 | ||
c894909e | 290 | verify_index_chunks(verify_worker, Box::new(index), info.chunk_crypt_mode()) |
c2009e53 DM |
291 | } |
292 | ||
2aaae970 | 293 | fn verify_dynamic_index( |
9c26a3d6 | 294 | verify_worker: &VerifyWorker, |
2aaae970 DM |
295 | backup_dir: &BackupDir, |
296 | info: &FileInfo, | |
2aaae970 | 297 | ) -> Result<(), Error> { |
c2009e53 DM |
298 | let mut path = backup_dir.relative_path(); |
299 | path.push(&info.filename); | |
300 | ||
9c26a3d6 | 301 | let index = verify_worker.datastore.open_dynamic_reader(&path)?; |
c2009e53 DM |
302 | |
303 | let (csum, size) = index.compute_csum(); | |
304 | if size != info.size { | |
305 | bail!("wrong size ({} != {})", info.size, size); | |
306 | } | |
307 | ||
308 | if csum != info.csum { | |
309 | bail!("wrong index checksum"); | |
310 | } | |
311 | ||
c894909e | 312 | verify_index_chunks(verify_worker, Box::new(index), info.chunk_crypt_mode()) |
c2009e53 DM |
313 | } |
314 | ||
315 | /// Verify a single backup snapshot | |
316 | /// | |
317 | /// This checks all archives inside a backup snapshot. | |
318 | /// Errors are logged to the worker log. | |
319 | /// | |
8ea00f6e DM |
320 | /// Returns |
321 | /// - Ok(true) if verify is successful | |
322 | /// - Ok(false) if there were verification errors | |
323 | /// - Err(_) if task was aborted | |
2aaae970 | 324 | pub fn verify_backup_dir( |
9c26a3d6 | 325 | verify_worker: &VerifyWorker, |
2aaae970 | 326 | backup_dir: &BackupDir, |
f6b1d1cc | 327 | upid: UPID, |
d771a608 | 328 | filter: Option<&dyn Fn(&BackupManifest) -> bool>, |
2aaae970 | 329 | ) -> Result<bool, Error> { |
bcc28804 | 330 | let snap_lock = lock_dir_noblock_shared( |
133d718f | 331 | &backup_dir.full_path(), |
bfa54f2e | 332 | "snapshot", |
c894909e TL |
333 | "locked by another operation", |
334 | ); | |
bcc28804 | 335 | match snap_lock { |
c894909e TL |
336 | Ok(snap_lock) => { |
337 | verify_backup_dir_with_lock(verify_worker, backup_dir, upid, filter, snap_lock) | |
338 | } | |
bcc28804 SR |
339 | Err(err) => { |
340 | task_log!( | |
9c26a3d6 | 341 | verify_worker.worker, |
bcc28804 | 342 | "SKIPPED: verify {}:{} - could not acquire snapshot lock: {}", |
9c26a3d6 | 343 | verify_worker.datastore.name(), |
1afce610 | 344 | backup_dir.dir(), |
bcc28804 SR |
345 | err, |
346 | ); | |
347 | Ok(true) | |
348 | } | |
bfa54f2e | 349 | } |
bcc28804 | 350 | } |
bfa54f2e | 351 | |
bcc28804 SR |
352 | /// See verify_backup_dir |
353 | pub fn verify_backup_dir_with_lock( | |
9c26a3d6 | 354 | verify_worker: &VerifyWorker, |
bcc28804 | 355 | backup_dir: &BackupDir, |
bcc28804 | 356 | upid: UPID, |
d771a608 | 357 | filter: Option<&dyn Fn(&BackupManifest) -> bool>, |
bcc28804 SR |
358 | _snap_lock: Dir, |
359 | ) -> Result<bool, Error> { | |
9ccf933b | 360 | let manifest = match backup_dir.load_manifest() { |
ff86ef00 | 361 | Ok((manifest, _)) => manifest, |
c2009e53 | 362 | Err(err) => { |
f6b1d1cc | 363 | task_log!( |
9c26a3d6 | 364 | verify_worker.worker, |
f6b1d1cc | 365 | "verify {}:{} - manifest load error: {}", |
9c26a3d6 | 366 | verify_worker.datastore.name(), |
1afce610 | 367 | backup_dir.dir(), |
f6b1d1cc WB |
368 | err, |
369 | ); | |
8ea00f6e | 370 | return Ok(false); |
c2009e53 DM |
371 | } |
372 | }; | |
373 | ||
d771a608 | 374 | if let Some(filter) = filter { |
39735609 | 375 | if !filter(&manifest) { |
d771a608 | 376 | task_log!( |
9c26a3d6 | 377 | verify_worker.worker, |
d771a608 | 378 | "SKIPPED: verify {}:{} (recently verified)", |
9c26a3d6 | 379 | verify_worker.datastore.name(), |
1afce610 | 380 | backup_dir.dir(), |
d771a608 DM |
381 | ); |
382 | return Ok(true); | |
383 | } | |
384 | } | |
385 | ||
9531d2c5 TL |
386 | task_log!( |
387 | verify_worker.worker, | |
388 | "verify {}:{}", | |
389 | verify_worker.datastore.name(), | |
1afce610 | 390 | backup_dir.dir() |
9531d2c5 | 391 | ); |
c2009e53 DM |
392 | |
393 | let mut error_count = 0; | |
394 | ||
d10332a1 | 395 | let mut verify_result = VerifyState::Ok; |
c2009e53 | 396 | for info in manifest.files() { |
6ef1b649 | 397 | let result = proxmox_lang::try_block!({ |
9c26a3d6 | 398 | task_log!(verify_worker.worker, " check {}", info.filename); |
c2009e53 | 399 | match archive_type(&info.filename)? { |
9a37bd6c FG |
400 | ArchiveType::FixedIndex => verify_fixed_index(verify_worker, backup_dir, info), |
401 | ArchiveType::DynamicIndex => verify_dynamic_index(verify_worker, backup_dir, info), | |
1eef52c2 | 402 | ArchiveType::Blob => verify_blob(backup_dir, info), |
c2009e53 DM |
403 | } |
404 | }); | |
8ea00f6e | 405 | |
9c26a3d6 | 406 | verify_worker.worker.check_abort()?; |
0fd55b08 | 407 | verify_worker.worker.fail_on_shutdown()?; |
8ea00f6e | 408 | |
c2009e53 | 409 | if let Err(err) = result { |
f6b1d1cc | 410 | task_log!( |
9c26a3d6 | 411 | verify_worker.worker, |
f6b1d1cc | 412 | "verify {}:{}/{} failed: {}", |
9c26a3d6 | 413 | verify_worker.datastore.name(), |
1afce610 | 414 | backup_dir.dir(), |
f6b1d1cc WB |
415 | info.filename, |
416 | err, | |
417 | ); | |
c2009e53 | 418 | error_count += 1; |
d10332a1 | 419 | verify_result = VerifyState::Failed; |
c2009e53 DM |
420 | } |
421 | } | |
422 | ||
3b2046d2 | 423 | let verify_state = SnapshotVerifyState { |
d10332a1 | 424 | state: verify_result, |
f6b1d1cc | 425 | upid, |
3b2046d2 | 426 | }; |
1a374fcf | 427 | let verify_state = serde_json::to_value(verify_state)?; |
9ccf933b TL |
428 | backup_dir |
429 | .update_manifest(|manifest| { | |
c894909e TL |
430 | manifest.unprotected["verify_state"] = verify_state; |
431 | }) | |
432 | .map_err(|err| format_err!("unable to update manifest blob - {}", err))?; | |
3b2046d2 | 433 | |
8ea00f6e | 434 | Ok(error_count == 0) |
c2009e53 DM |
435 | } |
436 | ||
8ea00f6e DM |
437 | /// Verify all backups inside a backup group |
438 | /// | |
439 | /// Errors are logged to the worker log. | |
440 | /// | |
441 | /// Returns | |
63d9aca9 | 442 | /// - Ok((count, failed_dirs)) where failed_dirs had verification errors |
8ea00f6e | 443 | /// - Err(_) if task was aborted |
328df3b5 | 444 | pub fn verify_backup_group( |
9c26a3d6 | 445 | verify_worker: &VerifyWorker, |
4f09d310 | 446 | group: &BackupGroup, |
7e25b9aa | 447 | progress: &mut StoreProgress, |
f6b1d1cc | 448 | upid: &UPID, |
d771a608 | 449 | filter: Option<&dyn Fn(&BackupManifest) -> bool>, |
7e25b9aa | 450 | ) -> Result<Vec<String>, Error> { |
adfdc369 | 451 | let mut errors = Vec::new(); |
6da20161 | 452 | let mut list = match group.list_backups() { |
c2009e53 DM |
453 | Ok(list) => list, |
454 | Err(err) => { | |
f6b1d1cc | 455 | task_log!( |
9c26a3d6 | 456 | verify_worker.worker, |
f6b1d1cc | 457 | "verify group {}:{} - unable to list backups: {}", |
9c26a3d6 | 458 | verify_worker.datastore.name(), |
f6b1d1cc WB |
459 | group, |
460 | err, | |
461 | ); | |
7e25b9aa | 462 | return Ok(errors); |
c2009e53 DM |
463 | } |
464 | }; | |
465 | ||
7e25b9aa | 466 | let snapshot_count = list.len(); |
c894909e TL |
467 | task_log!( |
468 | verify_worker.worker, | |
469 | "verify group {}:{} ({} snapshots)", | |
470 | verify_worker.datastore.name(), | |
471 | group, | |
472 | snapshot_count | |
473 | ); | |
c2009e53 | 474 | |
7e25b9aa | 475 | progress.group_snapshots = snapshot_count as u64; |
63d9aca9 | 476 | |
c2009e53 | 477 | BackupInfo::sort_list(&mut list, false); // newest first |
7e25b9aa | 478 | for (pos, info) in list.into_iter().enumerate() { |
c894909e | 479 | if !verify_backup_dir(verify_worker, &info.backup_dir, upid.clone(), filter)? { |
adfdc369 | 480 | errors.push(info.backup_dir.to_string()); |
c2009e53 | 481 | } |
7e25b9aa | 482 | progress.done_snapshots = pos as u64 + 1; |
c894909e | 483 | task_log!(verify_worker.worker, "percentage done: {}", progress); |
c2009e53 DM |
484 | } |
485 | ||
7e25b9aa | 486 | Ok(errors) |
c2009e53 DM |
487 | } |
488 | ||
09f6a240 | 489 | /// Verify all (owned) backups inside a datastore |
8ea00f6e DM |
490 | /// |
491 | /// Errors are logged to the worker log. | |
492 | /// | |
493 | /// Returns | |
adfdc369 | 494 | /// - Ok(failed_dirs) where failed_dirs had verification errors |
8ea00f6e | 495 | /// - Err(_) if task was aborted |
328df3b5 | 496 | pub fn verify_all_backups( |
9c26a3d6 | 497 | verify_worker: &VerifyWorker, |
f6b1d1cc | 498 | upid: &UPID, |
59229bd7 TL |
499 | ns: BackupNamespace, |
500 | max_depth: Option<usize>, | |
de27ebc6 | 501 | owner: Option<&Authid>, |
d771a608 | 502 | filter: Option<&dyn Fn(&BackupManifest) -> bool>, |
f6b1d1cc | 503 | ) -> Result<Vec<String>, Error> { |
adfdc369 | 504 | let mut errors = Vec::new(); |
9c26a3d6 | 505 | let worker = Arc::clone(&verify_worker.worker); |
c2009e53 | 506 | |
9531d2c5 TL |
507 | task_log!( |
508 | worker, | |
509 | "verify datastore {}", | |
510 | verify_worker.datastore.name() | |
511 | ); | |
9f9a661b | 512 | |
59229bd7 | 513 | let owner_filtered = if let Some(owner) = &owner { |
9f9a661b | 514 | task_log!(worker, "limiting to backups owned by {}", owner); |
59229bd7 TL |
515 | true |
516 | } else { | |
517 | false | |
09f6a240 FG |
518 | }; |
519 | ||
8c74349b | 520 | // FIXME: This should probably simply enable recursion (or the call have a recursion parameter) |
7da520ae | 521 | let store = &verify_worker.datastore; |
59229bd7 TL |
522 | let max_depth = max_depth.unwrap_or(pbs_api_types::MAX_NAMESPACE_DEPTH); |
523 | ||
524 | let mut list = match ListAccessibleBackupGroups::new(store, ns.clone(), max_depth, owner) { | |
5656888c | 525 | Ok(list) => list |
59229bd7 TL |
526 | .filter_map(|group| match group { |
527 | Ok(group) => Some(group), | |
528 | Err(err) if owner_filtered => { | |
529 | // intentionally not in task log, the user might not see this group! | |
530 | println!("error on iterating groups in ns '{ns}' - {err}"); | |
531 | None | |
532 | } | |
533 | Err(err) => { | |
534 | // we don't filter by owner, but we want to log the error | |
535 | task_log!(worker, "error on iterating groups in ns '{ns}' - {err}"); | |
536 | errors.push(err.to_string()); | |
537 | None | |
538 | } | |
539 | }) | |
988d575d WB |
540 | .filter(|group| { |
541 | !(group.backup_type() == BackupType::Host && group.backup_id() == "benchmark") | |
542 | }) | |
5656888c | 543 | .collect::<Vec<BackupGroup>>(), |
c2009e53 | 544 | Err(err) => { |
c894909e | 545 | task_log!(worker, "unable to list backups: {}", err,); |
adfdc369 | 546 | return Ok(errors); |
c2009e53 DM |
547 | } |
548 | }; | |
549 | ||
5116453b | 550 | list.sort_unstable_by(|a, b| a.group().cmp(b.group())); |
4264c502 | 551 | |
7e25b9aa FG |
552 | let group_count = list.len(); |
553 | task_log!(worker, "found {} groups", group_count); | |
c2009e53 | 554 | |
7e25b9aa FG |
555 | let mut progress = StoreProgress::new(group_count as u64); |
556 | ||
557 | for (pos, group) in list.into_iter().enumerate() { | |
558 | progress.done_groups = pos as u64; | |
559 | progress.done_snapshots = 0; | |
560 | progress.group_snapshots = 0; | |
561 | ||
c894909e TL |
562 | let mut group_errors = |
563 | verify_backup_group(verify_worker, &group, &mut progress, upid, filter)?; | |
adfdc369 | 564 | errors.append(&mut group_errors); |
c2009e53 DM |
565 | } |
566 | ||
adfdc369 | 567 | Ok(errors) |
c2009e53 | 568 | } |
037e6c0c HL |
569 | |
570 | /// Filter for the verification of snapshots | |
571 | pub fn verify_filter( | |
572 | ignore_verified_snapshots: bool, | |
573 | outdated_after: Option<i64>, | |
574 | manifest: &BackupManifest, | |
575 | ) -> bool { | |
576 | if !ignore_verified_snapshots { | |
577 | return true; | |
578 | } | |
579 | ||
580 | let raw_verify_state = manifest.unprotected["verify_state"].clone(); | |
581 | match serde_json::from_value::<SnapshotVerifyState>(raw_verify_state) { | |
582 | Err(_) => true, // no last verification, always include | |
583 | Ok(last_verify) => { | |
584 | match outdated_after { | |
585 | None => false, // never re-verify if ignored and no max age | |
586 | Some(max_age) => { | |
6ef1b649 | 587 | let now = proxmox_time::epoch_i64(); |
037e6c0c HL |
588 | let days_since_last_verify = (now - last_verify.upid.starttime) / 86400; |
589 | ||
7a1a5d20 | 590 | max_age == 0 || days_since_last_verify > max_age |
037e6c0c HL |
591 | } |
592 | } | |
593 | } | |
594 | } | |
770a36e5 | 595 | } |