]> git.proxmox.com Git - proxmox-backup.git/blob - src/tape/pool_writer.rs
tape: set media status if we detect damaged medium at start of backup
[proxmox-backup.git] / src / tape / pool_writer.rs
1 use std::collections::HashSet;
2 use std::path::Path;
3 use std::time::SystemTime;
4
5 use anyhow::{bail, Error};
6
7 use proxmox::tools::Uuid;
8
9 use crate::{
10 task_log,
11 backup::{
12 DataStore,
13 },
14 server::WorkerTask,
15 tape::{
16 TAPE_STATUS_DIR,
17 MAX_CHUNK_ARCHIVE_SIZE,
18 COMMIT_BLOCK_SIZE,
19 TapeWrite,
20 SnapshotReader,
21 SnapshotChunkIterator,
22 MediaPool,
23 MediaId,
24 MediaCatalog,
25 MediaSetCatalog,
26 file_formats::{
27 MediaSetLabel,
28 ChunkArchiveWriter,
29 tape_write_snapshot_archive,
30 },
31 drive::{
32 TapeDriver,
33 request_and_load_media,
34 tape_alert_flags_critical,
35 media_changer,
36 },
37 },
38 config::tape_encryption_keys::load_key_configs,
39 };
40
41
/// Runtime state while a medium is loaded in the drive: the open
/// drive handle plus the writable catalog of the loaded medium.
struct PoolWriterState {
    // open handle to the tape drive holding the current medium
    drive: Box<dyn TapeDriver>,
    // writable catalog for the currently loaded medium
    catalog: MediaCatalog,
    // tell if we already moved to EOM
    at_eom: bool,
    // bytes written after the last tape flush/sync
    bytes_written: usize,
}
50
51 impl PoolWriterState {
52
53 fn commit(&mut self) -> Result<(), Error> {
54 self.drive.sync()?; // sync all data to the tape
55 self.catalog.commit()?; // then commit the catalog
56 self.bytes_written = 0;
57 Ok(())
58 }
59 }
60
/// Helper to manage a backup job, writing several tapes of a pool
pub struct PoolWriter {
    // pool we allocate writable media from
    pool: MediaPool,
    // name of the drive used for this job
    drive_name: String,
    // state for the currently loaded medium, if any
    status: Option<PoolWriterState>,
    // read-only catalogs of the other media in the current media set
    media_set_catalog: MediaSetCatalog,
}
68
69 impl PoolWriter {
70
71 pub fn new(mut pool: MediaPool, drive_name: &str) -> Result<Self, Error> {
72
73 let current_time = proxmox::tools::time::epoch_i64();
74
75 pool.start_write_session(current_time)?;
76
77 let mut media_set_catalog = MediaSetCatalog::new();
78
79 // load all catalogs read-only at start
80 for media_uuid in pool.current_media_list()? {
81 let media_catalog = MediaCatalog::open(
82 Path::new(TAPE_STATUS_DIR),
83 &media_uuid,
84 false,
85 false,
86 )?;
87 media_set_catalog.append_catalog(media_catalog)?;
88 }
89
90 Ok(Self {
91 pool,
92 drive_name: drive_name.to_string(),
93 status: None,
94 media_set_catalog,
95 })
96 }
97
    /// Mutable access to the underlying media pool.
    pub fn pool(&mut self) -> &mut MediaPool {
        &mut self.pool
    }
101
102 /// Set media status to FULL (persistent - stores pool status)
103 pub fn set_media_status_full(&mut self, uuid: &Uuid) -> Result<(), Error> {
104 self.pool.set_media_status_full(&uuid)?;
105 Ok(())
106 }
107
108 pub fn contains_snapshot(&self, snapshot: &str) -> bool {
109 if let Some(PoolWriterState { ref catalog, .. }) = self.status {
110 if catalog.contains_snapshot(snapshot) {
111 return true;
112 }
113 }
114 self.media_set_catalog.contains_snapshot(snapshot)
115 }
116
    /// Eject media and drop PoolWriterState (close drive)
    ///
    /// With a media changer, the medium is also unloaded from the
    /// drive back into its slot; on a standalone drive it is only
    /// ejected. A no-op when no medium is loaded.
    pub fn eject_media(&mut self, worker: &WorkerTask) -> Result<(), Error> {
        // take() drops our state afterwards, which closes the drive
        let mut status = match self.status.take() {
            Some(status) => status,
            None => return Ok(()), // no media loaded
        };

        let (drive_config, _digest) = crate::config::drive::config()?;

        if let Some((mut changer, _)) = media_changer(&drive_config, &self.drive_name)? {
            worker.log("eject media");
            status.drive.eject_media()?; // rewind and eject early, so that unload_media is faster
            drop(status); // close drive
            worker.log("unload media");
            changer.unload_media(None)?; //eject and unload
        } else {
            worker.log("standalone drive - ejecting media");
            status.drive.eject_media()?;
        }

        Ok(())
    }
139
    /// Export current media set and drop PoolWriterState (close drive)
    ///
    /// With a media changer, every medium of the current set is moved
    /// to an import/export slot (media not online are only warned
    /// about). On a standalone drive only the loaded medium can be
    /// ejected; no export is possible.
    pub fn export_media_set(&mut self, worker: &WorkerTask) -> Result<(), Error> {
        let mut status = self.status.take();

        let (drive_config, _digest) = crate::config::drive::config()?;

        if let Some((mut changer, _)) = media_changer(&drive_config, &self.drive_name)? {

            if let Some(ref mut status) = status {
                worker.log("eject media");
                status.drive.eject_media()?; // rewind and eject early, so that unload_media is faster
            }
            drop(status); // close drive

            worker.log("unload media");
            changer.unload_media(None)?;

            // move each medium of the set to an import/export slot
            for media_uuid in self.pool.current_media_list()? {
                let media = self.pool.lookup_media(media_uuid)?;
                let label_text = media.label_text();
                if let Some(slot) = changer.export_media(label_text)? {
                    worker.log(format!("exported media '{}' to import/export slot {}", label_text, slot));
                } else {
                    // best effort: medium not in the library right now
                    worker.warn(format!("export failed - media '{}' is not online", label_text));
                }
            }

        } else if let Some(mut status) = status {
            worker.log("standalone drive - ejecting media instead of export");
            status.drive.eject_media()?;
        }

        Ok(())
    }
174
175 /// commit changes to tape and catalog
176 ///
177 /// This is done automatically during a backupsession, but needs to
178 /// be called explicitly before dropping the PoolWriter
179 pub fn commit(&mut self) -> Result<(), Error> {
180 if let Some(ref mut status) = self.status {
181 status.commit()?;
182 }
183 Ok(())
184 }
185
    /// Load a writable media into the drive
    ///
    /// Allocates a writable medium from the pool; if it differs from
    /// the currently loaded one, ejects the old medium (returning its
    /// catalog to the read-only set), loads the new one, checks tape
    /// alert flags, updates the media set label and enables
    /// encryption as configured for the set. Returns the uuid of the
    /// loaded medium.
    pub fn load_writable_media(&mut self, worker: &WorkerTask) -> Result<Uuid, Error> {
        let last_media_uuid = match self.status {
            Some(PoolWriterState { ref catalog, .. }) => Some(catalog.uuid().clone()),
            None => None,
        };

        let current_time = proxmox::tools::time::epoch_i64();
        let media_uuid = self.pool.alloc_writable_media(current_time)?;

        // the uuid was just handed out by the pool, so lookup cannot fail
        let media = self.pool.lookup_media(&media_uuid).unwrap();

        let media_changed = match last_media_uuid {
            Some(ref last_media_uuid) => last_media_uuid != &media_uuid,
            None => true,
        };

        // fast path: the right medium is already loaded
        if !media_changed {
            return Ok(media_uuid);
        }

        task_log!(worker, "allocated new writable media '{}'", media.label_text());

        // remove read-only catalog (we store a writable version in status)
        self.media_set_catalog.remove_catalog(&media_uuid);

        // hand the old medium's catalog back to the read-only set,
        // then eject it (dropping the state closes the drive)
        if let Some(PoolWriterState {mut drive, catalog, .. }) = self.status.take() {
            self.media_set_catalog.append_catalog(catalog)?;
            task_log!(worker, "eject current media");
            drive.eject_media()?;
        }

        let (drive_config, _digest) = crate::config::drive::config()?;

        let (mut drive, old_media_id) =
            request_and_load_media(worker, &drive_config, &self.drive_name, media.label())?;

        // test for critical tape alert flags
        if let Ok(alert_flags) = drive.tape_alert_flags() {
            if !alert_flags.is_empty() {
                worker.log(format!("TapeAlertFlags: {:?}", alert_flags));
                if tape_alert_flags_critical(alert_flags) {
                    // persist the damaged status before aborting the job
                    self.pool.set_media_status_damaged(&media_uuid)?;
                    bail!("aborting due to critical tape alert flags: {:?}", alert_flags);
                }
            }
        }

        // write/verify the media set label and get a writable catalog
        let catalog = update_media_set_label(
            worker,
            drive.as_mut(),
            old_media_id.media_set_label,
            media.id(),
        )?;

        // pool-allocated writable media always belong to a media set,
        // so the label is present here
        let media_set = media.media_set_label().clone().unwrap();

        // encryption key fingerprint plus set uuid select the drive key
        let encrypt_fingerprint = media_set
            .encryption_key_fingerprint
            .clone()
            .map(|fp| (fp, media_set.uuid.clone()));

        drive.set_encryption(encrypt_fingerprint)?;

        self.status = Some(PoolWriterState { drive, catalog, at_eom: false, bytes_written: 0 });

        Ok(media_uuid)
    }
254
255 /// uuid of currently loaded BackupMedia
256 pub fn current_media_uuid(&self) -> Result<&Uuid, Error> {
257 match self.status {
258 Some(PoolWriterState { ref catalog, ..}) => Ok(catalog.uuid()),
259 None => bail!("PoolWriter - no media loaded"),
260 }
261 }
262
    /// Move to EOM (if not already there), then creates a new snapshot
    /// archive writing specified files (as .pxar) into it. On
    /// success, this returns 'Ok(true)' and the media catalog gets
    /// updated.
    ///
    /// Please note that this may fail when there is not enough space
    /// on the media (return value 'Ok((false, _))'). In that case, the
    /// archive is marked incomplete, and we do not use it. The caller
    /// should mark the media as full and try again using another
    /// media.
    pub fn append_snapshot_archive(
        &mut self,
        worker: &WorkerTask,
        snapshot_reader: &SnapshotReader,
    ) -> Result<(bool, usize), Error> {

        let status = match self.status {
            Some(ref mut status) => status,
            None => bail!("PoolWriter - no media loaded"),
        };

        // position at end of recorded data before appending
        if !status.at_eom {
            worker.log(String::from("moving to end of media"));
            status.drive.move_to_eom()?;
            status.at_eom = true;
        }

        // file 0 is the label, file 1 the media set label - anything
        // below 2 means the drive position cannot be right
        let current_file_number = status.drive.current_file_number()?;
        if current_file_number < 2 {
            bail!("got strange file position number from drive ({})", current_file_number);
        }

        let (done, bytes_written) = {
            let mut writer: Box<dyn TapeWrite> = status.drive.write_file()?;

            // Some(uuid) => archive complete, None => ran out of space
            match tape_write_snapshot_archive(writer.as_mut(), snapshot_reader)? {
                Some(content_uuid) => {
                    status.catalog.register_snapshot(
                        content_uuid,
                        current_file_number,
                        &snapshot_reader.snapshot().to_string(),
                    )?;
                    (true, writer.bytes_written())
                }
                None => (false, writer.bytes_written()),
            }
        };

        status.bytes_written += bytes_written;

        let request_sync = status.bytes_written >= COMMIT_BLOCK_SIZE;

        // commit on incomplete archive (media full) or once enough
        // uncommitted bytes have accumulated
        if !done || request_sync {
            status.commit()?;
        }

        Ok((done, bytes_written))
    }
321
    /// Move to EOM (if not already there), then creates a new chunk
    /// archive and writes chunks from 'chunk_iter'. This stops when
    /// it detects LEOM or when we reach max archive size
    /// (4GB). Written chunks are registered in the media catalog.
    pub fn append_chunk_archive(
        &mut self,
        worker: &WorkerTask,
        datastore: &DataStore,
        chunk_iter: &mut std::iter::Peekable<SnapshotChunkIterator>,
    ) -> Result<(bool, usize), Error> {

        let status = match self.status {
            Some(ref mut status) => status,
            None => bail!("PoolWriter - no media loaded"),
        };

        // position at end of recorded data before appending
        if !status.at_eom {
            worker.log(String::from("moving to end of media"));
            status.drive.move_to_eom()?;
            status.at_eom = true;
        }

        // file 0 is the label, file 1 the media set label - anything
        // below 2 means the drive position cannot be right
        let current_file_number = status.drive.current_file_number()?;
        if current_file_number < 2 {
            bail!("got strange file position number from drive ({})", current_file_number);
        }
        let writer = status.drive.write_file()?;

        let start_time = SystemTime::now();

        let (saved_chunks, content_uuid, leom, bytes_written) = write_chunk_archive(
            worker,
            writer,
            datastore,
            chunk_iter,
            &self.media_set_catalog,
            &status.catalog,
            MAX_CHUNK_ARCHIVE_SIZE,
        )?;

        status.bytes_written += bytes_written;

        let elapsed = start_time.elapsed()?.as_secs_f64();
        worker.log(format!(
            "wrote {:.2} MB ({} MB/s)",
            bytes_written as f64 / (1024.0*1024.0),
            (bytes_written as f64)/(1024.0*1024.0*elapsed),
        ));

        let request_sync = status.bytes_written >= COMMIT_BLOCK_SIZE;

        // register chunks in media_catalog
        status.catalog.start_chunk_archive(content_uuid, current_file_number)?;
        for digest in saved_chunks {
            status.catalog.register_chunk(&digest)?;
        }
        status.catalog.end_chunk_archive()?;

        // commit when the tape signaled LEOM (media nearly full) or
        // enough uncommitted bytes have accumulated
        if leom || request_sync {
            status.commit()?;
        }

        Ok((leom, bytes_written))
    }
386 }
387
/// write up to <max_size> of chunks
///
/// Streams chunks from `chunk_iter` into a new chunk archive on
/// `writer`, skipping chunks already present in either catalog or
/// written earlier in this archive. Returns the list of written
/// chunk digests (in write order), the archive content uuid, whether
/// LEOM was reached, and the number of bytes written.
fn write_chunk_archive<'a>(
    worker: &WorkerTask,
    writer: Box<dyn 'a + TapeWrite>,
    datastore: &DataStore,
    chunk_iter: &mut std::iter::Peekable<SnapshotChunkIterator>,
    media_set_catalog: &MediaSetCatalog,
    media_catalog: &MediaCatalog,
    max_size: usize,
) -> Result<(Vec<[u8;32]>, Uuid, bool, usize), Error> {

    let (mut writer, content_uuid) = ChunkArchiveWriter::new(writer, true)?;

    // digests written so far in this archive (fast dedup lookup)
    let mut chunk_index: HashSet<[u8;32]> = HashSet::new();

    // we want to get the chunk list in correct order
    let mut chunk_list: Vec<[u8;32]> = Vec::new();

    let mut leom = false;

    loop {
        let digest = match chunk_iter.next() {
            None => break,
            Some(digest) => digest?,
        };
        // skip chunks already on the loaded medium, already in this
        // archive, or on another medium of the set
        if media_catalog.contains_chunk(&digest)
            || chunk_index.contains(&digest)
            || media_set_catalog.contains_chunk(&digest)
        {
            continue;
        }

        let blob = datastore.load_chunk(&digest)?;
        //println!("CHUNK {} size {}", proxmox::tools::digest_to_hex(&digest), blob.raw_size());

        match writer.try_write_chunk(&digest, &blob) {
            Ok(true) => {
                chunk_index.insert(digest);
                chunk_list.push(digest);
            }
            Ok(false) => {
                // Ok(false) signals logical end of media - stop here
                leom = true;
                break;
            }
            Err(err) => bail!("write chunk failed - {}", err),
        }

        // size check after the write, so the last chunk may overshoot
        // max_size slightly
        if writer.bytes_written() > max_size {
            worker.log("Chunk Archive max size reached, closing archive".to_string());
            break;
        }
    }

    writer.finish()?;

    Ok((chunk_list, content_uuid, leom, writer.bytes_written()))
}
445
446 // Compare the media set label. If the media is empty, or the existing
447 // set label does not match the expected media set, overwrite the
448 // media set label.
449 fn update_media_set_label(
450 worker: &WorkerTask,
451 drive: &mut dyn TapeDriver,
452 old_set: Option<MediaSetLabel>,
453 media_id: &MediaId,
454 ) -> Result<MediaCatalog, Error> {
455
456 let media_catalog;
457
458 let new_set = match media_id.media_set_label {
459 None => bail!("got media without media set - internal error"),
460 Some(ref set) => set,
461 };
462
463 let key_config = if let Some(ref fingerprint) = new_set.encryption_key_fingerprint {
464 let (config_map, _digest) = load_key_configs()?;
465 match config_map.get(fingerprint) {
466 Some(key_config) => Some(key_config.clone()),
467 None => {
468 bail!("unable to find tape encryption key config '{}'", fingerprint);
469 }
470 }
471 } else {
472 None
473 };
474
475 let status_path = Path::new(TAPE_STATUS_DIR);
476
477 match old_set {
478 None => {
479 worker.log("wrinting new media set label".to_string());
480 drive.write_media_set_label(new_set, key_config.as_ref())?;
481 media_catalog = MediaCatalog::overwrite(status_path, media_id, false)?;
482 }
483 Some(media_set_label) => {
484 if new_set.uuid == media_set_label.uuid {
485 if new_set.seq_nr != media_set_label.seq_nr {
486 bail!("got media with wrong media sequence number ({} != {}",
487 new_set.seq_nr,media_set_label.seq_nr);
488 }
489 if new_set.encryption_key_fingerprint != media_set_label.encryption_key_fingerprint {
490 bail!("detected changed encryption fingerprint - internal error");
491 }
492 media_catalog = MediaCatalog::open(status_path, &media_id.label.uuid, true, false)?;
493 } else {
494 worker.log(
495 format!("wrinting new media set label (overwrite '{}/{}')",
496 media_set_label.uuid.to_string(), media_set_label.seq_nr)
497 );
498
499 drive.write_media_set_label(new_set, key_config.as_ref())?;
500 media_catalog = MediaCatalog::overwrite(status_path, media_id, false)?;
501 }
502 }
503 }
504
505 // todo: verify last content/media_catalog somehow?
506 drive.move_to_eom()?; // just to be sure
507
508 Ok(media_catalog)
509 }