git.proxmox.com Git - proxmox-backup.git/blob - src/api2/tape/backup.rs
commit c5588ddcdd9d794cdadfe60084a2183e80466814
use std::path::Path;
use std::sync::{Mutex, Arc};

use anyhow::{bail, format_err, Error};
use serde_json::Value;

use proxmox::{
    try_block,
    api::{
        api,
        RpcEnvironment,
        RpcEnvironmentType,
        Router,
        Permission,
    },
};

use pbs_api_types::{
    Authid, Userid, TapeBackupJobConfig, TapeBackupJobSetup, TapeBackupJobStatus, MediaPoolConfig,
    UPID_SCHEMA, JOB_ID_SCHEMA, PRIV_DATASTORE_READ, PRIV_TAPE_AUDIT, PRIV_TAPE_WRITE,
};

use pbs_datastore::StoreProgress;
use pbs_datastore::backup_info::{BackupDir, BackupInfo};
use pbs_tools::{task_log, task_warn, task::WorkerTaskContext};
use pbs_config::CachedUserInfo;
use proxmox_rest_server::WorkerTask;

use crate::{
    server::{
        lookup_user_email,
        TapeBackupJobSummary,
        jobstate::{
            Job,
            JobState,
            compute_schedule_status,
        },
    },
    backup::{DataStore, SnapshotReader},
    tape::{
        TAPE_STATUS_DIR,
        Inventory,
        PoolWriter,
        MediaPool,
        drive::{
            media_changer,
            lock_tape_device,
            TapeLockError,
            set_tape_device_state,
        },
        changer::update_changer_online_status,
    },
};

const TAPE_BACKUP_JOB_ROUTER: Router = Router::new()
    .post(&API_METHOD_RUN_TAPE_BACKUP_JOB);

pub const ROUTER: Router = Router::new()
    .get(&API_METHOD_LIST_TAPE_BACKUP_JOBS)
    .post(&API_METHOD_BACKUP)
    .match_all("id", &TAPE_BACKUP_JOB_ROUTER);
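// Note on routing: ROUTER above exposes GET (list configured jobs) and POST (run an
// ad-hoc backup), and matches "id" so that a POST on a job id runs that configured
// job via TAPE_BACKUP_JOB_ROUTER. The mount point (presumably /api2/json/tape/backup,
// as elsewhere in the tape API) is set by the parent module, not in this file.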

fn check_backup_permission(
    auth_id: &Authid,
    store: &str,
    pool: &str,
    drive: &str,
) -> Result<(), Error> {

    let user_info = CachedUserInfo::new()?;

    let privs = user_info.lookup_privs(auth_id, &["datastore", store]);
    if (privs & PRIV_DATASTORE_READ) == 0 {
        bail!("no permissions on /datastore/{}", store);
    }

    let privs = user_info.lookup_privs(auth_id, &["tape", "drive", drive]);
    if (privs & PRIV_TAPE_WRITE) == 0 {
        bail!("no permissions on /tape/drive/{}", drive);
    }

    let privs = user_info.lookup_privs(auth_id, &["tape", "pool", pool]);
    if (privs & PRIV_TAPE_WRITE) == 0 {
        bail!("no permissions on /tape/pool/{}", pool);
    }

    Ok(())
}
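// All three checks above must pass: Datastore.Read on the source datastore plus
// Tape.Write on both the target drive and the target media pool; if any privilege
// is missing, the request is rejected before a worker task is started.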

#[api(
    returns: {
        description: "List configured tape backup jobs and their status",
        type: Array,
        items: { type: TapeBackupJobStatus },
    },
    access: {
        description: "List configured tape jobs filtered by Tape.Audit privileges",
        permission: &Permission::Anybody,
    },
)]
/// List all tape backup jobs
pub fn list_tape_backup_jobs(
    _param: Value,
    mut rpcenv: &mut dyn RpcEnvironment,
) -> Result<Vec<TapeBackupJobStatus>, Error> {
    let auth_id: Authid = rpcenv.get_auth_id().unwrap().parse()?;
    let user_info = CachedUserInfo::new()?;

    let (job_config, digest) = pbs_config::tape_job::config()?;
    let (pool_config, _pool_digest) = pbs_config::media_pool::config()?;
    let (drive_config, _digest) = pbs_config::drive::config()?;

    let job_list_iter = job_config
        .convert_to_typed_array("backup")?
        .into_iter()
        .filter(|_job: &TapeBackupJobConfig| {
            // fixme: check access permission
            true
        });

    let mut list = Vec::new();
    let status_path = Path::new(TAPE_STATUS_DIR);
    let current_time = proxmox::tools::time::epoch_i64();

    for job in job_list_iter {
        let privs = user_info.lookup_privs(&auth_id, &["tape", "job", &job.id]);
        if (privs & PRIV_TAPE_AUDIT) == 0 {
            continue;
        }

        let last_state = JobState::load("tape-backup-job", &job.id)
            .map_err(|err| format_err!("could not open statefile for {}: {}", &job.id, err))?;

        let status = compute_schedule_status(&last_state, job.schedule.as_deref())?;

        let next_run = status.next_run.unwrap_or(current_time);

        let mut next_media_label = None;

        if let Ok(pool) = pool_config.lookup::<MediaPoolConfig>("pool", &job.setup.pool) {
            let mut changer_name = None;
            if let Ok(Some((_, name))) = media_changer(&drive_config, &job.setup.drive) {
                changer_name = Some(name);
            }
            if let Ok(mut pool) = MediaPool::with_config(status_path, &pool, changer_name, true) {
                if pool.start_write_session(next_run, false).is_ok() {
                    if let Ok(media_id) = pool.guess_next_writable_media(next_run) {
                        next_media_label = Some(media_id.label.label_text);
                    }
                }
            }
        }

        list.push(TapeBackupJobStatus { config: job, status, next_media_label });
    }

    rpcenv["digest"] = proxmox::tools::digest_to_hex(&digest).into();

    Ok(list)
}

pub fn do_tape_backup_job(
    mut job: Job,
    setup: TapeBackupJobSetup,
    auth_id: &Authid,
    schedule: Option<String>,
    to_stdout: bool,
) -> Result<String, Error> {

    let job_id = format!("{}:{}:{}:{}",
        setup.store,
        setup.pool,
        setup.drive,
        job.jobname());
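    // Illustrative example (hypothetical values): a job named "job1" backing up
    // datastore "store1" to pool "tape-pool" on drive "drive0" gets the worker
    // task id "store1:tape-pool:drive0:job1".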

    let worker_type = job.jobtype().to_string();

    let datastore = DataStore::lookup_datastore(&setup.store)?;

    let (config, _digest) = pbs_config::media_pool::config()?;
    let pool_config: MediaPoolConfig = config.lookup("pool", &setup.pool)?;

    let (drive_config, _digest) = pbs_config::drive::config()?;

    // for scheduled jobs we acquire the lock later in the worker
    let drive_lock = if schedule.is_some() {
        None
    } else {
        Some(lock_tape_device(&drive_config, &setup.drive)?)
    };

    let notify_user = setup.notify_user.as_ref().unwrap_or_else(|| &Userid::root_userid());
    let email = lookup_user_email(notify_user);

    let upid_str = WorkerTask::new_thread(
        &worker_type,
        Some(job_id.clone()),
        auth_id.to_string(),
        to_stdout,
        move |worker| {
            job.start(&worker.upid().to_string())?;
            let mut drive_lock = drive_lock;

            let mut summary = Default::default();
            let job_result = try_block!({
                if schedule.is_some() {
                    // for scheduled tape backup jobs, we wait indefinitely for the lock
                    task_log!(worker, "waiting for drive lock...");
                    loop {
                        worker.check_abort()?;
                        match lock_tape_device(&drive_config, &setup.drive) {
                            Ok(lock) => {
                                drive_lock = Some(lock);
                                break;
                            }
                            Err(TapeLockError::TimeOut) => continue,
                            Err(TapeLockError::Other(err)) => return Err(err),
                        }
                    }
                }
                set_tape_device_state(&setup.drive, &worker.upid().to_string())?;

                task_log!(worker, "Starting tape backup job '{}'", job_id);
                if let Some(event_str) = schedule {
                    task_log!(worker, "task triggered by schedule '{}'", event_str);
                }

                backup_worker(
                    &worker,
                    datastore,
                    &pool_config,
                    &setup,
                    email.clone(),
                    &mut summary,
                    false,
                )
            });

            let status = worker.create_state(&job_result);

            if let Some(email) = email {
                if let Err(err) = crate::server::send_tape_backup_status(
                    &email,
                    Some(job.jobname()),
                    &setup,
                    &job_result,
                    summary,
                ) {
                    eprintln!("send tape backup notification failed: {}", err);
                }
            }

            if let Err(err) = job.finish(status) {
                eprintln!(
                    "could not finish job state for {}: {}",
                    job.jobtype().to_string(),
                    err
                );
            }

            if let Err(err) = set_tape_device_state(&setup.drive, "") {
                eprintln!(
                    "could not unset drive state for {}: {}",
                    setup.drive,
                    err
                );
            }

            job_result
        }
    )?;

    Ok(upid_str)
}

#[api(
    input: {
        properties: {
            id: {
                schema: JOB_ID_SCHEMA,
            },
        },
    },
    access: {
        // Note: parameters are from job config, so we need to test inside function body
        description: "The user needs Tape.Write privilege on /tape/pool/{pool} \
            and /tape/drive/{drive}, Datastore.Read privilege on /datastore/{store}.",
        permission: &Permission::Anybody,
    },
)]
/// Runs a tape backup job manually.
pub fn run_tape_backup_job(
    id: String,
    rpcenv: &mut dyn RpcEnvironment,
) -> Result<String, Error> {
    let auth_id: Authid = rpcenv.get_auth_id().unwrap().parse()?;

    let (config, _digest) = pbs_config::tape_job::config()?;
    let backup_job: TapeBackupJobConfig = config.lookup("backup", &id)?;

    check_backup_permission(
        &auth_id,
        &backup_job.setup.store,
        &backup_job.setup.pool,
        &backup_job.setup.drive,
    )?;

    let job = Job::new("tape-backup-job", &id)?;

    let to_stdout = rpcenv.env_type() == RpcEnvironmentType::CLI;

    let upid_str = do_tape_backup_job(job, backup_job.setup, &auth_id, None, to_stdout)?;

    Ok(upid_str)
}
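// The returned UPID string identifies the spawned worker task; clients can use it
// to follow the task's status and log output through the regular task API.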

#[api(
    input: {
        properties: {
            setup: {
                type: TapeBackupJobSetup,
                flatten: true,
            },
            "force-media-set": {
                description: "Ignore the allocation policy and start a new media-set.",
                optional: true,
                type: bool,
                default: false,
            },
        },
    },
    returns: {
        schema: UPID_SCHEMA,
    },
    access: {
        // Note: parameters are not URI parameters, so we need to test inside the function body
        description: "The user needs Tape.Write privilege on /tape/pool/{pool} \
            and /tape/drive/{drive}, Datastore.Read privilege on /datastore/{store}.",
        permission: &Permission::Anybody,
    },
)]
/// Backup datastore to tape media pool
pub fn backup(
    setup: TapeBackupJobSetup,
    force_media_set: bool,
    rpcenv: &mut dyn RpcEnvironment,
) -> Result<Value, Error> {

    let auth_id: Authid = rpcenv.get_auth_id().unwrap().parse()?;

    check_backup_permission(
        &auth_id,
        &setup.store,
        &setup.pool,
        &setup.drive,
    )?;

    let datastore = DataStore::lookup_datastore(&setup.store)?;

    let (config, _digest) = pbs_config::media_pool::config()?;
    let pool_config: MediaPoolConfig = config.lookup("pool", &setup.pool)?;

    let (drive_config, _digest) = pbs_config::drive::config()?;

    // early check/lock before starting worker
    let drive_lock = lock_tape_device(&drive_config, &setup.drive)?;

    let to_stdout = rpcenv.env_type() == RpcEnvironmentType::CLI;

    let job_id = format!("{}:{}:{}", setup.store, setup.pool, setup.drive);

    let notify_user = setup.notify_user.as_ref().unwrap_or_else(|| &Userid::root_userid());
    let email = lookup_user_email(notify_user);

    let upid_str = WorkerTask::new_thread(
        "tape-backup",
        Some(job_id),
        auth_id.to_string(),
        to_stdout,
        move |worker| {
            let _drive_lock = drive_lock; // keep lock guard
            set_tape_device_state(&setup.drive, &worker.upid().to_string())?;

            let mut summary = Default::default();
            let job_result = backup_worker(
                &worker,
                datastore,
                &pool_config,
                &setup,
                email.clone(),
                &mut summary,
                force_media_set,
            );

            if let Some(email) = email {
                if let Err(err) = crate::server::send_tape_backup_status(
                    &email,
                    None,
                    &setup,
                    &job_result,
                    summary,
                ) {
                    eprintln!("send tape backup notification failed: {}", err);
                }
            }

            // ignore errors
            let _ = set_tape_device_state(&setup.drive, "");
            job_result
        }
    )?;

    Ok(upid_str.into())
}
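
// backup_worker drives a whole tape backup run: it refreshes the media online
// status, opens a PoolWriter for the target pool and drive, walks all backup
// groups of the datastore (optionally only their latest snapshot), appends a
// media catalog when anything was written, and finally exports or ejects the
// media set if the job setup asks for it.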

fn backup_worker(
    worker: &WorkerTask,
    datastore: Arc<DataStore>,
    pool_config: &MediaPoolConfig,
    setup: &TapeBackupJobSetup,
    email: Option<String>,
    summary: &mut TapeBackupJobSummary,
    force_media_set: bool,
) -> Result<(), Error> {

    let status_path = Path::new(TAPE_STATUS_DIR);
    let start = std::time::Instant::now();

    task_log!(worker, "update media online status");
    let changer_name = update_media_online_status(&setup.drive)?;

    let pool = MediaPool::with_config(status_path, &pool_config, changer_name, false)?;

    let mut pool_writer = PoolWriter::new(
        pool,
        &setup.drive,
        worker,
        email,
        force_media_set,
    )?;

    let mut group_list = BackupInfo::list_backup_groups(&datastore.base_path())?;

    group_list.sort_unstable();

    let group_count = group_list.len();
    task_log!(worker, "found {} groups", group_count);

    let mut progress = StoreProgress::new(group_count as u64);

    let latest_only = setup.latest_only.unwrap_or(false);

    if latest_only {
        task_log!(worker, "latest-only: true (only considering latest snapshots)");
    }

    let datastore_name = datastore.name();

    let mut errors = false;

    let mut need_catalog = false; // avoid writing catalog for empty jobs
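
    // For each group: skip unfinished snapshots and snapshots already present in the
    // pool's media catalog; in latest-only mode only the newest finished snapshot of
    // a group is written. A failed snapshot sets `errors` but does not abort the run.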

    for (group_number, group) in group_list.into_iter().enumerate() {
        progress.done_groups = group_number as u64;
        progress.done_snapshots = 0;
        progress.group_snapshots = 0;

        let snapshot_list = group.list_backups(&datastore.base_path())?;

        // filter out unfinished backups
        let mut snapshot_list: Vec<_> = snapshot_list
            .into_iter()
            .filter(|item| item.is_finished())
            .collect();

        if snapshot_list.is_empty() {
            task_log!(worker, "group {} was empty", group);
            continue;
        }

        BackupInfo::sort_list(&mut snapshot_list, true); // oldest first

        if latest_only {
            progress.group_snapshots = 1;
            if let Some(info) = snapshot_list.pop() {
                if pool_writer.contains_snapshot(datastore_name, &info.backup_dir.to_string()) {
                    task_log!(worker, "skip snapshot {}", info.backup_dir);
                    continue;
                }

                need_catalog = true;

                let snapshot_name = info.backup_dir.to_string();
                if !backup_snapshot(worker, &mut pool_writer, datastore.clone(), info.backup_dir)? {
                    errors = true;
                } else {
                    summary.snapshot_list.push(snapshot_name);
                }
                progress.done_snapshots = 1;
                task_log!(
                    worker,
                    "percentage done: {}",
                    progress
                );
            }
        } else {
            progress.group_snapshots = snapshot_list.len() as u64;
            for (snapshot_number, info) in snapshot_list.into_iter().enumerate() {
                if pool_writer.contains_snapshot(datastore_name, &info.backup_dir.to_string()) {
                    task_log!(worker, "skip snapshot {}", info.backup_dir);
                    continue;
                }

                need_catalog = true;

                let snapshot_name = info.backup_dir.to_string();
                if !backup_snapshot(worker, &mut pool_writer, datastore.clone(), info.backup_dir)? {
                    errors = true;
                } else {
                    summary.snapshot_list.push(snapshot_name);
                }
                progress.done_snapshots = snapshot_number as u64 + 1;
                task_log!(
                    worker,
                    "percentage done: {}",
                    progress
                );
            }
        }
    }

    pool_writer.commit()?;

    if need_catalog {
        task_log!(worker, "append media catalog");

        let uuid = pool_writer.load_writable_media(worker)?;
        let done = pool_writer.append_catalog_archive(worker)?;
        if !done {
            task_log!(worker, "catalog does not fit on tape, writing to next volume");
            pool_writer.set_media_status_full(&uuid)?;
            pool_writer.load_writable_media(worker)?;
            let done = pool_writer.append_catalog_archive(worker)?;
            if !done {
                bail!("write_catalog_archive failed on second media");
            }
        }
    }

    if setup.export_media_set.unwrap_or(false) {
        pool_writer.export_media_set(worker)?;
    } else if setup.eject_media.unwrap_or(false) {
        pool_writer.eject_media(worker)?;
    }

    if errors {
        bail!("Tape backup finished with some errors. Please check the task log.");
    }

    summary.duration = start.elapsed();

    Ok(())
}

// Try to update the media online status
fn update_media_online_status(drive: &str) -> Result<Option<String>, Error> {

    let (config, _digest) = pbs_config::drive::config()?;

    if let Ok(Some((mut changer, changer_name))) = media_changer(&config, drive) {

        let label_text_list = changer.online_media_label_texts()?;

        let status_path = Path::new(TAPE_STATUS_DIR);
        let mut inventory = Inventory::load(status_path)?;

        update_changer_online_status(
            &config,
            &mut inventory,
            &changer_name,
            &label_text_list,
        )?;

        Ok(Some(changer_name))
    } else {
        Ok(None)
    }
}
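
// backup_snapshot writes a single snapshot to tape. It returns Ok(false) when the
// snapshot could not be opened (e.g. it was pruned in the meantime), so the caller
// records an error but keeps going; Ok(true) means the snapshot was written.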

pub fn backup_snapshot(
    worker: &WorkerTask,
    pool_writer: &mut PoolWriter,
    datastore: Arc<DataStore>,
    snapshot: BackupDir,
) -> Result<bool, Error> {

    task_log!(worker, "backup snapshot {}", snapshot);

    let snapshot_reader = match SnapshotReader::new(datastore.clone(), snapshot.clone()) {
        Ok(reader) => reader,
        Err(err) => {
            // ignore missing snapshots and continue
            task_warn!(worker, "failed opening snapshot '{}': {}", snapshot, err);
            return Ok(false);
        }
    };

    let snapshot_reader = Arc::new(Mutex::new(snapshot_reader));

    let (reader_thread, chunk_iter) = pool_writer.spawn_chunk_reader_thread(
        datastore.clone(),
        snapshot_reader.clone(),
    )?;

    let mut chunk_iter = chunk_iter.peekable();

    loop {
        worker.check_abort()?;

        // test if we have remaining chunks
        match chunk_iter.peek() {
            None => break,
            Some(Ok(_)) => { /* Ok */ },
            Some(Err(err)) => bail!("{}", err),
        }

        let uuid = pool_writer.load_writable_media(worker)?;

        worker.check_abort()?;

        let (leom, _bytes) = pool_writer.append_chunk_archive(worker, &mut chunk_iter, datastore.name())?;

        if leom {
            pool_writer.set_media_status_full(&uuid)?;
        }
    }
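
    // All chunk archives for this snapshot are on tape now; wait for the reader
    // thread, then append the snapshot archive itself (the snapshot's index and
    // blob files), retrying once on the next volume if it does not fit.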

    if let Err(_) = reader_thread.join() {
        bail!("chunk reader thread failed");
    }

    worker.check_abort()?;

    let uuid = pool_writer.load_writable_media(worker)?;

    worker.check_abort()?;

    let snapshot_reader = snapshot_reader.lock().unwrap();

    let (done, _bytes) = pool_writer.append_snapshot_archive(worker, &snapshot_reader)?;

    if !done {
        // does not fit on tape, so we try on next volume
        pool_writer.set_media_status_full(&uuid)?;

        worker.check_abort()?;

        pool_writer.load_writable_media(worker)?;
        let (done, _bytes) = pool_writer.append_snapshot_archive(worker, &snapshot_reader)?;

        if !done {
            bail!("write_snapshot_archive failed on second media");
        }
    }

    task_log!(worker, "end backup {}:{}", datastore.name(), snapshot);

    Ok(true)
}