1 use std
::collections
::{HashMap, VecDeque}
;
3 use std
::io
::{Read, Write, BufRead, BufReader}
;
4 use std
::panic
::UnwindSafe
;
5 use std
::sync
::atomic
::{AtomicBool, Ordering}
;
6 use std
::sync
::{Arc, Mutex}
;
8 use anyhow
::{bail, format_err, Error}
;
10 use lazy_static
::lazy_static
;
12 use serde_json
::{json, Value}
;
13 use serde
::{Serialize, Deserialize}
;
14 use tokio
::sync
::oneshot
;
16 use proxmox
::sys
::linux
::procfs
;
17 use proxmox
::try_block
;
18 use proxmox
::tools
::fs
::{create_path, open_file_locked, replace_file, CreateOptions}
;
22 use crate::tools
::logrotate
::{LogRotate, LogRotateFiles}
;
23 use crate::tools
::FileLogger
;
24 use crate::api2
::types
::Userid
;
// The path literals are wrapped in macros because `concat!` only accepts
// literals; a macro expanding to a literal can be used inside `concat!`,
// a `const` cannot.
macro_rules! PROXMOX_BACKUP_VAR_RUN_DIR_M { () => ("/run/proxmox-backup") }
macro_rules! PROXMOX_BACKUP_LOG_DIR_M { () => ("/var/log/proxmox-backup") }
macro_rules! PROXMOX_BACKUP_TASK_DIR_M { () => (concat!( PROXMOX_BACKUP_LOG_DIR_M!(), "/tasks")) }

/// Runtime directory; also used to build the task control socket name.
pub const PROXMOX_BACKUP_VAR_RUN_DIR: &str = PROXMOX_BACKUP_VAR_RUN_DIR_M!();
/// Base log directory.
pub const PROXMOX_BACKUP_LOG_DIR: &str = PROXMOX_BACKUP_LOG_DIR_M!();
/// Directory below the log directory holding the task logs and task list files.
pub const PROXMOX_BACKUP_TASK_DIR: &str = PROXMOX_BACKUP_TASK_DIR_M!();
/// Lock file guarding access to the task list files.
pub const PROXMOX_BACKUP_TASK_LOCK_FN: &str = concat!(PROXMOX_BACKUP_TASK_DIR_M!(), "/.active.lock");
/// Task list file containing the active tasks.
pub const PROXMOX_BACKUP_ACTIVE_TASK_FN: &str = concat!(PROXMOX_BACKUP_TASK_DIR_M!(), "/active");
/// Task list file containing the most recently finished tasks (at most `MAX_INDEX_TASKS`).
pub const PROXMOX_BACKUP_INDEX_TASK_FN: &str = concat!(PROXMOX_BACKUP_TASK_DIR_M!(), "/index");
/// Task list file to which finished tasks are appended once they no longer fit into the index.
pub const PROXMOX_BACKUP_ARCHIVE_TASK_FN: &str = concat!(PROXMOX_BACKUP_TASK_DIR_M!(), "/archive");

// Upper bound for the number of finished tasks kept in the index file.
const MAX_INDEX_TASKS: usize = 1000;
41 static ref WORKER_TASK_LIST
: Mutex
<HashMap
<usize, Arc
<WorkerTask
>>> = Mutex
::new(HashMap
::new());
43 static ref MY_PID
: i32 = unsafe { libc::getpid() }
;
44 static ref MY_PID_PSTART
: u64 = procfs
::PidStat
::read_from_pid(Pid
::from_raw(*MY_PID
))
49 /// Test if the task is still running
50 pub async
fn worker_is_active(upid
: &UPID
) -> Result
<bool
, Error
> {
51 if (upid
.pid
== *MY_PID
) && (upid
.pstart
== *MY_PID_PSTART
) {
52 return Ok(WORKER_TASK_LIST
.lock().unwrap().contains_key(&upid
.task_id
));
55 if !procfs
::check_process_running_pstart(upid
.pid
, upid
.pstart
).is_some() {
59 let socketname
= format
!(
60 "\0{}/proxmox-task-control-{}.sock", PROXMOX_BACKUP_VAR_RUN_DIR
, upid
.pid
);
64 "upid": upid
.to_string(),
67 let status
= super::send_command(socketname
, cmd
).await?
;
69 if let Some(active
) = status
.as_bool() {
72 bail
!("got unexpected result {:?} (expected bool)", status
);
76 /// Test if the task is still running (fast but inaccurate implementation)
78 /// If the task is spanned from a different process, we simply return if
79 /// that process is still running. This information is good enough to detect
81 pub fn worker_is_active_local(upid
: &UPID
) -> bool
{
82 if (upid
.pid
== *MY_PID
) && (upid
.pstart
== *MY_PID_PSTART
) {
83 WORKER_TASK_LIST
.lock().unwrap().contains_key(&upid
.task_id
)
85 procfs
::check_process_running_pstart(upid
.pid
, upid
.pstart
).is_some()
89 pub fn create_task_control_socket() -> Result
<(), Error
> {
91 let socketname
= format
!(
92 "\0{}/proxmox-task-control-{}.sock", PROXMOX_BACKUP_VAR_RUN_DIR
, *MY_PID
);
94 let control_future
= super::create_control_socket(socketname
, |param
| {
95 let param
= param
.as_object()
96 .ok_or_else(|| format_err
!("unable to parse parameters (expected json object)"))?
;
97 if param
.keys().count() != 2 { bail!("wrong number of parameters"); }
99 let command
= param
["command"].as_str()
100 .ok_or_else(|| format_err
!("unable to parse parameters (missing command)"))?
;
102 // we have only two commands for now
103 if !(command
== "abort-task" || command
== "status") { bail!("got unknown command '{}'
", command); }
105 let upid_str = param["upid
"].as_str()
106 .ok_or_else(|| format_err!("unable to parse
parameters (missing upid
)"))?;
108 let upid = upid_str.parse::<UPID>()?;
110 if !((upid.pid == *MY_PID) && (upid.pstart == *MY_PID_PSTART)) {
111 bail!("upid does not belong to this process
");
114 let hash = WORKER_TASK_LIST.lock().unwrap();
118 if let Some(ref worker) = hash.get(&upid.task_id) {
119 worker.request_abort();
121 // assume task is already stopped
126 let active = hash.contains_key(&upid.task_id);
130 bail!("got unknown command '{}'
", command);
135 tokio::spawn(control_future);
140 pub fn abort_worker_async(upid: UPID) {
141 tokio::spawn(async move {
142 if let Err(err) = abort_worker(upid).await {
143 eprintln!("abort worker failed
- {}
", err);
148 pub async fn abort_worker(upid: UPID) -> Result<(), Error> {
150 let target_pid = upid.pid;
152 let socketname = format!(
153 "\0{}
/proxmox
-task
-control
-{}
.sock
", PROXMOX_BACKUP_VAR_RUN_DIR, target_pid);
156 "command
": "abort
-task
",
157 "upid
": upid.to_string(),
160 super::send_command(socketname, cmd).map_ok(|_| ()).await
163 fn parse_worker_status_line(line: &str) -> Result<(String, UPID, Option<TaskState>), Error> {
165 let data = line.splitn(3, ' ').collect::<Vec<&str>>();
167 let len = data.len();
170 1 => Ok((data[0].to_owned(), data[0].parse::<UPID>()?, None)),
172 let endtime = i64::from_str_radix(data[1], 16)?;
173 let state = TaskState::from_endtime_and_message(endtime, data[2])?;
174 Ok((data[0].to_owned(), data[0].parse::<UPID>()?, Some(state)))
176 _ => bail!("wrong number of components
"),
180 /// Create task log directory with correct permissions
181 pub fn create_task_log_dirs() -> Result<(), Error> {
184 let backup_user = crate::backup::backup_user()?;
185 let opts = CreateOptions::new()
186 .owner(backup_user.uid)
187 .group(backup_user.gid);
189 create_path(PROXMOX_BACKUP_LOG_DIR, None, Some(opts.clone()))?;
190 create_path(PROXMOX_BACKUP_TASK_DIR, None, Some(opts.clone()))?;
191 create_path(PROXMOX_BACKUP_VAR_RUN_DIR, None, Some(opts))?;
193 }).map_err(|err: Error| format_err!("unable to create task log dir
- {}
", err))?;
198 /// Read endtime (time of last log line) and exitstatus from task log file
/// If there is not a single line with a valid datetime, we assume the
200 /// starttime to be the endtime
201 pub fn upid_read_status(upid: &UPID) -> Result<TaskState, Error> {
203 let mut status = TaskState::Unknown { endtime: upid.starttime };
205 let path = upid.log_path();
207 let mut file = File::open(path)?;
209 /// speedup - only read tail
211 use std::io::SeekFrom;
212 let _ = file.seek(SeekFrom::End(-8192)); // ignore errors
214 let mut data = Vec::with_capacity(8192);
215 file.read_to_end(&mut data)?;
217 // task logs should end with newline, we do not want it here
218 if data.len() > 0 && data[data.len()-1] == b'\n' {
224 for pos in (0..data.len()).rev() {
225 if data[pos] == b'\n' {
226 start = data.len().min(pos + 1);
233 let last_line = std::str::from_utf8(last_line)
234 .map_err(|err| format_err!("upid_read_status
: utf8 parse failed
: {}
", err))?;
236 let mut iter = last_line.splitn(2, ": ");
237 if let Some(time_str) = iter.next() {
238 if let Ok(endtime) = proxmox::tools::time::parse_rfc3339(time_str) {
239 if let Some(rest) = iter.next().and_then(|rest| rest.strip_prefix("TASK
")) {
240 if let Ok(state) = TaskState::from_endtime_and_message(endtime, rest) {
251 #[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
253 /// The Task ended with an undefined state
254 Unknown { endtime: i64 },
255 /// The Task ended and there were no errors or warnings
257 /// The Task had 'count' amount of warnings and no errors
258 Warning { count: u64, endtime: i64 },
259 /// The Task ended with the error described in 'message'
260 Error { message: String, endtime: i64 },
264 pub fn endtime(&self) -> i64 {
266 TaskState::Unknown { endtime } => endtime,
267 TaskState::OK { endtime } => endtime,
268 TaskState::Warning { endtime, .. } => endtime,
269 TaskState::Error { endtime, .. } => endtime,
273 fn result_text(&self) -> String {
275 TaskState::Error { message, .. } => format!("TASK ERROR
: {}
", message),
276 other => format!("TASK {}
", other),
280 fn from_endtime_and_message(endtime: i64, s: &str) -> Result<Self, Error> {
282 Ok(TaskState::Unknown { endtime })
283 } else if s == "OK
" {
284 Ok(TaskState::OK { endtime })
285 } else if s.starts_with("WARNINGS
: ") {
286 let count: u64 = s[10..].parse()?;
287 Ok(TaskState::Warning{ count, endtime })
288 } else if s.len() > 0 {
289 let message = if s.starts_with("ERROR
: ") { &s[7..] } else { s }.to_string();
290 Ok(TaskState::Error{ message, endtime })
292 bail!("unable to parse Task Status '{}'
", s);
297 impl std::cmp::PartialOrd for TaskState {
298 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
299 Some(self.endtime().cmp(&other.endtime()))
303 impl std::cmp::Ord for TaskState {
304 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
305 self.endtime().cmp(&other.endtime())
309 impl std::fmt::Display for TaskState {
310 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
312 TaskState::Unknown { .. } => write!(f, "unknown
"),
313 TaskState::OK { .. }=> write!(f, "OK
"),
314 TaskState::Warning { count, .. } => write!(f, "WARNINGS
: {}
", count),
315 TaskState::Error { message, .. } => write!(f, "{}
", message),
320 /// Task details including parsed UPID
322 /// If there is no `state`, the task is still running.
324 pub struct TaskListInfo {
327 /// UPID string representation
328 pub upid_str: String,
329 /// Task `(endtime, status)` if already finished
330 pub state: Option<TaskState>, // endtime, status
333 fn lock_task_list_files(exclusive: bool) -> Result<std::fs::File, Error> {
334 let backup_user = crate::backup::backup_user()?;
336 let lock = open_file_locked(PROXMOX_BACKUP_TASK_LOCK_FN, std::time::Duration::new(10, 0), exclusive)?;
337 nix::unistd::chown(PROXMOX_BACKUP_TASK_LOCK_FN, Some(backup_user.uid), Some(backup_user.gid))?;
342 // atomically read/update the task list, update status of finished tasks
343 // new_upid is added to the list when specified.
344 fn update_active_workers(new_upid: Option<&UPID>) -> Result<(), Error> {
346 let backup_user = crate::backup::backup_user()?;
348 let lock = lock_task_list_files(true)?;
350 let mut finish_list: Vec<TaskListInfo> = read_task_file_from_path(PROXMOX_BACKUP_INDEX_TASK_FN)?;
351 let mut active_list: Vec<TaskListInfo> = read_task_file_from_path(PROXMOX_BACKUP_ACTIVE_TASK_FN)?
354 if info.state.is_some() {
355 // this can happen when the active file still includes finished tasks
356 finish_list.push(info);
360 if !worker_is_active_local(&info.upid) {
361 println!("Detected stopped UPID {}
", &info.upid_str);
362 let now = proxmox::tools::time::epoch_i64();
363 let status = upid_read_status(&info.upid)
364 .unwrap_or_else(|_| TaskState::Unknown { endtime: now });
365 finish_list.push(TaskListInfo {
367 upid_str: info.upid_str,
376 if let Some(upid) = new_upid {
377 active_list.push(TaskListInfo { upid: upid.clone(), upid_str: upid.to_string(), state: None });
380 let active_raw = render_task_list(&active_list);
383 PROXMOX_BACKUP_ACTIVE_TASK_FN,
384 active_raw.as_bytes(),
386 .owner(backup_user.uid)
387 .group(backup_user.gid),
390 finish_list.sort_unstable_by(|a, b| {
391 match (&a.state, &b.state) {
392 (Some(s1), Some(s2)) => s1.cmp(&s2),
393 (Some(_), None) => std::cmp::Ordering::Less,
394 (None, Some(_)) => std::cmp::Ordering::Greater,
395 _ => a.upid.starttime.cmp(&b.upid.starttime),
399 let start = (finish_list.len()-MAX_INDEX_TASKS).max(0);
400 let end = (start+MAX_INDEX_TASKS).min(finish_list.len());
401 let index_raw = render_task_list(&finish_list[start..end]);
404 PROXMOX_BACKUP_INDEX_TASK_FN,
405 index_raw.as_bytes(),
407 .owner(backup_user.uid)
408 .group(backup_user.gid),
411 if !finish_list.is_empty() && start > 0 {
412 match std::fs::OpenOptions::new().append(true).create(true).open(PROXMOX_BACKUP_ARCHIVE_TASK_FN) {
414 for info in &finish_list[0..start] {
415 writer.write_all(render_task_line(&info).as_bytes())?;
418 Err(err) => bail!("could not write task archive
- {}
", err),
421 nix::unistd::chown(PROXMOX_BACKUP_ARCHIVE_TASK_FN, Some(backup_user.uid), Some(backup_user.gid))?;
429 fn render_task_line(info: &TaskListInfo) -> String {
430 let mut raw = String::new();
431 if let Some(status) = &info.state {
432 raw.push_str(&format!("{} {:08X} {}
\n", info.upid_str, status.endtime(), status));
434 raw.push_str(&info.upid_str);
441 fn render_task_list(list: &[TaskListInfo]) -> String {
442 let mut raw = String::new();
444 raw.push_str(&render_task_line(&info));
449 // note this is not locked, caller has to make sure it is
450 // this will skip (and log) lines that are not valid status lines
451 fn read_task_file<R: Read>(reader: R) -> Result<Vec<TaskListInfo>, Error>
453 let reader = BufReader::new(reader);
454 let mut list = Vec::new();
455 for line in reader.lines() {
457 match parse_worker_status_line(&line) {
458 Ok((upid_str, upid, state)) => list.push(TaskListInfo {
464 eprintln!("unable to parse worker status '{}'
- {}
", line, err);
473 // note this is not locked, caller has to make sure it is
474 fn read_task_file_from_path<P>(path: P) -> Result<Vec<TaskListInfo>, Error>
476 P: AsRef<std::path::Path> + std::fmt::Debug,
478 let file = match File::open(&path) {
480 Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(Vec::new()),
481 Err(err) => bail!("unable to open task list {:?}
- {}
", path, err),
494 pub struct TaskListInfoIterator {
495 list: VecDeque<TaskListInfo>,
497 archive: Option<LogRotateFiles>,
501 impl TaskListInfoIterator {
502 pub fn new(active_only: bool) -> Result<Self, Error> {
503 let (read_lock, active_list) = {
504 let lock = lock_task_list_files(false)?;
505 let active_list = read_task_file_from_path(PROXMOX_BACKUP_ACTIVE_TASK_FN)?;
507 let needs_update = active_list
509 .any(|info| info.state.is_none() && !worker_is_active_local(&info.upid));
513 update_active_workers(None)?;
514 let lock = lock_task_list_files(false)?;
515 let active_list = read_task_file_from_path(PROXMOX_BACKUP_ACTIVE_TASK_FN)?;
522 let archive = if active_only {
525 let logrotate = LogRotate::new(PROXMOX_BACKUP_ARCHIVE_TASK_FN, true).ok_or_else(|| format_err!("could not get archive file names
"))?;
526 Some(logrotate.files())
529 let file = if active_only { TaskFile::End } else { TaskFile::Active };
530 let lock = if active_only { None } else { Some(read_lock) };
533 list: active_list.into(),
541 impl Iterator for TaskListInfoIterator {
542 type Item = Result<TaskListInfo, Error>;
544 fn next(&mut self) -> Option<Self::Item> {
546 if let Some(element) = self.list.pop_back() {
547 return Some(Ok(element));
550 TaskFile::Active => {
551 let index = match read_task_file_from_path(PROXMOX_BACKUP_INDEX_TASK_FN) {
553 Err(err) => return Some(Err(err)),
555 self.list.append(&mut index.into());
556 self.file = TaskFile::Index;
558 TaskFile::Index | TaskFile::Archive => {
559 if let Some(mut archive) = self.archive.take() {
560 if let Some(file) = archive.next() {
561 let list = match read_task_file(file) {
563 Err(err) => return Some(Err(err)),
565 self.list.append(&mut list.into());
566 self.archive = Some(archive);
567 self.file = TaskFile::Archive;
571 self.file = TaskFile::End;
575 TaskFile::End => return None,
582 /// Launch long running worker tasks.
/// A worker task can either be a whole thread, or simply a tokio
585 /// task/future. Each task can `log()` messages, which are stored
586 /// persistently to files. Task should poll the `abort_requested`
587 /// flag, and stop execution when requested.
589 pub struct WorkerTask {
591 data: Mutex<WorkerTaskData>,
592 abort_requested: AtomicBool,
595 impl std::fmt::Display for WorkerTask {
597 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
603 struct WorkerTaskData {
605 progress: f64, // 0..1
607 pub abort_listeners: Vec<oneshot::Sender<()>>,
610 impl Drop for WorkerTask {
613 println!("unregister worker
");
619 pub fn new(worker_type: &str, worker_id: Option<String>, userid: Userid, to_stdout: bool) -> Result<Arc<Self>, Error> {
620 println!("register worker
");
622 let upid = UPID::new(worker_type, worker_id, userid)?;
623 let task_id = upid.task_id;
625 let mut path = std::path::PathBuf::from(PROXMOX_BACKUP_TASK_DIR);
627 path.push(format!("{:02X}
", upid.pstart % 256));
629 let backup_user = crate::backup::backup_user()?;
631 create_path(&path, None, Some(CreateOptions::new().owner(backup_user.uid).group(backup_user.gid)))?;
633 path.push(upid.to_string());
635 println!("FILE
: {:?}
", path);
637 let logger = FileLogger::new(&path, to_stdout)?;
638 nix::unistd::chown(&path, Some(backup_user.uid), Some(backup_user.gid))?;
640 let worker = Arc::new(Self {
642 abort_requested: AtomicBool::new(false),
643 data: Mutex::new(WorkerTaskData {
647 abort_listeners: vec![],
651 // scope to drop the lock again after inserting
653 let mut hash = WORKER_TASK_LIST.lock().unwrap();
654 hash.insert(task_id, worker.clone());
655 super::set_worker_count(hash.len());
658 update_active_workers(Some(&upid))?;
663 /// Spawn a new tokio task/future.
666 worker_id: Option<String>,
670 ) -> Result<String, Error>
671 where F: Send + 'static + FnOnce(Arc<WorkerTask>) -> T,
672 T: Send + 'static + Future<Output = Result<(), Error>>,
674 let worker = WorkerTask::new(worker_type, worker_id, userid, to_stdout)?;
675 let upid_str = worker.upid.to_string();
676 let f = f(worker.clone());
677 tokio::spawn(async move {
678 let result = f.await;
679 worker.log_result(&result);
685 /// Create a new worker thread.
686 pub fn new_thread<F>(
688 worker_id: Option<String>,
692 ) -> Result<String, Error>
693 where F: Send + UnwindSafe + 'static + FnOnce(Arc<WorkerTask>) -> Result<(), Error>
695 println!("register worker thread
");
697 let worker = WorkerTask::new(worker_type, worker_id, userid, to_stdout)?;
698 let upid_str = worker.upid.to_string();
700 let _child = std::thread::Builder::new().name(upid_str.clone()).spawn(move || {
701 let worker1 = worker.clone();
702 let result = match std::panic::catch_unwind(move || f(worker1)) {
705 match panic.downcast::<&str>() {
707 Err(format_err!("worker panicked
: {}
", panic_msg))
710 Err(format_err!("worker panicked
: unknown
type."))
716 worker.log_result(&result);
722 /// create state from self and a result
723 pub fn create_state(&self, result: &Result<(), Error>) -> TaskState {
724 let warn_count = self.data.lock().unwrap().warn_count;
726 let endtime = proxmox::tools::time::epoch_i64();
728 if let Err(err) = result {
729 TaskState::Error { message: err.to_string(), endtime }
730 } else if warn_count > 0 {
731 TaskState::Warning { count: warn_count, endtime }
733 TaskState::OK { endtime }
/// Log task result, remove task from running list
///
/// Writes the "TASK ..." result line to the task log, removes the task from
/// the in-memory worker registry, refreshes the task list files and updates
/// the worker count.
pub fn log_result(&self, result: &Result<(), Error>) {
    let state = self.create_state(result);
    self.log(state.result_text());

    // The registry lock is only held for this statement; it must not be held
    // while calling update_active_workers(), which locks the registry itself
    // (via worker_is_active_local).
    WORKER_TASK_LIST.lock().unwrap().remove(&self.upid.task_id);
    // best effort: a failed task list update is ignored here
    let _ = update_active_workers(None);
    super::set_worker_count(WORKER_TASK_LIST.lock().unwrap().len());
}
748 pub fn log<S: AsRef<str>>(&self, msg: S) {
749 let mut data = self.data.lock().unwrap();
750 data.logger.log(msg);
753 /// Log a message as warning.
754 pub fn warn<S: AsRef<str>>(&self, msg: S) {
755 let mut data = self.data.lock().unwrap();
756 data.logger.log(format!("WARN
: {}
", msg.as_ref()));
757 data.warn_count += 1;
760 /// Set progress indicator
761 pub fn progress(&self, progress: f64) {
762 if progress >= 0.0 && progress <= 1.0 {
763 let mut data = self.data.lock().unwrap();
764 data.progress = progress;
766 // fixme: log!("task '{}'
: ignoring strange value
for progress '{}'
", self.upid, progress);
771 pub fn request_abort(&self) {
772 eprintln!("set abort flag
for worker {}
", self.upid);
773 self.abort_requested.store(true, Ordering::SeqCst);
775 let mut data = self.data.lock().unwrap();
777 match data.abort_listeners.pop() {
780 let _ = ch.send(()); // ignore erros here
/// Test if abort was requested.
///
/// Returns the current value of the abort flag, which is set by
/// `request_abort`.
pub fn abort_requested(&self) -> bool {
    self.abort_requested.load(Ordering::SeqCst)
}
791 /// Fail if abort was requested.
792 pub fn fail_on_abort(&self) -> Result<(), Error> {
793 if self.abort_requested() {
794 bail!("abort requested
- aborting task
");
799 /// Get a future which resolves on task abort
800 pub fn abort_future(&self) -> oneshot::Receiver<()> {
801 let (tx, rx) = oneshot::channel::<()>();
803 let mut data = self.data.lock().unwrap();
804 if self.abort_requested() {
807 data.abort_listeners.push(tx);
812 pub fn upid(&self) -> &UPID {