1 use std
::collections
::HashMap
;
3 use std
::io
::{BufRead, BufReader}
;
4 use std
::panic
::UnwindSafe
;
5 use std
::sync
::atomic
::{AtomicBool, Ordering}
;
6 use std
::sync
::{Arc, Mutex}
;
9 use anyhow
::{bail, format_err, Error}
;
11 use lazy_static
::lazy_static
;
13 use serde_json
::{json, Value}
;
14 use serde
::{Serialize, Deserialize}
;
15 use tokio
::sync
::oneshot
;
17 use proxmox
::sys
::linux
::procfs
;
18 use proxmox
::try_block
;
19 use proxmox
::tools
::fs
::{create_path, open_file_locked, replace_file, CreateOptions}
;
23 use crate::tools
::FileLogger
;
24 use crate::api2
::types
::Userid
;
// These path fragments are defined as macros (rather than plain `const`s)
// because they are combined below with `concat!`, which only accepts
// literals / macro expansions — not `const` items.
macro_rules! PROXMOX_BACKUP_VAR_RUN_DIR_M { () => ("/run/proxmox-backup") }
macro_rules! PROXMOX_BACKUP_LOG_DIR_M { () => ("/var/log/proxmox-backup") }
macro_rules! PROXMOX_BACKUP_TASK_DIR_M { () => (concat!( PROXMOX_BACKUP_LOG_DIR_M!(), "/tasks")) }
/// Runtime directory; the abstract-namespace control-socket names below are derived from it.
pub const PROXMOX_BACKUP_VAR_RUN_DIR: &str = PROXMOX_BACKUP_VAR_RUN_DIR_M!();
/// Base log directory.
pub const PROXMOX_BACKUP_LOG_DIR: &str = PROXMOX_BACKUP_LOG_DIR_M!();
/// Directory holding the per-task log files.
pub const PROXMOX_BACKUP_TASK_DIR: &str = PROXMOX_BACKUP_TASK_DIR_M!();
/// Lock file serializing updates of the `active` task-list file.
pub const PROXMOX_BACKUP_TASK_LOCK_FN: &str = concat!(PROXMOX_BACKUP_TASK_DIR_M!(), "/.active.lock");
/// File with one line per known (running or recently finished) task.
pub const PROXMOX_BACKUP_ACTIVE_TASK_FN: &str = concat!(PROXMOX_BACKUP_TASK_DIR_M!(), "/active");
37 static ref WORKER_TASK_LIST
: Mutex
<HashMap
<usize, Arc
<WorkerTask
>>> = Mutex
::new(HashMap
::new());
39 static ref MY_PID
: i32 = unsafe { libc::getpid() }
;
40 static ref MY_PID_PSTART
: u64 = procfs
::PidStat
::read_from_pid(Pid
::from_raw(*MY_PID
))
45 /// Test if the task is still running
46 pub async
fn worker_is_active(upid
: &UPID
) -> Result
<bool
, Error
> {
47 if (upid
.pid
== *MY_PID
) && (upid
.pstart
== *MY_PID_PSTART
) {
48 return Ok(WORKER_TASK_LIST
.lock().unwrap().contains_key(&upid
.task_id
));
51 if !procfs
::check_process_running_pstart(upid
.pid
, upid
.pstart
).is_some() {
55 let socketname
= format
!(
56 "\0{}/proxmox-task-control-{}.sock", PROXMOX_BACKUP_VAR_RUN_DIR
, upid
.pid
);
60 "upid": upid
.to_string(),
63 let status
= super::send_command(socketname
, cmd
).await?
;
65 if let Some(active
) = status
.as_bool() {
68 bail
!("got unexpected result {:?} (expected bool)", status
);
72 /// Test if the task is still running (fast but inaccurate implementation)
74 /// If the task is spanned from a different process, we simply return if
75 /// that process is still running. This information is good enough to detect
77 pub fn worker_is_active_local(upid
: &UPID
) -> bool
{
78 if (upid
.pid
== *MY_PID
) && (upid
.pstart
== *MY_PID_PSTART
) {
79 WORKER_TASK_LIST
.lock().unwrap().contains_key(&upid
.task_id
)
81 procfs
::check_process_running_pstart(upid
.pid
, upid
.pstart
).is_some()
85 pub fn create_task_control_socket() -> Result
<(), Error
> {
87 let socketname
= format
!(
88 "\0{}/proxmox-task-control-{}.sock", PROXMOX_BACKUP_VAR_RUN_DIR
, *MY_PID
);
90 let control_future
= super::create_control_socket(socketname
, |param
| {
91 let param
= param
.as_object()
92 .ok_or_else(|| format_err
!("unable to parse parameters (expected json object)"))?
;
93 if param
.keys().count() != 2 { bail!("wrong number of parameters"); }
95 let command
= param
["command"].as_str()
96 .ok_or_else(|| format_err
!("unable to parse parameters (missing command)"))?
;
98 // we have only two commands for now
99 if !(command
== "abort-task" || command
== "status") { bail!("got unknown command '{}'
", command); }
101 let upid_str = param["upid
"].as_str()
102 .ok_or_else(|| format_err!("unable to parse
parameters (missing upid
)"))?;
104 let upid = upid_str.parse::<UPID>()?;
106 if !((upid.pid == *MY_PID) && (upid.pstart == *MY_PID_PSTART)) {
107 bail!("upid does not belong to this process
");
110 let hash = WORKER_TASK_LIST.lock().unwrap();
114 if let Some(ref worker) = hash.get(&upid.task_id) {
115 worker.request_abort();
117 // assume task is already stopped
122 let active = hash.contains_key(&upid.task_id);
126 bail!("got unknown command '{}'
", command);
131 tokio::spawn(control_future);
136 pub fn abort_worker_async(upid: UPID) {
137 tokio::spawn(async move {
138 if let Err(err) = abort_worker(upid).await {
139 eprintln!("abort worker failed
- {}
", err);
144 pub async fn abort_worker(upid: UPID) -> Result<(), Error> {
146 let target_pid = upid.pid;
148 let socketname = format!(
149 "\0{}
/proxmox
-task
-control
-{}
.sock
", PROXMOX_BACKUP_VAR_RUN_DIR, target_pid);
152 "command
": "abort
-task
",
153 "upid
": upid.to_string(),
156 super::send_command(socketname, cmd).map_ok(|_| ()).await
159 fn parse_worker_status_line(line: &str) -> Result<(String, UPID, Option<(i64, String)>), Error> {
161 let data = line.splitn(3, ' ').collect::<Vec<&str>>();
163 let len = data.len();
166 1 => Ok((data[0].to_owned(), data[0].parse::<UPID>()?, None)),
168 let endtime = i64::from_str_radix(data[1], 16)?;
169 Ok((data[0].to_owned(), data[0].parse::<UPID>()?, Some((endtime, data[2].to_owned()))))
171 _ => bail!("wrong number of components
"),
175 /// Create task log directory with correct permissions
176 pub fn create_task_log_dirs() -> Result<(), Error> {
179 let backup_user = crate::backup::backup_user()?;
180 let opts = CreateOptions::new()
181 .owner(backup_user.uid)
182 .group(backup_user.gid);
184 create_path(PROXMOX_BACKUP_LOG_DIR, None, Some(opts.clone()))?;
185 create_path(PROXMOX_BACKUP_TASK_DIR, None, Some(opts.clone()))?;
186 create_path(PROXMOX_BACKUP_VAR_RUN_DIR, None, Some(opts))?;
188 }).map_err(|err: Error| format_err!("unable to create task log dir
- {}
", err))?;
193 /// Read endtime (time of last log line) and exitstatus from task log file
194 /// If there is not a single line with at valid datetime, we assume the
195 /// starttime to be the endtime
196 pub fn upid_read_status(upid: &UPID) -> Result<(i64, TaskState), Error> {
197 let mut status = TaskState::Unknown;
198 let mut time = upid.starttime;
200 let path = upid.log_path();
202 let mut file = File::open(path)?;
204 /// speedup - only read tail
206 use std::io::SeekFrom;
207 let _ = file.seek(SeekFrom::End(-8192)); // ignore errors
209 let reader = BufReader::new(file);
211 for line in reader.lines() {
214 let mut iter = line.splitn(2, ": ");
215 if let Some(time_str) = iter.next() {
216 time = chrono::DateTime::parse_from_rfc3339(time_str)
217 .map_err(|err| format_err!("cannot parse '{}'
: {}
", time_str, err))?
222 match iter.next().and_then(|rest| rest.strip_prefix("TASK
")) {
225 if let Ok(state) = rest.parse() {
236 #[derive(Debug, PartialEq, Serialize, Deserialize)]
238 /// The Task ended with an undefined state
240 /// The Task ended and there were no errors or warnings
242 /// The Task had 'count' amount of warnings and no errors
243 Warning { count: u64 },
244 /// The Task ended with the error described in 'message'
245 Error { message: String },
249 fn result_text(&self) -> String {
251 TaskState::Error { message } => format!("TASK ERROR
: {}
", message),
252 other => format!("TASK {}
", other),
257 impl std::fmt::Display for TaskState {
258 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
260 TaskState::Unknown => write!(f, "unknown
"),
261 TaskState::OK => write!(f, "OK
"),
262 TaskState::Warning { count } => write!(f, "WARNINGS
: {}
", count),
263 TaskState::Error { message } => write!(f, "{}
", message),
268 impl std::str::FromStr for TaskState {
271 fn from_str(s: &str) -> Result<Self, Self::Err> {
273 Ok(TaskState::Unknown)
274 } else if s == "OK
" {
276 } else if s.starts_with("WARNINGS
: ") {
277 let count: u64 = s[10..].parse()?;
278 Ok(TaskState::Warning{ count })
279 } else if s.len() > 0 {
280 let message = if s.starts_with("ERROR
: ") { &s[7..] } else { s }.to_string();
281 Ok(TaskState::Error{ message })
283 bail!("unable to parse Task Status '{}'
", s);
288 /// Task details including parsed UPID
290 /// If there is no `state`, the task is still running.
292 pub struct TaskListInfo {
295 /// UPID string representation
296 pub upid_str: String,
297 /// Task `(endtime, status)` if already finished
298 pub state: Option<(i64, TaskState)>, // endtime, status
301 // atomically read/update the task list, update status of finished tasks
302 // new_upid is added to the list when specified.
303 // Returns a sorted list of known tasks,
304 fn update_active_workers(new_upid: Option<&UPID>) -> Result<Vec<TaskListInfo>, Error> {
306 let backup_user = crate::backup::backup_user()?;
308 let lock = open_file_locked(PROXMOX_BACKUP_TASK_LOCK_FN, std::time::Duration::new(10, 0))?;
309 nix::unistd::chown(PROXMOX_BACKUP_TASK_LOCK_FN, Some(backup_user.uid), Some(backup_user.gid))?;
311 let reader = match File::open(PROXMOX_BACKUP_ACTIVE_TASK_FN) {
312 Ok(f) => Some(BufReader::new(f)),
314 if err.kind() == std::io::ErrorKind::NotFound {
317 bail!("unable to open active worker {:?}
- {}
", PROXMOX_BACKUP_ACTIVE_TASK_FN, err);
322 let mut active_list = vec![];
323 let mut finish_list = vec![];
325 if let Some(lines) = reader.map(|r| r.lines()) {
329 match parse_worker_status_line(&line) {
330 Err(err) => bail!("unable to parse active worker status '{}'
- {}
", line, err),
331 Ok((upid_str, upid, state)) => match state {
332 None if worker_is_active_local(&upid) => {
333 active_list.push(TaskListInfo { upid, upid_str, state: None });
336 println!("Detected stopped UPID {}
", upid_str);
337 let (time, status) = upid_read_status(&upid)
338 .unwrap_or_else(|_| (Local::now().timestamp(), TaskState::Unknown));
339 finish_list.push(TaskListInfo {
340 upid, upid_str, state: Some((time, status))
343 Some((endtime, status)) => {
344 finish_list.push(TaskListInfo {
345 upid, upid_str, state: Some((endtime, status.parse()?))
353 if let Some(upid) = new_upid {
354 active_list.push(TaskListInfo { upid: upid.clone(), upid_str: upid.to_string(), state: None });
357 // assemble list without duplicates
358 // we include all active tasks,
359 // and fill up to 1000 entries with finished tasks
363 let mut task_hash = HashMap::new();
365 for info in active_list {
366 task_hash.insert(info.upid_str.clone(), info);
369 for info in finish_list {
370 if task_hash.len() > max { break; }
371 if !task_hash.contains_key(&info.upid_str) {
372 task_hash.insert(info.upid_str.clone(), info);
376 let mut task_list: Vec<TaskListInfo> = vec![];
377 for (_, info) in task_hash { task_list.push(info); }
379 task_list.sort_unstable_by(|b, a| { // lastest on top
380 match (&a.state, &b.state) {
381 (Some(s1), Some(s2)) => s1.0.cmp(&s2.0),
382 (Some(_), None) => std::cmp::Ordering::Less,
383 (None, Some(_)) => std::cmp::Ordering::Greater,
384 _ => a.upid.starttime.cmp(&b.upid.starttime),
388 let mut raw = String::new();
389 for info in &task_list {
390 if let Some((endtime, status)) = &info.state {
391 raw.push_str(&format!("{} {:08X} {}
\n", info.upid_str, endtime, status));
393 raw.push_str(&info.upid_str);
399 PROXMOX_BACKUP_ACTIVE_TASK_FN,
402 .owner(backup_user.uid)
403 .group(backup_user.gid),
411 /// Returns a sorted list of known tasks
413 /// The list is sorted by `(starttime, endtime)` in ascending order
414 pub fn read_task_list() -> Result<Vec<TaskListInfo>, Error> {
415 update_active_workers(None)
418 /// Launch long running worker tasks.
420 /// A worker task can either be a whole thread, or a simply tokio
421 /// task/future. Each task can `log()` messages, which are stored
422 /// persistently to files. Task should poll the `abort_requested`
423 /// flag, and stop execution when requested.
425 pub struct WorkerTask {
427 data: Mutex<WorkerTaskData>,
428 abort_requested: AtomicBool,
431 impl std::fmt::Display for WorkerTask {
433 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
439 struct WorkerTaskData {
441 progress: f64, // 0..1
443 pub abort_listeners: Vec<oneshot::Sender<()>>,
446 impl Drop for WorkerTask {
449 println!("unregister worker
");
455 pub fn new(worker_type: &str, worker_id: Option<String>, userid: Userid, to_stdout: bool) -> Result<Arc<Self>, Error> {
456 println!("register worker
");
458 let upid = UPID::new(worker_type, worker_id, userid)?;
459 let task_id = upid.task_id;
461 let mut path = std::path::PathBuf::from(PROXMOX_BACKUP_TASK_DIR);
463 path.push(format!("{:02X}
", upid.pstart % 256));
465 let backup_user = crate::backup::backup_user()?;
467 create_path(&path, None, Some(CreateOptions::new().owner(backup_user.uid).group(backup_user.gid)))?;
469 path.push(upid.to_string());
471 println!("FILE
: {:?}
", path);
473 let logger = FileLogger::new(&path, to_stdout)?;
474 nix::unistd::chown(&path, Some(backup_user.uid), Some(backup_user.gid))?;
476 let worker = Arc::new(Self {
478 abort_requested: AtomicBool::new(false),
479 data: Mutex::new(WorkerTaskData {
483 abort_listeners: vec![],
487 // scope to drop the lock again after inserting
489 let mut hash = WORKER_TASK_LIST.lock().unwrap();
490 hash.insert(task_id, worker.clone());
491 super::set_worker_count(hash.len());
494 update_active_workers(Some(&upid))?;
499 /// Spawn a new tokio task/future.
502 worker_id: Option<String>,
506 ) -> Result<String, Error>
507 where F: Send + 'static + FnOnce(Arc<WorkerTask>) -> T,
508 T: Send + 'static + Future<Output = Result<(), Error>>,
510 let worker = WorkerTask::new(worker_type, worker_id, userid, to_stdout)?;
511 let upid_str = worker.upid.to_string();
512 let f = f(worker.clone());
513 tokio::spawn(async move {
514 let result = f.await;
515 worker.log_result(&result);
521 /// Create a new worker thread.
522 pub fn new_thread<F>(
524 worker_id: Option<String>,
528 ) -> Result<String, Error>
529 where F: Send + UnwindSafe + 'static + FnOnce(Arc<WorkerTask>) -> Result<(), Error>
531 println!("register worker thread
");
533 let worker = WorkerTask::new(worker_type, worker_id, userid, to_stdout)?;
534 let upid_str = worker.upid.to_string();
536 let _child = std::thread::Builder::new().name(upid_str.clone()).spawn(move || {
537 let worker1 = worker.clone();
538 let result = match std::panic::catch_unwind(move || f(worker1)) {
541 match panic.downcast::<&str>() {
543 Err(format_err!("worker panicked
: {}
", panic_msg))
546 Err(format_err!("worker panicked
: unknown
type."))
552 worker.log_result(&result);
558 /// create state from self and a result
559 pub fn create_state(&self, result: &Result<(), Error>) -> TaskState {
560 let warn_count = self.data.lock().unwrap().warn_count;
562 if let Err(err) = result {
563 TaskState::Error { message: err.to_string() }
564 } else if warn_count > 0 {
565 TaskState::Warning { count: warn_count }
571 /// Log task result, remove task from running list
572 pub fn log_result(&self, result: &Result<(), Error>) {
573 let state = self.create_state(result);
574 self.log(state.result_text());
576 WORKER_TASK_LIST.lock().unwrap().remove(&self.upid.task_id);
577 let _ = update_active_workers(None);
578 super::set_worker_count(WORKER_TASK_LIST.lock().unwrap().len());
582 pub fn log<S: AsRef<str>>(&self, msg: S) {
583 let mut data = self.data.lock().unwrap();
584 data.logger.log(msg);
587 /// Log a message as warning.
588 pub fn warn<S: AsRef<str>>(&self, msg: S) {
589 let mut data = self.data.lock().unwrap();
590 data.logger.log(format!("WARN
: {}
", msg.as_ref()));
591 data.warn_count += 1;
594 /// Set progress indicator
595 pub fn progress(&self, progress: f64) {
596 if progress >= 0.0 && progress <= 1.0 {
597 let mut data = self.data.lock().unwrap();
598 data.progress = progress;
600 // fixme: log!("task '{}'
: ignoring strange value
for progress '{}'
", self.upid, progress);
605 pub fn request_abort(&self) {
606 eprintln!("set abort flag
for worker {}
", self.upid);
607 self.abort_requested.store(true, Ordering::SeqCst);
609 let mut data = self.data.lock().unwrap();
611 match data.abort_listeners.pop() {
614 let _ = ch.send(()); // ignore erros here
620 /// Test if abort was requested.
621 pub fn abort_requested(&self) -> bool {
622 self.abort_requested.load(Ordering::SeqCst)
625 /// Fail if abort was requested.
626 pub fn fail_on_abort(&self) -> Result<(), Error> {
627 if self.abort_requested() {
628 bail!("abort requested
- aborting task
");
633 /// Get a future which resolves on task abort
634 pub fn abort_future(&self) -> oneshot::Receiver<()> {
635 let (tx, rx) = oneshot::channel::<()>();
637 let mut data = self.data.lock().unwrap();
638 if self.abort_requested() {
641 data.abort_listeners.push(tx);
646 pub fn upid(&self) -> &UPID {