1 use std
::collections
::HashMap
;
3 use std
::io
::{Read, BufRead, BufReader}
;
4 use std
::panic
::UnwindSafe
;
5 use std
::sync
::atomic
::{AtomicBool, Ordering}
;
6 use std
::sync
::{Arc, Mutex}
;
8 use anyhow
::{bail, format_err, Error}
;
10 use lazy_static
::lazy_static
;
12 use serde_json
::{json, Value}
;
13 use serde
::{Serialize, Deserialize}
;
14 use tokio
::sync
::oneshot
;
16 use proxmox
::sys
::linux
::procfs
;
17 use proxmox
::try_block
;
18 use proxmox
::tools
::fs
::{create_path, open_file_locked, replace_file, CreateOptions}
;
22 use crate::tools
::FileLogger
;
23 use crate::api2
::types
::Userid
;
// The base paths are defined as macros (not only as constants) because
// `concat!` accepts literals and macro invocations, but not `const` items.
macro_rules! PROXMOX_BACKUP_VAR_RUN_DIR_M {
    () => {
        "/run/proxmox-backup"
    };
}
macro_rules! PROXMOX_BACKUP_LOG_DIR_M {
    () => {
        "/var/log/proxmox-backup"
    };
}
macro_rules! PROXMOX_BACKUP_TASK_DIR_M {
    () => {
        concat!(PROXMOX_BACKUP_LOG_DIR_M!(), "/tasks")
    };
}

/// Runtime directory (also used as prefix for the task control socket name).
pub const PROXMOX_BACKUP_VAR_RUN_DIR: &str = PROXMOX_BACKUP_VAR_RUN_DIR_M!();
/// Base directory for log files.
pub const PROXMOX_BACKUP_LOG_DIR: &str = PROXMOX_BACKUP_LOG_DIR_M!();
/// Directory below the log directory which holds the task logs.
pub const PROXMOX_BACKUP_TASK_DIR: &str = PROXMOX_BACKUP_TASK_DIR_M!();
/// Lock file guarding updates of the active task list.
pub const PROXMOX_BACKUP_TASK_LOCK_FN: &str =
    concat!(PROXMOX_BACKUP_TASK_DIR_M!(), "/.active.lock");
/// File containing the list of known tasks.
pub const PROXMOX_BACKUP_ACTIVE_TASK_FN: &str =
    concat!(PROXMOX_BACKUP_TASK_DIR_M!(), "/active");
36 static ref WORKER_TASK_LIST
: Mutex
<HashMap
<usize, Arc
<WorkerTask
>>> = Mutex
::new(HashMap
::new());
38 static ref MY_PID
: i32 = unsafe { libc::getpid() }
;
39 static ref MY_PID_PSTART
: u64 = procfs
::PidStat
::read_from_pid(Pid
::from_raw(*MY_PID
))
44 /// Test if the task is still running
45 pub async
fn worker_is_active(upid
: &UPID
) -> Result
<bool
, Error
> {
46 if (upid
.pid
== *MY_PID
) && (upid
.pstart
== *MY_PID_PSTART
) {
47 return Ok(WORKER_TASK_LIST
.lock().unwrap().contains_key(&upid
.task_id
));
50 if !procfs
::check_process_running_pstart(upid
.pid
, upid
.pstart
).is_some() {
54 let socketname
= format
!(
55 "\0{}/proxmox-task-control-{}.sock", PROXMOX_BACKUP_VAR_RUN_DIR
, upid
.pid
);
59 "upid": upid
.to_string(),
62 let status
= super::send_command(socketname
, cmd
).await?
;
64 if let Some(active
) = status
.as_bool() {
67 bail
!("got unexpected result {:?} (expected bool)", status
);
71 /// Test if the task is still running (fast but inaccurate implementation)
73 /// If the task is spanned from a different process, we simply return if
74 /// that process is still running. This information is good enough to detect
76 pub fn worker_is_active_local(upid
: &UPID
) -> bool
{
77 if (upid
.pid
== *MY_PID
) && (upid
.pstart
== *MY_PID_PSTART
) {
78 WORKER_TASK_LIST
.lock().unwrap().contains_key(&upid
.task_id
)
80 procfs
::check_process_running_pstart(upid
.pid
, upid
.pstart
).is_some()
84 pub fn create_task_control_socket() -> Result
<(), Error
> {
86 let socketname
= format
!(
87 "\0{}/proxmox-task-control-{}.sock", PROXMOX_BACKUP_VAR_RUN_DIR
, *MY_PID
);
89 let control_future
= super::create_control_socket(socketname
, |param
| {
90 let param
= param
.as_object()
91 .ok_or_else(|| format_err
!("unable to parse parameters (expected json object)"))?
;
92 if param
.keys().count() != 2 { bail!("wrong number of parameters"); }
94 let command
= param
["command"].as_str()
95 .ok_or_else(|| format_err
!("unable to parse parameters (missing command)"))?
;
97 // we have only two commands for now
98 if !(command
== "abort-task" || command
== "status") { bail!("got unknown command '{}'
", command); }
100 let upid_str = param["upid
"].as_str()
101 .ok_or_else(|| format_err!("unable to parse
parameters (missing upid
)"))?;
103 let upid = upid_str.parse::<UPID>()?;
105 if !((upid.pid == *MY_PID) && (upid.pstart == *MY_PID_PSTART)) {
106 bail!("upid does not belong to this process
");
109 let hash = WORKER_TASK_LIST.lock().unwrap();
113 if let Some(ref worker) = hash.get(&upid.task_id) {
114 worker.request_abort();
116 // assume task is already stopped
121 let active = hash.contains_key(&upid.task_id);
125 bail!("got unknown command '{}'
", command);
130 tokio::spawn(control_future);
135 pub fn abort_worker_async(upid: UPID) {
136 tokio::spawn(async move {
137 if let Err(err) = abort_worker(upid).await {
138 eprintln!("abort worker failed
- {}
", err);
143 pub async fn abort_worker(upid: UPID) -> Result<(), Error> {
145 let target_pid = upid.pid;
147 let socketname = format!(
148 "\0{}
/proxmox
-task
-control
-{}
.sock
", PROXMOX_BACKUP_VAR_RUN_DIR, target_pid);
151 "command
": "abort
-task
",
152 "upid
": upid.to_string(),
155 super::send_command(socketname, cmd).map_ok(|_| ()).await
158 fn parse_worker_status_line(line: &str) -> Result<(String, UPID, Option<TaskState>), Error> {
160 let data = line.splitn(3, ' ').collect::<Vec<&str>>();
162 let len = data.len();
165 1 => Ok((data[0].to_owned(), data[0].parse::<UPID>()?, None)),
167 let endtime = i64::from_str_radix(data[1], 16)?;
168 let state = TaskState::from_endtime_and_message(endtime, data[2])?;
169 Ok((data[0].to_owned(), data[0].parse::<UPID>()?, Some(state)))
171 _ => bail!("wrong number of components
"),
175 /// Create task log directory with correct permissions
176 pub fn create_task_log_dirs() -> Result<(), Error> {
179 let backup_user = crate::backup::backup_user()?;
180 let opts = CreateOptions::new()
181 .owner(backup_user.uid)
182 .group(backup_user.gid);
184 create_path(PROXMOX_BACKUP_LOG_DIR, None, Some(opts.clone()))?;
185 create_path(PROXMOX_BACKUP_TASK_DIR, None, Some(opts.clone()))?;
186 create_path(PROXMOX_BACKUP_VAR_RUN_DIR, None, Some(opts))?;
188 }).map_err(|err: Error| format_err!("unable to create task log dir
- {}
", err))?;
193 /// Read endtime (time of last log line) and exitstatus from task log file
194 /// If there is not a single line with at valid datetime, we assume the
195 /// starttime to be the endtime
196 pub fn upid_read_status(upid: &UPID) -> Result<TaskState, Error> {
198 let mut status = TaskState::Unknown { endtime: upid.starttime };
200 let path = upid.log_path();
202 let mut file = File::open(path)?;
204 /// speedup - only read tail
206 use std::io::SeekFrom;
207 let _ = file.seek(SeekFrom::End(-8192)); // ignore errors
209 let mut data = Vec::with_capacity(8192);
210 file.read_to_end(&mut data)?;
212 // task logs should end with newline, we do not want it here
213 if data[data.len()-1] == b'\n' {
219 for pos in (0..data.len()).rev() {
220 if data[pos] == b'\n' {
228 let last_line = std::str::from_utf8(last_line)
229 .map_err(|err| format_err!("upid_read_status
: utf8 parse failed
: {}
", err))?;
231 let mut iter = last_line.splitn(2, ": ");
232 if let Some(time_str) = iter.next() {
233 if let Ok(endtime) = proxmox::tools::time::parse_rfc3339(time_str) {
234 if let Some(rest) = iter.next().and_then(|rest| rest.strip_prefix("TASK
")) {
235 if let Ok(state) = TaskState::from_endtime_and_message(endtime, rest) {
246 #[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
248 /// The Task ended with an undefined state
249 Unknown { endtime: i64 },
250 /// The Task ended and there were no errors or warnings
252 /// The Task had 'count' amount of warnings and no errors
253 Warning { count: u64, endtime: i64 },
254 /// The Task ended with the error described in 'message'
255 Error { message: String, endtime: i64 },
259 pub fn endtime(&self) -> i64 {
261 TaskState::Unknown { endtime } => endtime,
262 TaskState::OK { endtime } => endtime,
263 TaskState::Warning { endtime, .. } => endtime,
264 TaskState::Error { endtime, .. } => endtime,
268 fn result_text(&self) -> String {
270 TaskState::Error { message, .. } => format!("TASK ERROR
: {}
", message),
271 other => format!("TASK {}
", other),
275 fn from_endtime_and_message(endtime: i64, s: &str) -> Result<Self, Error> {
277 Ok(TaskState::Unknown { endtime })
278 } else if s == "OK
" {
279 Ok(TaskState::OK { endtime })
280 } else if s.starts_with("WARNINGS
: ") {
281 let count: u64 = s[10..].parse()?;
282 Ok(TaskState::Warning{ count, endtime })
283 } else if s.len() > 0 {
284 let message = if s.starts_with("ERROR
: ") { &s[7..] } else { s }.to_string();
285 Ok(TaskState::Error{ message, endtime })
287 bail!("unable to parse Task Status '{}'
", s);
292 impl std::cmp::PartialOrd for TaskState {
293 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
294 Some(self.endtime().cmp(&other.endtime()))
298 impl std::cmp::Ord for TaskState {
299 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
300 self.endtime().cmp(&other.endtime())
304 impl std::fmt::Display for TaskState {
305 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
307 TaskState::Unknown { .. } => write!(f, "unknown
"),
308 TaskState::OK { .. }=> write!(f, "OK
"),
309 TaskState::Warning { count, .. } => write!(f, "WARNINGS
: {}
", count),
310 TaskState::Error { message, .. } => write!(f, "{}
", message),
315 /// Task details including parsed UPID
317 /// If there is no `state`, the task is still running.
319 pub struct TaskListInfo {
322 /// UPID string representation
323 pub upid_str: String,
324 /// Task `(endtime, status)` if already finished
325 pub state: Option<TaskState>, // endtime, status
328 // atomically read/update the task list, update status of finished tasks
329 // new_upid is added to the list when specified.
330 // Returns a sorted list of known tasks,
331 fn update_active_workers(new_upid: Option<&UPID>) -> Result<Vec<TaskListInfo>, Error> {
333 let backup_user = crate::backup::backup_user()?;
335 let lock = open_file_locked(PROXMOX_BACKUP_TASK_LOCK_FN, std::time::Duration::new(10, 0))?;
336 nix::unistd::chown(PROXMOX_BACKUP_TASK_LOCK_FN, Some(backup_user.uid), Some(backup_user.gid))?;
338 let reader = match File::open(PROXMOX_BACKUP_ACTIVE_TASK_FN) {
339 Ok(f) => Some(BufReader::new(f)),
341 if err.kind() == std::io::ErrorKind::NotFound {
344 bail!("unable to open active worker {:?}
- {}
", PROXMOX_BACKUP_ACTIVE_TASK_FN, err);
349 let mut active_list = vec![];
350 let mut finish_list = vec![];
352 if let Some(lines) = reader.map(|r| r.lines()) {
356 match parse_worker_status_line(&line) {
357 Err(err) => bail!("unable to parse active worker status '{}'
- {}
", line, err),
358 Ok((upid_str, upid, state)) => match state {
359 None if worker_is_active_local(&upid) => {
360 active_list.push(TaskListInfo { upid, upid_str, state: None });
363 println!("Detected stopped UPID {}
", upid_str);
364 let now = proxmox::tools::time::epoch_i64();
365 let status = upid_read_status(&upid)
366 .unwrap_or_else(|_| TaskState::Unknown { endtime: now });
367 finish_list.push(TaskListInfo {
368 upid, upid_str, state: Some(status)
372 finish_list.push(TaskListInfo {
373 upid, upid_str, state: Some(status)
381 if let Some(upid) = new_upid {
382 active_list.push(TaskListInfo { upid: upid.clone(), upid_str: upid.to_string(), state: None });
385 // assemble list without duplicates
386 // we include all active tasks,
387 // and fill up to 1000 entries with finished tasks
391 let mut task_hash = HashMap::new();
393 for info in active_list {
394 task_hash.insert(info.upid_str.clone(), info);
397 for info in finish_list {
398 if task_hash.len() > max { break; }
399 if !task_hash.contains_key(&info.upid_str) {
400 task_hash.insert(info.upid_str.clone(), info);
404 let mut task_list: Vec<TaskListInfo> = vec![];
405 for (_, info) in task_hash { task_list.push(info); }
407 task_list.sort_unstable_by(|b, a| { // lastest on top
408 match (&a.state, &b.state) {
409 (Some(s1), Some(s2)) => s1.cmp(&s2),
410 (Some(_), None) => std::cmp::Ordering::Less,
411 (None, Some(_)) => std::cmp::Ordering::Greater,
412 _ => a.upid.starttime.cmp(&b.upid.starttime),
416 let mut raw = String::new();
417 for info in &task_list {
418 if let Some(status) = &info.state {
419 raw.push_str(&format!("{} {:08X} {}
\n", info.upid_str, status.endtime(), status));
421 raw.push_str(&info.upid_str);
427 PROXMOX_BACKUP_ACTIVE_TASK_FN,
430 .owner(backup_user.uid)
431 .group(backup_user.gid),
439 /// Returns a sorted list of known tasks
441 /// The list is sorted by `(starttime, endtime)` in ascending order
442 pub fn read_task_list() -> Result<Vec<TaskListInfo>, Error> {
443 update_active_workers(None)
446 /// Launch long running worker tasks.
448 /// A worker task can either be a whole thread, or a simply tokio
449 /// task/future. Each task can `log()` messages, which are stored
450 /// persistently to files. Task should poll the `abort_requested`
451 /// flag, and stop execution when requested.
453 pub struct WorkerTask {
455 data: Mutex<WorkerTaskData>,
456 abort_requested: AtomicBool,
459 impl std::fmt::Display for WorkerTask {
461 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
467 struct WorkerTaskData {
469 progress: f64, // 0..1
471 pub abort_listeners: Vec<oneshot::Sender<()>>,
474 impl Drop for WorkerTask {
477 println!("unregister worker
");
483 pub fn new(worker_type: &str, worker_id: Option<String>, userid: Userid, to_stdout: bool) -> Result<Arc<Self>, Error> {
484 println!("register worker
");
486 let upid = UPID::new(worker_type, worker_id, userid)?;
487 let task_id = upid.task_id;
489 let mut path = std::path::PathBuf::from(PROXMOX_BACKUP_TASK_DIR);
491 path.push(format!("{:02X}
", upid.pstart % 256));
493 let backup_user = crate::backup::backup_user()?;
495 create_path(&path, None, Some(CreateOptions::new().owner(backup_user.uid).group(backup_user.gid)))?;
497 path.push(upid.to_string());
499 println!("FILE
: {:?}
", path);
501 let logger = FileLogger::new(&path, to_stdout)?;
502 nix::unistd::chown(&path, Some(backup_user.uid), Some(backup_user.gid))?;
504 let worker = Arc::new(Self {
506 abort_requested: AtomicBool::new(false),
507 data: Mutex::new(WorkerTaskData {
511 abort_listeners: vec![],
515 // scope to drop the lock again after inserting
517 let mut hash = WORKER_TASK_LIST.lock().unwrap();
518 hash.insert(task_id, worker.clone());
519 super::set_worker_count(hash.len());
522 update_active_workers(Some(&upid))?;
527 /// Spawn a new tokio task/future.
530 worker_id: Option<String>,
534 ) -> Result<String, Error>
535 where F: Send + 'static + FnOnce(Arc<WorkerTask>) -> T,
536 T: Send + 'static + Future<Output = Result<(), Error>>,
538 let worker = WorkerTask::new(worker_type, worker_id, userid, to_stdout)?;
539 let upid_str = worker.upid.to_string();
540 let f = f(worker.clone());
541 tokio::spawn(async move {
542 let result = f.await;
543 worker.log_result(&result);
549 /// Create a new worker thread.
550 pub fn new_thread<F>(
552 worker_id: Option<String>,
556 ) -> Result<String, Error>
557 where F: Send + UnwindSafe + 'static + FnOnce(Arc<WorkerTask>) -> Result<(), Error>
559 println!("register worker thread
");
561 let worker = WorkerTask::new(worker_type, worker_id, userid, to_stdout)?;
562 let upid_str = worker.upid.to_string();
564 let _child = std::thread::Builder::new().name(upid_str.clone()).spawn(move || {
565 let worker1 = worker.clone();
566 let result = match std::panic::catch_unwind(move || f(worker1)) {
569 match panic.downcast::<&str>() {
571 Err(format_err!("worker panicked
: {}
", panic_msg))
574 Err(format_err!("worker panicked
: unknown
type."))
580 worker.log_result(&result);
586 /// create state from self and a result
587 pub fn create_state(&self, result: &Result<(), Error>) -> TaskState {
588 let warn_count = self.data.lock().unwrap().warn_count;
590 let endtime = proxmox::tools::time::epoch_i64();
592 if let Err(err) = result {
593 TaskState::Error { message: err.to_string(), endtime }
594 } else if warn_count > 0 {
595 TaskState::Warning { count: warn_count, endtime }
597 TaskState::OK { endtime }
601 /// Log task result, remove task from running list
602 pub fn log_result(&self, result: &Result<(), Error>) {
603 let state = self.create_state(result);
604 self.log(state.result_text());
606 WORKER_TASK_LIST.lock().unwrap().remove(&self.upid.task_id);
607 let _ = update_active_workers(None);
608 super::set_worker_count(WORKER_TASK_LIST.lock().unwrap().len());
612 pub fn log<S: AsRef<str>>(&self, msg: S) {
613 let mut data = self.data.lock().unwrap();
614 data.logger.log(msg);
617 /// Log a message as warning.
618 pub fn warn<S: AsRef<str>>(&self, msg: S) {
619 let mut data = self.data.lock().unwrap();
620 data.logger.log(format!("WARN
: {}
", msg.as_ref()));
621 data.warn_count += 1;
624 /// Set progress indicator
625 pub fn progress(&self, progress: f64) {
626 if progress >= 0.0 && progress <= 1.0 {
627 let mut data = self.data.lock().unwrap();
628 data.progress = progress;
630 // fixme: log!("task '{}'
: ignoring strange value
for progress '{}'
", self.upid, progress);
635 pub fn request_abort(&self) {
636 eprintln!("set abort flag
for worker {}
", self.upid);
637 self.abort_requested.store(true, Ordering::SeqCst);
639 let mut data = self.data.lock().unwrap();
641 match data.abort_listeners.pop() {
644 let _ = ch.send(()); // ignore erros here
650 /// Test if abort was requested.
651 pub fn abort_requested(&self) -> bool {
652 self.abort_requested.load(Ordering::SeqCst)
655 /// Fail if abort was requested.
656 pub fn fail_on_abort(&self) -> Result<(), Error> {
657 if self.abort_requested() {
658 bail!("abort requested
- aborting task
");
663 /// Get a future which resolves on task abort
664 pub fn abort_future(&self) -> oneshot::Receiver<()> {
665 let (tx, rx) = oneshot::channel::<()>();
667 let mut data = self.data.lock().unwrap();
668 if self.abort_requested() {
671 data.abort_listeners.push(tx);
676 pub fn upid(&self) -> &UPID {