1 use std
::collections
::HashMap
;
3 use std
::io
::{Read, BufRead, BufReader}
;
4 use std
::panic
::UnwindSafe
;
5 use std
::sync
::atomic
::{AtomicBool, Ordering}
;
6 use std
::sync
::{Arc, Mutex}
;
8 use anyhow
::{bail, format_err, Error}
;
10 use lazy_static
::lazy_static
;
12 use serde_json
::{json, Value}
;
13 use serde
::{Serialize, Deserialize}
;
14 use tokio
::sync
::oneshot
;
16 use proxmox
::sys
::linux
::procfs
;
17 use proxmox
::try_block
;
18 use proxmox
::tools
::fs
::{create_path, open_file_locked, replace_file, CreateOptions}
;
22 use crate::tools
::FileLogger
;
23 use crate::api2
::types
::Userid
;
// Path fragments are defined as macros (not plain consts) so that they can be
// combined with `concat!`, which only accepts literals / literal-producing macros.
macro_rules! PROXMOX_BACKUP_VAR_RUN_DIR_M { () => ("/run/proxmox-backup") }
macro_rules! PROXMOX_BACKUP_LOG_DIR_M { () => ("/var/log/proxmox-backup") }
macro_rules! PROXMOX_BACKUP_TASK_DIR_M { () => (concat!( PROXMOX_BACKUP_LOG_DIR_M!(), "/tasks")) }

/// Runtime directory (holds the name prefix of the task control sockets).
pub const PROXMOX_BACKUP_VAR_RUN_DIR: &str = PROXMOX_BACKUP_VAR_RUN_DIR_M!();
/// Base log directory.
pub const PROXMOX_BACKUP_LOG_DIR: &str = PROXMOX_BACKUP_LOG_DIR_M!();
/// Directory containing the per-task log files and the task lists.
pub const PROXMOX_BACKUP_TASK_DIR: &str = PROXMOX_BACKUP_TASK_DIR_M!();
/// Lock file guarding the `active` and `index` task lists.
pub const PROXMOX_BACKUP_TASK_LOCK_FN: &str = concat!(PROXMOX_BACKUP_TASK_DIR_M!(), "/.active.lock");
/// List of tasks that are (believed to be) still running.
pub const PROXMOX_BACKUP_ACTIVE_TASK_FN: &str = concat!(PROXMOX_BACKUP_TASK_DIR_M!(), "/active");
/// List of the most recently finished tasks.
pub const PROXMOX_BACKUP_INDEX_TASK_FN: &str = concat!(PROXMOX_BACKUP_TASK_DIR_M!(), "/index");

/// Maximum number of finished tasks kept in the index file.
const MAX_INDEX_TASKS: usize = 1000;
39 static ref WORKER_TASK_LIST
: Mutex
<HashMap
<usize, Arc
<WorkerTask
>>> = Mutex
::new(HashMap
::new());
41 static ref MY_PID
: i32 = unsafe { libc::getpid() }
;
42 static ref MY_PID_PSTART
: u64 = procfs
::PidStat
::read_from_pid(Pid
::from_raw(*MY_PID
))
47 /// Test if the task is still running
48 pub async
fn worker_is_active(upid
: &UPID
) -> Result
<bool
, Error
> {
49 if (upid
.pid
== *MY_PID
) && (upid
.pstart
== *MY_PID_PSTART
) {
50 return Ok(WORKER_TASK_LIST
.lock().unwrap().contains_key(&upid
.task_id
));
53 if !procfs
::check_process_running_pstart(upid
.pid
, upid
.pstart
).is_some() {
57 let socketname
= format
!(
58 "\0{}/proxmox-task-control-{}.sock", PROXMOX_BACKUP_VAR_RUN_DIR
, upid
.pid
);
62 "upid": upid
.to_string(),
65 let status
= super::send_command(socketname
, cmd
).await?
;
67 if let Some(active
) = status
.as_bool() {
70 bail
!("got unexpected result {:?} (expected bool)", status
);
74 /// Test if the task is still running (fast but inaccurate implementation)
76 /// If the task is spanned from a different process, we simply return if
77 /// that process is still running. This information is good enough to detect
79 pub fn worker_is_active_local(upid
: &UPID
) -> bool
{
80 if (upid
.pid
== *MY_PID
) && (upid
.pstart
== *MY_PID_PSTART
) {
81 WORKER_TASK_LIST
.lock().unwrap().contains_key(&upid
.task_id
)
83 procfs
::check_process_running_pstart(upid
.pid
, upid
.pstart
).is_some()
87 pub fn create_task_control_socket() -> Result
<(), Error
> {
89 let socketname
= format
!(
90 "\0{}/proxmox-task-control-{}.sock", PROXMOX_BACKUP_VAR_RUN_DIR
, *MY_PID
);
92 let control_future
= super::create_control_socket(socketname
, |param
| {
93 let param
= param
.as_object()
94 .ok_or_else(|| format_err
!("unable to parse parameters (expected json object)"))?
;
95 if param
.keys().count() != 2 { bail!("wrong number of parameters"); }
97 let command
= param
["command"].as_str()
98 .ok_or_else(|| format_err
!("unable to parse parameters (missing command)"))?
;
100 // we have only two commands for now
101 if !(command
== "abort-task" || command
== "status") { bail!("got unknown command '{}'
", command); }
103 let upid_str = param["upid
"].as_str()
104 .ok_or_else(|| format_err!("unable to parse
parameters (missing upid
)"))?;
106 let upid = upid_str.parse::<UPID>()?;
108 if !((upid.pid == *MY_PID) && (upid.pstart == *MY_PID_PSTART)) {
109 bail!("upid does not belong to this process
");
112 let hash = WORKER_TASK_LIST.lock().unwrap();
116 if let Some(ref worker) = hash.get(&upid.task_id) {
117 worker.request_abort();
119 // assume task is already stopped
124 let active = hash.contains_key(&upid.task_id);
128 bail!("got unknown command '{}'
", command);
133 tokio::spawn(control_future);
138 pub fn abort_worker_async(upid: UPID) {
139 tokio::spawn(async move {
140 if let Err(err) = abort_worker(upid).await {
141 eprintln!("abort worker failed
- {}
", err);
146 pub async fn abort_worker(upid: UPID) -> Result<(), Error> {
148 let target_pid = upid.pid;
150 let socketname = format!(
151 "\0{}
/proxmox
-task
-control
-{}
.sock
", PROXMOX_BACKUP_VAR_RUN_DIR, target_pid);
154 "command
": "abort
-task
",
155 "upid
": upid.to_string(),
158 super::send_command(socketname, cmd).map_ok(|_| ()).await
161 fn parse_worker_status_line(line: &str) -> Result<(String, UPID, Option<TaskState>), Error> {
163 let data = line.splitn(3, ' ').collect::<Vec<&str>>();
165 let len = data.len();
168 1 => Ok((data[0].to_owned(), data[0].parse::<UPID>()?, None)),
170 let endtime = i64::from_str_radix(data[1], 16)?;
171 let state = TaskState::from_endtime_and_message(endtime, data[2])?;
172 Ok((data[0].to_owned(), data[0].parse::<UPID>()?, Some(state)))
174 _ => bail!("wrong number of components
"),
178 /// Create task log directory with correct permissions
179 pub fn create_task_log_dirs() -> Result<(), Error> {
182 let backup_user = crate::backup::backup_user()?;
183 let opts = CreateOptions::new()
184 .owner(backup_user.uid)
185 .group(backup_user.gid);
187 create_path(PROXMOX_BACKUP_LOG_DIR, None, Some(opts.clone()))?;
188 create_path(PROXMOX_BACKUP_TASK_DIR, None, Some(opts.clone()))?;
189 create_path(PROXMOX_BACKUP_VAR_RUN_DIR, None, Some(opts))?;
191 }).map_err(|err: Error| format_err!("unable to create task log dir
- {}
", err))?;
196 /// Read endtime (time of last log line) and exitstatus from task log file
197 /// If there is not a single line with at valid datetime, we assume the
198 /// starttime to be the endtime
199 pub fn upid_read_status(upid: &UPID) -> Result<TaskState, Error> {
201 let mut status = TaskState::Unknown { endtime: upid.starttime };
203 let path = upid.log_path();
205 let mut file = File::open(path)?;
207 /// speedup - only read tail
209 use std::io::SeekFrom;
210 let _ = file.seek(SeekFrom::End(-8192)); // ignore errors
212 let mut data = Vec::with_capacity(8192);
213 file.read_to_end(&mut data)?;
215 // task logs should end with newline, we do not want it here
216 if data.len() > 0 && data[data.len()-1] == b'\n' {
222 for pos in (0..data.len()).rev() {
223 if data[pos] == b'\n' {
224 start = data.len().min(pos + 1);
231 let last_line = std::str::from_utf8(last_line)
232 .map_err(|err| format_err!("upid_read_status
: utf8 parse failed
: {}
", err))?;
234 let mut iter = last_line.splitn(2, ": ");
235 if let Some(time_str) = iter.next() {
236 if let Ok(endtime) = proxmox::tools::time::parse_rfc3339(time_str) {
237 if let Some(rest) = iter.next().and_then(|rest| rest.strip_prefix("TASK
")) {
238 if let Ok(state) = TaskState::from_endtime_and_message(endtime, rest) {
249 #[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
251 /// The Task ended with an undefined state
252 Unknown { endtime: i64 },
253 /// The Task ended and there were no errors or warnings
255 /// The Task had 'count' amount of warnings and no errors
256 Warning { count: u64, endtime: i64 },
257 /// The Task ended with the error described in 'message'
258 Error { message: String, endtime: i64 },
262 pub fn endtime(&self) -> i64 {
264 TaskState::Unknown { endtime } => endtime,
265 TaskState::OK { endtime } => endtime,
266 TaskState::Warning { endtime, .. } => endtime,
267 TaskState::Error { endtime, .. } => endtime,
271 fn result_text(&self) -> String {
273 TaskState::Error { message, .. } => format!("TASK ERROR
: {}
", message),
274 other => format!("TASK {}
", other),
278 fn from_endtime_and_message(endtime: i64, s: &str) -> Result<Self, Error> {
280 Ok(TaskState::Unknown { endtime })
281 } else if s == "OK
" {
282 Ok(TaskState::OK { endtime })
283 } else if s.starts_with("WARNINGS
: ") {
284 let count: u64 = s[10..].parse()?;
285 Ok(TaskState::Warning{ count, endtime })
286 } else if s.len() > 0 {
287 let message = if s.starts_with("ERROR
: ") { &s[7..] } else { s }.to_string();
288 Ok(TaskState::Error{ message, endtime })
290 bail!("unable to parse Task Status '{}'
", s);
295 impl std::cmp::PartialOrd for TaskState {
296 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
297 Some(self.endtime().cmp(&other.endtime()))
301 impl std::cmp::Ord for TaskState {
302 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
303 self.endtime().cmp(&other.endtime())
307 impl std::fmt::Display for TaskState {
308 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
310 TaskState::Unknown { .. } => write!(f, "unknown
"),
311 TaskState::OK { .. }=> write!(f, "OK
"),
312 TaskState::Warning { count, .. } => write!(f, "WARNINGS
: {}
", count),
313 TaskState::Error { message, .. } => write!(f, "{}
", message),
318 /// Task details including parsed UPID
320 /// If there is no `state`, the task is still running.
322 pub struct TaskListInfo {
325 /// UPID string representation
326 pub upid_str: String,
327 /// Task `(endtime, status)` if already finished
328 pub state: Option<TaskState>, // endtime, status
331 fn lock_task_list_files(exclusive: bool) -> Result<std::fs::File, Error> {
332 let backup_user = crate::backup::backup_user()?;
334 let lock = open_file_locked(PROXMOX_BACKUP_TASK_LOCK_FN, std::time::Duration::new(10, 0), exclusive)?;
335 nix::unistd::chown(PROXMOX_BACKUP_TASK_LOCK_FN, Some(backup_user.uid), Some(backup_user.gid))?;
340 // atomically read/update the task list, update status of finished tasks
341 // new_upid is added to the list when specified.
342 // Returns a sorted list of known tasks,
343 fn update_active_workers(new_upid: Option<&UPID>) -> Result<Vec<TaskListInfo>, Error> {
345 let backup_user = crate::backup::backup_user()?;
347 let lock = lock_task_list_files(true)?;
349 let mut finish_list: Vec<TaskListInfo> = read_task_file_from_path(PROXMOX_BACKUP_INDEX_TASK_FN)?;
350 let mut active_list: Vec<TaskListInfo> = read_task_file_from_path(PROXMOX_BACKUP_ACTIVE_TASK_FN)?
353 if info.state.is_some() {
354 // this can happen when the active file still includes finished tasks
355 finish_list.push(info);
359 if !worker_is_active_local(&info.upid) {
360 println!("Detected stopped UPID {}
", &info.upid_str);
361 let now = proxmox::tools::time::epoch_i64();
362 let status = upid_read_status(&info.upid)
363 .unwrap_or_else(|_| TaskState::Unknown { endtime: now });
364 finish_list.push(TaskListInfo {
366 upid_str: info.upid_str,
375 if let Some(upid) = new_upid {
376 active_list.push(TaskListInfo { upid: upid.clone(), upid_str: upid.to_string(), state: None });
379 let active_raw = render_task_list(&active_list);
382 PROXMOX_BACKUP_ACTIVE_TASK_FN,
383 active_raw.as_bytes(),
385 .owner(backup_user.uid)
386 .group(backup_user.gid),
389 finish_list.sort_unstable_by(|a, b| {
390 match (&a.state, &b.state) {
391 (Some(s1), Some(s2)) => s1.cmp(&s2),
392 (Some(_), None) => std::cmp::Ordering::Less,
393 (None, Some(_)) => std::cmp::Ordering::Greater,
394 _ => a.upid.starttime.cmp(&b.upid.starttime),
398 let start = (finish_list.len()-MAX_INDEX_TASKS).max(0);
399 let end = (start+MAX_INDEX_TASKS).min(finish_list.len());
400 let index_raw = render_task_list(&finish_list[start..end]);
403 PROXMOX_BACKUP_INDEX_TASK_FN,
404 index_raw.as_bytes(),
406 .owner(backup_user.uid)
407 .group(backup_user.gid),
412 finish_list.append(&mut active_list);
413 finish_list.reverse();
417 /// Returns a sorted list of known tasks
419 /// The list is sorted by `(starttime, endtime)` in ascending order
420 pub fn read_task_list() -> Result<Vec<TaskListInfo>, Error> {
421 update_active_workers(None)
424 fn render_task_line(info: &TaskListInfo) -> String {
425 let mut raw = String::new();
426 if let Some(status) = &info.state {
427 raw.push_str(&format!("{} {:08X} {}
\n", info.upid_str, status.endtime(), status));
429 raw.push_str(&info.upid_str);
436 fn render_task_list(list: &[TaskListInfo]) -> String {
437 let mut raw = String::new();
439 raw.push_str(&render_task_line(&info));
444 // note this is not locked, caller has to make sure it is
445 // this will skip (and log) lines that are not valid status lines
446 fn read_task_file<R: Read>(reader: R) -> Result<Vec<TaskListInfo>, Error>
448 let reader = BufReader::new(reader);
449 let mut list = Vec::new();
450 for line in reader.lines() {
452 match parse_worker_status_line(&line) {
453 Ok((upid_str, upid, state)) => list.push(TaskListInfo {
459 eprintln!("unable to parse worker status '{}'
- {}
", line, err);
468 // note this is not locked, caller has to make sure it is
469 fn read_task_file_from_path<P>(path: P) -> Result<Vec<TaskListInfo>, Error>
471 P: AsRef<std::path::Path> + std::fmt::Debug,
473 let file = match File::open(&path) {
475 Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(Vec::new()),
476 Err(err) => bail!("unable to open task list {:?}
- {}
", path, err),
482 /// Launch long running worker tasks.
484 /// A worker task can either be a whole thread, or a simply tokio
485 /// task/future. Each task can `log()` messages, which are stored
486 /// persistently to files. Task should poll the `abort_requested`
487 /// flag, and stop execution when requested.
489 pub struct WorkerTask {
491 data: Mutex<WorkerTaskData>,
492 abort_requested: AtomicBool,
495 impl std::fmt::Display for WorkerTask {
497 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
503 struct WorkerTaskData {
505 progress: f64, // 0..1
507 pub abort_listeners: Vec<oneshot::Sender<()>>,
510 impl Drop for WorkerTask {
513 println!("unregister worker
");
impl WorkerTask {

    /// Create and register a new worker task.
    ///
    /// Creates the task log file below
    /// `PROXMOX_BACKUP_TASK_DIR/<pstart % 256 as two hex digits>/<upid>`
    /// (owned by the backup user), registers the task in the in-process task
    /// list and adds it to the on-disk list of active tasks.
    ///
    /// # Errors
    ///
    /// Fails if the UPID, the directory or the log file cannot be created,
    /// or if the active task list cannot be updated. In the latter case the
    /// task is removed from the in-process list again.
    pub fn new(worker_type: &str, worker_id: Option<String>, userid: Userid, to_stdout: bool) -> Result<Arc<Self>, Error> {
        println!("register worker");

        let upid = UPID::new(worker_type, worker_id, userid)?;
        let task_id = upid.task_id;

        let mut path = std::path::PathBuf::from(PROXMOX_BACKUP_TASK_DIR);

        path.push(format!("{:02X}", upid.pstart % 256));

        let backup_user = crate::backup::backup_user()?;

        create_path(&path, None, Some(CreateOptions::new().owner(backup_user.uid).group(backup_user.gid)))?;

        path.push(upid.to_string());

        println!("FILE: {:?}", path);

        let logger = FileLogger::new(&path, to_stdout)?;
        nix::unistd::chown(&path, Some(backup_user.uid), Some(backup_user.gid))?;

        let worker = Arc::new(Self {
            upid: upid.clone(),
            abort_requested: AtomicBool::new(false),
            data: Mutex::new(WorkerTaskData {
                logger,
                progress: 0.0,
                warn_count: 0,
                abort_listeners: vec![],
            }),
        });

        // scope to drop the lock again after inserting
        {
            let mut hash = WORKER_TASK_LIST.lock().unwrap();
            hash.insert(task_id, worker.clone());
            super::set_worker_count(hash.len());
        }

        if let Err(err) = update_active_workers(Some(&upid)) {
            // Nobody will ever call log_result() for this task, so undo the
            // registration instead of leaving a phantom entry behind.
            let mut hash = WORKER_TASK_LIST.lock().unwrap();
            hash.remove(&task_id);
            super::set_worker_count(hash.len());
            return Err(err);
        }

        Ok(worker)
    }
563 /// Spawn a new tokio task/future.
566 worker_id: Option<String>,
570 ) -> Result<String, Error>
571 where F: Send + 'static + FnOnce(Arc<WorkerTask>) -> T,
572 T: Send + 'static + Future<Output = Result<(), Error>>,
574 let worker = WorkerTask::new(worker_type, worker_id, userid, to_stdout)?;
575 let upid_str = worker.upid.to_string();
576 let f = f(worker.clone());
577 tokio::spawn(async move {
578 let result = f.await;
579 worker.log_result(&result);
585 /// Create a new worker thread.
586 pub fn new_thread<F>(
588 worker_id: Option<String>,
592 ) -> Result<String, Error>
593 where F: Send + UnwindSafe + 'static + FnOnce(Arc<WorkerTask>) -> Result<(), Error>
595 println!("register worker thread
");
597 let worker = WorkerTask::new(worker_type, worker_id, userid, to_stdout)?;
598 let upid_str = worker.upid.to_string();
600 let _child = std::thread::Builder::new().name(upid_str.clone()).spawn(move || {
601 let worker1 = worker.clone();
602 let result = match std::panic::catch_unwind(move || f(worker1)) {
605 match panic.downcast::<&str>() {
607 Err(format_err!("worker panicked
: {}
", panic_msg))
610 Err(format_err!("worker panicked
: unknown
type."))
616 worker.log_result(&result);
622 /// create state from self and a result
623 pub fn create_state(&self, result: &Result<(), Error>) -> TaskState {
624 let warn_count = self.data.lock().unwrap().warn_count;
626 let endtime = proxmox::tools::time::epoch_i64();
628 if let Err(err) = result {
629 TaskState::Error { message: err.to_string(), endtime }
630 } else if warn_count > 0 {
631 TaskState::Warning { count: warn_count, endtime }
633 TaskState::OK { endtime }
637 /// Log task result, remove task from running list
638 pub fn log_result(&self, result: &Result<(), Error>) {
639 let state = self.create_state(result);
640 self.log(state.result_text());
642 WORKER_TASK_LIST.lock().unwrap().remove(&self.upid.task_id);
643 let _ = update_active_workers(None);
644 super::set_worker_count(WORKER_TASK_LIST.lock().unwrap().len());
648 pub fn log<S: AsRef<str>>(&self, msg: S) {
649 let mut data = self.data.lock().unwrap();
650 data.logger.log(msg);
653 /// Log a message as warning.
654 pub fn warn<S: AsRef<str>>(&self, msg: S) {
655 let mut data = self.data.lock().unwrap();
656 data.logger.log(format!("WARN
: {}
", msg.as_ref()));
657 data.warn_count += 1;
660 /// Set progress indicator
661 pub fn progress(&self, progress: f64) {
662 if progress >= 0.0 && progress <= 1.0 {
663 let mut data = self.data.lock().unwrap();
664 data.progress = progress;
666 // fixme: log!("task '{}'
: ignoring strange value
for progress '{}'
", self.upid, progress);
671 pub fn request_abort(&self) {
672 eprintln!("set abort flag
for worker {}
", self.upid);
673 self.abort_requested.store(true, Ordering::SeqCst);
675 let mut data = self.data.lock().unwrap();
677 match data.abort_listeners.pop() {
680 let _ = ch.send(()); // ignore erros here
686 /// Test if abort was requested.
687 pub fn abort_requested(&self) -> bool {
688 self.abort_requested.load(Ordering::SeqCst)
691 /// Fail if abort was requested.
692 pub fn fail_on_abort(&self) -> Result<(), Error> {
693 if self.abort_requested() {
694 bail!("abort requested
- aborting task
");
699 /// Get a future which resolves on task abort
700 pub fn abort_future(&self) -> oneshot::Receiver<()> {
701 let (tx, rx) = oneshot::channel::<()>();
703 let mut data = self.data.lock().unwrap();
704 if self.abort_requested() {
707 data.abort_listeners.push(tx);
712 pub fn upid(&self) -> &UPID {