2 use lazy_static
::lazy_static
;
5 use tokio
::sync
::oneshot
;
7 use std
::sync
::{Arc, Mutex}
;
8 use std
::collections
::HashMap
;
9 use std
::sync
::atomic
::{AtomicBool, Ordering}
;
10 use std
::io
::{BufRead, BufReader}
;
12 use std
::panic
::UnwindSafe
;
14 use serde_json
::{json, Value}
;
18 fs
::{create_path, file_set_contents_full, CreateOptions}
,
23 use crate::tools
::FileLogger
;
// The directory names are provided as macros (expanding to string literals)
// because `concat!` only accepts literals, not `const` items.

/// Expands to the runtime state directory.
macro_rules! PROXMOX_BACKUP_VAR_RUN_DIR_M {
    () => {
        "/run/proxmox-backup"
    };
}

/// Expands to the log directory.
macro_rules! PROXMOX_BACKUP_LOG_DIR_M {
    () => {
        "/var/log/proxmox-backup"
    };
}

/// Expands to the task log directory (`<log dir>/tasks`).
macro_rules! PROXMOX_BACKUP_TASK_DIR_M {
    () => {
        concat!(PROXMOX_BACKUP_LOG_DIR_M!(), "/tasks")
    };
}

/// Runtime state directory.
pub const PROXMOX_BACKUP_VAR_RUN_DIR: &str = PROXMOX_BACKUP_VAR_RUN_DIR_M!();

/// Log directory.
pub const PROXMOX_BACKUP_LOG_DIR: &str = PROXMOX_BACKUP_LOG_DIR_M!();

/// Directory holding the per-task log files.
pub const PROXMOX_BACKUP_TASK_DIR: &str = PROXMOX_BACKUP_TASK_DIR_M!();

/// Lock file guarding updates of the active task list.
pub const PROXMOX_BACKUP_TASK_LOCK_FN: &str =
    concat!(PROXMOX_BACKUP_TASK_DIR_M!(), "/.active.lock");

/// File holding the list of active (and recently finished) tasks.
pub const PROXMOX_BACKUP_ACTIVE_TASK_FN: &str =
    concat!(PROXMOX_BACKUP_TASK_DIR_M!(), "/active");
36 static ref WORKER_TASK_LIST
: Mutex
<HashMap
<usize, Arc
<WorkerTask
>>> = Mutex
::new(HashMap
::new());
38 static ref MY_PID
: i32 = unsafe { libc::getpid() }
;
39 static ref MY_PID_PSTART
: u64 = proxmox
::sys
::linux
::procfs
::read_proc_pid_stat(*MY_PID
).unwrap().starttime
;
42 /// Test if the task is still running
43 pub fn worker_is_active(upid
: &UPID
) -> bool
{
45 if (upid
.pid
== *MY_PID
) && (upid
.pstart
== *MY_PID_PSTART
) {
46 WORKER_TASK_LIST
.lock().unwrap().contains_key(&upid
.task_id
)
48 use proxmox
::sys
::linux
::procfs
;
49 procfs
::check_process_running_pstart(upid
.pid
, upid
.pstart
).is_some()
53 pub fn create_task_control_socket() -> Result
<(), Error
> {
55 let socketname
= format
!(
56 "\0{}/proxmox-task-control-{}.sock", PROXMOX_BACKUP_VAR_RUN_DIR
, *MY_PID
);
58 let control_future
= super::create_control_socket(socketname
, |param
| {
59 let param
= param
.as_object()
60 .ok_or_else(|| format_err
!("unable to parse parameters (expected json object)"))?
;
61 if param
.keys().count() != 2 { bail!("wrong number of parameters"); }
63 let command
= param
.get("command")
64 .ok_or_else(|| format_err
!("unable to parse parameters (missing command)"))?
;
66 // this is the only command for now
67 if command
!= "abort-task" { bail!("got unknown command '{}'
", command); }
69 let upid_str = param["upid
"].as_str()
70 .ok_or_else(|| format_err!("unable to parse
parameters (missing upid
)"))?;
72 let upid = upid_str.parse::<UPID>()?;
74 if !((upid.pid == *MY_PID) && (upid.pstart == *MY_PID_PSTART)) {
75 bail!("upid does not belong to this process
");
78 let hash = WORKER_TASK_LIST.lock().unwrap();
79 if let Some(ref worker) = hash.get(&upid.task_id) {
80 worker.request_abort();
82 // assume task is already stopped
87 tokio::spawn(control_future);
92 pub fn abort_worker_async(upid: UPID) {
93 tokio::spawn(async move {
94 if let Err(err) = abort_worker(upid).await {
95 eprintln!("abort worker failed
- {}
", err);
100 pub fn abort_worker(upid: UPID) -> impl Future<Output = Result<(), Error>> {
102 let target_pid = upid.pid;
104 let socketname = format!(
105 "\0{}
/proxmox
-task
-control
-{}
.sock
", PROXMOX_BACKUP_VAR_RUN_DIR, target_pid);
108 "command
": "abort
-task
",
109 "upid
": upid.to_string(),
112 super::send_command(socketname, cmd).map_ok(|_| ())
115 fn parse_worker_status_line(line: &str) -> Result<(String, UPID, Option<(i64, String)>), Error> {
117 let data = line.splitn(3, ' ').collect::<Vec<&str>>();
119 let len = data.len();
122 1 => Ok((data[0].to_owned(), data[0].parse::<UPID>()?, None)),
124 let endtime = i64::from_str_radix(data[1], 16)?;
125 Ok((data[0].to_owned(), data[0].parse::<UPID>()?, Some((endtime, data[2].to_owned()))))
127 _ => bail!("wrong number of components
"),
131 /// Create task log directory with correct permissions
132 pub fn create_task_log_dirs() -> Result<(), Error> {
135 let (backup_uid, backup_gid) = crate::tools::getpwnam_ugid("backup
")?;
136 let opts = CreateOptions::new()
137 .owner(nix::unistd::Uid::from_raw(backup_uid))
138 .group(nix::unistd::Gid::from_raw(backup_gid));
140 create_path(PROXMOX_BACKUP_LOG_DIR, None, Some(opts.clone()))?;
141 create_path(PROXMOX_BACKUP_TASK_DIR, None, Some(opts.clone()))?;
142 create_path(PROXMOX_BACKUP_VAR_RUN_DIR, None, Some(opts))?;
144 }).map_err(|err: Error| format_err!("unable to create task log dir
- {}
", err))?;
149 /// Read exits status from task log file
150 pub fn upid_read_status(upid: &UPID) -> Result<String, Error> {
151 let mut status = String::from("unknown
");
153 let path = upid.log_path();
155 let mut file = File::open(path)?;
157 /// speedup - only read tail
159 use std::io::SeekFrom;
160 let _ = file.seek(SeekFrom::End(-8192)); // ignore errors
162 let reader = BufReader::new(file);
164 for line in reader.lines() {
167 let mut iter = line.splitn(2, ": TASK
");
168 if iter.next() == None { continue; }
173 status = String::from(rest);
174 } else if rest.starts_with("ERROR
: ") {
175 status = String::from(&rest[7..]);
184 /// Task details including parsed UPID
186 /// If there is no `state`, the task is still running.
188 pub struct TaskListInfo {
191 /// UPID string representation
192 pub upid_str: String,
193 /// Task `(endtime, status)` if already finished
195 /// The `status` is either `unknown`, `OK`, or `ERROR: ...`
196 pub state: Option<(i64, String)>, // endtime, status
199 // atomically read/update the task list, update status of finished tasks
200 // new_upid is added to the list when specified.
201 // Returns a sorted list of known tasks,
202 fn update_active_workers(new_upid: Option<&UPID>) -> Result<Vec<TaskListInfo>, Error> {
204 let (backup_uid, backup_gid) = crate::tools::getpwnam_ugid("backup
")?;
205 let uid = Some(nix::unistd::Uid::from_raw(backup_uid));
206 let gid = Some(nix::unistd::Gid::from_raw(backup_gid));
208 let lock = crate::tools::open_file_locked(PROXMOX_BACKUP_TASK_LOCK_FN, std::time::Duration::new(10, 0))?;
209 nix::unistd::chown(PROXMOX_BACKUP_TASK_LOCK_FN, uid, gid)?;
211 let reader = match File::open(PROXMOX_BACKUP_ACTIVE_TASK_FN) {
212 Ok(f) => Some(BufReader::new(f)),
214 if err.kind() == std::io::ErrorKind::NotFound {
217 bail!("unable to open active worker {:?}
- {}
", PROXMOX_BACKUP_ACTIVE_TASK_FN, err);
222 let mut active_list = vec![];
223 let mut finish_list = vec![];
225 if let Some(lines) = reader.map(|r| r.lines()) {
229 match parse_worker_status_line(&line) {
230 Err(err) => bail!("unable to parse active worker status '{}'
- {}
", line, err),
231 Ok((upid_str, upid, state)) => {
233 let running = worker_is_active(&upid);
236 active_list.push(TaskListInfo { upid, upid_str, state: None });
240 println!("Detected stoped UPID {}
", upid_str);
241 let status = upid_read_status(&upid)
242 .unwrap_or_else(|_| String::from("unknown
"));
243 finish_list.push(TaskListInfo {
244 upid, upid_str, state: Some((Local::now().timestamp(), status))
247 Some((endtime, status)) => {
248 finish_list.push(TaskListInfo {
249 upid, upid_str, state: Some((endtime, status))
259 if let Some(upid) = new_upid {
260 active_list.push(TaskListInfo { upid: upid.clone(), upid_str: upid.to_string(), state: None });
263 // assemble list without duplicates
264 // we include all active tasks,
265 // and fill up to 1000 entries with finished tasks
269 let mut task_hash = HashMap::new();
271 for info in active_list {
272 task_hash.insert(info.upid_str.clone(), info);
275 for info in finish_list {
276 if task_hash.len() > max { break; }
277 if !task_hash.contains_key(&info.upid_str) {
278 task_hash.insert(info.upid_str.clone(), info);
282 let mut task_list: Vec<TaskListInfo> = vec![];
283 for (_, info) in task_hash { task_list.push(info); }
285 task_list.sort_unstable_by(|b, a| { // lastest on top
286 match (&a.state, &b.state) {
287 (Some(s1), Some(s2)) => s1.0.cmp(&s2.0),
288 (Some(_), None) => std::cmp::Ordering::Less,
289 (None, Some(_)) => std::cmp::Ordering::Greater,
290 _ => a.upid.starttime.cmp(&b.upid.starttime),
294 let mut raw = String::new();
295 for info in &task_list {
296 if let Some((endtime, status)) = &info.state {
297 raw.push_str(&format!("{} {:08X} {}
\n", info.upid_str, endtime, status));
299 raw.push_str(&info.upid_str);
304 file_set_contents_full(PROXMOX_BACKUP_ACTIVE_TASK_FN, raw.as_bytes(), None, uid, gid)?;
311 /// Returns a sorted list of known tasks
313 /// The list is sorted by `(starttime, endtime)` in ascending order
314 pub fn read_task_list() -> Result<Vec<TaskListInfo>, Error> {
315 update_active_workers(None)
318 /// Launch long running worker tasks.
320 /// A worker task can either be a whole thread, or simply a tokio
321 /// task/future. Each task can `log()` messages, which are stored
322 /// persistently to files. Task should poll the `abort_requested`
323 /// flag, and stop execution when requested.
325 pub struct WorkerTask {
327 data: Mutex<WorkerTaskData>,
328 abort_requested: AtomicBool,
331 impl std::fmt::Display for WorkerTask {
333 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
339 struct WorkerTaskData {
341 progress: f64, // 0..1
342 pub abort_listeners: Vec<oneshot::Sender<()>>,
345 impl Drop for WorkerTask {
348 println!("unregister worker
");
354 pub fn new(worker_type: &str, worker_id: Option<String>, username: &str, to_stdout: bool) -> Result<Arc<Self>, Error> {
355 println!("register worker
");
357 let upid = UPID::new(worker_type, worker_id, username)?;
358 let task_id = upid.task_id;
360 let mut path = std::path::PathBuf::from(PROXMOX_BACKUP_TASK_DIR);
362 path.push(format!("{:02X}
", upid.pstart % 256));
364 let (backup_uid, backup_gid) = crate::tools::getpwnam_ugid("backup
")?;
365 let uid = nix::unistd::Uid::from_raw(backup_uid);
366 let gid = nix::unistd::Gid::from_raw(backup_gid);
368 create_path(&path, None, Some(CreateOptions::new().owner(uid).group(gid)))?;
370 path.push(upid.to_string());
372 println!("FILE
: {:?}
", path);
374 let logger = FileLogger::new(&path, to_stdout)?;
375 nix::unistd::chown(&path, Some(uid), Some(gid))?;
377 update_active_workers(Some(&upid))?;
379 let worker = Arc::new(Self {
381 abort_requested: AtomicBool::new(false),
382 data: Mutex::new(WorkerTaskData {
385 abort_listeners: vec![],
389 let mut hash = WORKER_TASK_LIST.lock().unwrap();
391 hash.insert(task_id, worker.clone());
392 super::set_worker_count(hash.len());
397 /// Spawn a new tokio task/future.
400 worker_id: Option<String>,
404 ) -> Result<String, Error>
405 where F: Send + 'static + FnOnce(Arc<WorkerTask>) -> T,
406 T: Send + 'static + Future<Output = Result<(), Error>>,
408 let worker = WorkerTask::new(worker_type, worker_id, username, to_stdout)?;
409 let upid_str = worker.upid.to_string();
410 let f = f(worker.clone());
411 tokio::spawn(async move {
412 let result = f.await;
413 worker.log_result(&result);
419 /// Create a new worker thread.
420 pub fn new_thread<F>(
422 worker_id: Option<String>,
426 ) -> Result<String, Error>
427 where F: Send + UnwindSafe + 'static + FnOnce(Arc<WorkerTask>) -> Result<(), Error>
429 println!("register worker thread
");
431 let (p, c) = oneshot::channel::<()>();
433 let worker = WorkerTask::new(worker_type, worker_id, username, to_stdout)?;
434 let upid_str = worker.upid.to_string();
436 let _child = std::thread::spawn(move || {
437 let worker1 = worker.clone();
438 let result = match std::panic::catch_unwind(move || f(worker1)) {
441 match panic.downcast::<&str>() {
443 Err(format_err!("worker panicked
: {}
", panic_msg))
446 Err(format_err!("worker panicked
: unknown
type."))
452 worker.log_result(&result);
456 tokio::spawn(c.map(|_| ()));
461 /// Log task result, remove task from running list
462 pub fn log_result(&self, result: &Result<(), Error>) {
464 if let Err(err) = result {
465 self.log(&format!("TASK ERROR
: {}
", err));
470 WORKER_TASK_LIST.lock().unwrap().remove(&self.upid.task_id);
471 let _ = update_active_workers(None);
472 super::set_worker_count(WORKER_TASK_LIST.lock().unwrap().len());
476 pub fn log<S: AsRef<str>>(&self, msg: S) {
477 let mut data = self.data.lock().unwrap();
478 data.logger.log(msg);
481 /// Set progress indicator
482 pub fn progress(&self, progress: f64) {
483 if progress >= 0.0 && progress <= 1.0 {
484 let mut data = self.data.lock().unwrap();
485 data.progress = progress;
487 // fixme: log!("task '{}'
: ignoring strange value
for progress '{}'
", self.upid, progress);
492 pub fn request_abort(&self) {
493 eprintln!("set abort flag
for worker {}
", self.upid);
494 self.abort_requested.store(true, Ordering::SeqCst);
496 let mut data = self.data.lock().unwrap();
498 match data.abort_listeners.pop() {
501 let _ = ch.send(()); // ignore erros here
507 /// Test if abort was requested.
508 pub fn abort_requested(&self) -> bool {
509 self.abort_requested.load(Ordering::SeqCst)
512 /// Fail if abort was requested.
513 pub fn fail_on_abort(&self) -> Result<(), Error> {
514 if self.abort_requested() {
515 bail!("task '{}'
: abort requested
- aborting task
", self.upid);
520 /// Get a future which resolves on task abort
521 pub fn abort_future(&self) -> oneshot::Receiver<()> {
522 let (tx, rx) = oneshot::channel::<()>();
524 let mut data = self.data.lock().unwrap();
525 if self.abort_requested() {
528 data.abort_listeners.push(tx);