]> git.proxmox.com Git - proxmox-backup.git/blob - src/server/worker_task.rs
garbage_collect: call fail_on_abort to abort GC when requested.
[proxmox-backup.git] / src / server / worker_task.rs
1 use std::collections::HashMap;
2 use std::fs::File;
3 use std::io::{BufRead, BufReader};
4 use std::panic::UnwindSafe;
5 use std::sync::atomic::{AtomicBool, Ordering};
6 use std::sync::{Arc, Mutex};
7
8 use chrono::Local;
9 use anyhow::{bail, format_err, Error};
10 use futures::*;
11 use lazy_static::lazy_static;
12 use nix::unistd::Pid;
13 use serde_json::{json, Value};
14 use tokio::sync::oneshot;
15
16 use proxmox::sys::linux::procfs;
17 use proxmox::try_block;
18 use proxmox::tools::fs::{create_path, replace_file, CreateOptions};
19
20 use super::UPID;
21
22 use crate::tools::FileLogger;
23
// Path fragments are defined as macros because `concat!` only accepts
// literals; the public constants below are assembled from them.
macro_rules! PROXMOX_BACKUP_VAR_RUN_DIR_M {
    () => {
        "/run/proxmox-backup"
    };
}
macro_rules! PROXMOX_BACKUP_LOG_DIR_M {
    () => {
        "/var/log/proxmox-backup"
    };
}
macro_rules! PROXMOX_BACKUP_TASK_DIR_M {
    () => {
        concat!(PROXMOX_BACKUP_LOG_DIR_M!(), "/tasks")
    };
}

/// Runtime directory, also used as prefix of the task control socket name.
pub const PROXMOX_BACKUP_VAR_RUN_DIR: &str = PROXMOX_BACKUP_VAR_RUN_DIR_M!();
/// Base directory for log files.
pub const PROXMOX_BACKUP_LOG_DIR: &str = PROXMOX_BACKUP_LOG_DIR_M!();
/// Directory below the log directory which holds the task logs.
pub const PROXMOX_BACKUP_TASK_DIR: &str = PROXMOX_BACKUP_TASK_DIR_M!();
/// Lock file taken while the active task file is read or rewritten.
pub const PROXMOX_BACKUP_TASK_LOCK_FN: &str =
    concat!(PROXMOX_BACKUP_TASK_DIR_M!(), "/.active.lock");
/// File holding the list of known (running and finished) tasks.
pub const PROXMOX_BACKUP_ACTIVE_TASK_FN: &str =
    concat!(PROXMOX_BACKUP_TASK_DIR_M!(), "/active");
33
34 lazy_static! {
35 static ref WORKER_TASK_LIST: Mutex<HashMap<usize, Arc<WorkerTask>>> = Mutex::new(HashMap::new());
36
37 static ref MY_PID: i32 = unsafe { libc::getpid() };
38 static ref MY_PID_PSTART: u64 = procfs::PidStat::read_from_pid(Pid::from_raw(*MY_PID))
39 .unwrap()
40 .starttime;
41 }
42
43 /// Test if the task is still running
44 pub fn worker_is_active(upid: &UPID) -> bool {
45
46 if (upid.pid == *MY_PID) && (upid.pstart == *MY_PID_PSTART) {
47 WORKER_TASK_LIST.lock().unwrap().contains_key(&upid.task_id)
48 } else {
49 procfs::check_process_running_pstart(upid.pid, upid.pstart).is_some()
50 }
51 }
52
53 pub fn create_task_control_socket() -> Result<(), Error> {
54
55 let socketname = format!(
56 "\0{}/proxmox-task-control-{}.sock", PROXMOX_BACKUP_VAR_RUN_DIR, *MY_PID);
57
58 let control_future = super::create_control_socket(socketname, |param| {
59 let param = param.as_object()
60 .ok_or_else(|| format_err!("unable to parse parameters (expected json object)"))?;
61 if param.keys().count() != 2 { bail!("wrong number of parameters"); }
62
63 let command = param.get("command")
64 .ok_or_else(|| format_err!("unable to parse parameters (missing command)"))?;
65
66 // this is the only command for now
67 if command != "abort-task" { bail!("got unknown command '{}'", command); }
68
69 let upid_str = param["upid"].as_str()
70 .ok_or_else(|| format_err!("unable to parse parameters (missing upid)"))?;
71
72 let upid = upid_str.parse::<UPID>()?;
73
74 if !((upid.pid == *MY_PID) && (upid.pstart == *MY_PID_PSTART)) {
75 bail!("upid does not belong to this process");
76 }
77
78 let hash = WORKER_TASK_LIST.lock().unwrap();
79 if let Some(ref worker) = hash.get(&upid.task_id) {
80 worker.request_abort();
81 } else {
82 // assume task is already stopped
83 }
84 Ok(Value::Null)
85 })?;
86
87 tokio::spawn(control_future);
88
89 Ok(())
90 }
91
92 pub fn abort_worker_async(upid: UPID) {
93 tokio::spawn(async move {
94 if let Err(err) = abort_worker(upid).await {
95 eprintln!("abort worker failed - {}", err);
96 }
97 });
98 }
99
100 pub fn abort_worker(upid: UPID) -> impl Future<Output = Result<(), Error>> {
101
102 let target_pid = upid.pid;
103
104 let socketname = format!(
105 "\0{}/proxmox-task-control-{}.sock", PROXMOX_BACKUP_VAR_RUN_DIR, target_pid);
106
107 let cmd = json!({
108 "command": "abort-task",
109 "upid": upid.to_string(),
110 });
111
112 super::send_command(socketname, cmd).map_ok(|_| ())
113 }
114
115 fn parse_worker_status_line(line: &str) -> Result<(String, UPID, Option<(i64, String)>), Error> {
116
117 let data = line.splitn(3, ' ').collect::<Vec<&str>>();
118
119 let len = data.len();
120
121 match len {
122 1 => Ok((data[0].to_owned(), data[0].parse::<UPID>()?, None)),
123 3 => {
124 let endtime = i64::from_str_radix(data[1], 16)?;
125 Ok((data[0].to_owned(), data[0].parse::<UPID>()?, Some((endtime, data[2].to_owned()))))
126 }
127 _ => bail!("wrong number of components"),
128 }
129 }
130
131 /// Create task log directory with correct permissions
132 pub fn create_task_log_dirs() -> Result<(), Error> {
133
134 try_block!({
135 let backup_user = crate::backup::backup_user()?;
136 let opts = CreateOptions::new()
137 .owner(backup_user.uid)
138 .group(backup_user.gid);
139
140 create_path(PROXMOX_BACKUP_LOG_DIR, None, Some(opts.clone()))?;
141 create_path(PROXMOX_BACKUP_TASK_DIR, None, Some(opts.clone()))?;
142 create_path(PROXMOX_BACKUP_VAR_RUN_DIR, None, Some(opts))?;
143 Ok(())
144 }).map_err(|err: Error| format_err!("unable to create task log dir - {}", err))?;
145
146 Ok(())
147 }
148
149 /// Read exits status from task log file
150 pub fn upid_read_status(upid: &UPID) -> Result<String, Error> {
151 let mut status = String::from("unknown");
152
153 let path = upid.log_path();
154
155 let mut file = File::open(path)?;
156
157 /// speedup - only read tail
158 use std::io::Seek;
159 use std::io::SeekFrom;
160 let _ = file.seek(SeekFrom::End(-8192)); // ignore errors
161
162 let reader = BufReader::new(file);
163
164 for line in reader.lines() {
165 let line = line?;
166
167 let mut iter = line.splitn(2, ": TASK ");
168 if iter.next() == None { continue; }
169 match iter.next() {
170 None => continue,
171 Some(rest) => {
172 if rest == "OK" {
173 status = String::from(rest);
174 } else if rest.starts_with("ERROR: ") {
175 status = String::from(&rest[7..]);
176 }
177 }
178 }
179 }
180
181 Ok(status)
182 }
183
184 /// Task details including parsed UPID
185 ///
186 /// If there is no `state`, the task is still running.
187 #[derive(Debug)]
188 pub struct TaskListInfo {
189 /// The parsed UPID
190 pub upid: UPID,
191 /// UPID string representation
192 pub upid_str: String,
193 /// Task `(endtime, status)` if already finished
194 ///
195 /// The `status` ise iether `unknown`, `OK`, or `ERROR: ...`
196 pub state: Option<(i64, String)>, // endtime, status
197 }
198
199 // atomically read/update the task list, update status of finished tasks
200 // new_upid is added to the list when specified.
201 // Returns a sorted list of known tasks,
202 fn update_active_workers(new_upid: Option<&UPID>) -> Result<Vec<TaskListInfo>, Error> {
203
204 let backup_user = crate::backup::backup_user()?;
205
206 let lock = crate::tools::open_file_locked(PROXMOX_BACKUP_TASK_LOCK_FN, std::time::Duration::new(10, 0))?;
207 nix::unistd::chown(PROXMOX_BACKUP_TASK_LOCK_FN, Some(backup_user.uid), Some(backup_user.gid))?;
208
209 let reader = match File::open(PROXMOX_BACKUP_ACTIVE_TASK_FN) {
210 Ok(f) => Some(BufReader::new(f)),
211 Err(err) => {
212 if err.kind() == std::io::ErrorKind::NotFound {
213 None
214 } else {
215 bail!("unable to open active worker {:?} - {}", PROXMOX_BACKUP_ACTIVE_TASK_FN, err);
216 }
217 }
218 };
219
220 let mut active_list = vec![];
221 let mut finish_list = vec![];
222
223 if let Some(lines) = reader.map(|r| r.lines()) {
224
225 for line in lines {
226 let line = line?;
227 match parse_worker_status_line(&line) {
228 Err(err) => bail!("unable to parse active worker status '{}' - {}", line, err),
229 Ok((upid_str, upid, state)) => {
230
231 let running = worker_is_active(&upid);
232
233 if running {
234 active_list.push(TaskListInfo { upid, upid_str, state: None });
235 } else {
236 match state {
237 None => {
238 println!("Detected stoped UPID {}", upid_str);
239 let status = upid_read_status(&upid)
240 .unwrap_or_else(|_| String::from("unknown"));
241 finish_list.push(TaskListInfo {
242 upid, upid_str, state: Some((Local::now().timestamp(), status))
243 });
244 }
245 Some((endtime, status)) => {
246 finish_list.push(TaskListInfo {
247 upid, upid_str, state: Some((endtime, status))
248 })
249 }
250 }
251 }
252 }
253 }
254 }
255 }
256
257 if let Some(upid) = new_upid {
258 active_list.push(TaskListInfo { upid: upid.clone(), upid_str: upid.to_string(), state: None });
259 }
260
261 // assemble list without duplicates
262 // we include all active tasks,
263 // and fill up to 1000 entries with finished tasks
264
265 let max = 1000;
266
267 let mut task_hash = HashMap::new();
268
269 for info in active_list {
270 task_hash.insert(info.upid_str.clone(), info);
271 }
272
273 for info in finish_list {
274 if task_hash.len() > max { break; }
275 if !task_hash.contains_key(&info.upid_str) {
276 task_hash.insert(info.upid_str.clone(), info);
277 }
278 }
279
280 let mut task_list: Vec<TaskListInfo> = vec![];
281 for (_, info) in task_hash { task_list.push(info); }
282
283 task_list.sort_unstable_by(|b, a| { // lastest on top
284 match (&a.state, &b.state) {
285 (Some(s1), Some(s2)) => s1.0.cmp(&s2.0),
286 (Some(_), None) => std::cmp::Ordering::Less,
287 (None, Some(_)) => std::cmp::Ordering::Greater,
288 _ => a.upid.starttime.cmp(&b.upid.starttime),
289 }
290 });
291
292 let mut raw = String::new();
293 for info in &task_list {
294 if let Some((endtime, status)) = &info.state {
295 raw.push_str(&format!("{} {:08X} {}\n", info.upid_str, endtime, status));
296 } else {
297 raw.push_str(&info.upid_str);
298 raw.push('\n');
299 }
300 }
301
302 replace_file(
303 PROXMOX_BACKUP_ACTIVE_TASK_FN,
304 raw.as_bytes(),
305 CreateOptions::new()
306 .owner(backup_user.uid)
307 .group(backup_user.gid),
308 )?;
309
310 drop(lock);
311
312 Ok(task_list)
313 }
314
315 /// Returns a sorted list of known tasks
316 ///
317 /// The list is sorted by `(starttime, endtime)` in ascending order
318 pub fn read_task_list() -> Result<Vec<TaskListInfo>, Error> {
319 update_active_workers(None)
320 }
321
322 /// Launch long running worker tasks.
323 ///
324 /// A worker task can either be a whole thread, or a simply tokio
325 /// task/future. Each task can `log()` messages, which are stored
326 /// persistently to files. Task should poll the `abort_requested`
327 /// flag, and stop execution when requested.
328 #[derive(Debug)]
329 pub struct WorkerTask {
330 upid: UPID,
331 data: Mutex<WorkerTaskData>,
332 abort_requested: AtomicBool,
333 }
334
335 impl std::fmt::Display for WorkerTask {
336
337 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
338 self.upid.fmt(f)
339 }
340 }
341
342 #[derive(Debug)]
343 struct WorkerTaskData {
344 logger: FileLogger,
345 progress: f64, // 0..1
346 pub abort_listeners: Vec<oneshot::Sender<()>>,
347 }
348
349 impl Drop for WorkerTask {
350
351 fn drop(&mut self) {
352 println!("unregister worker");
353 }
354 }
355
356 impl WorkerTask {
357
358 pub fn new(worker_type: &str, worker_id: Option<String>, username: &str, to_stdout: bool) -> Result<Arc<Self>, Error> {
359 println!("register worker");
360
361 let upid = UPID::new(worker_type, worker_id, username)?;
362 let task_id = upid.task_id;
363
364 let mut path = std::path::PathBuf::from(PROXMOX_BACKUP_TASK_DIR);
365
366 path.push(format!("{:02X}", upid.pstart % 256));
367
368 let backup_user = crate::backup::backup_user()?;
369
370 create_path(&path, None, Some(CreateOptions::new().owner(backup_user.uid).group(backup_user.gid)))?;
371
372 path.push(upid.to_string());
373
374 println!("FILE: {:?}", path);
375
376 let logger = FileLogger::new(&path, to_stdout)?;
377 nix::unistd::chown(&path, Some(backup_user.uid), Some(backup_user.gid))?;
378
379 update_active_workers(Some(&upid))?;
380
381 let worker = Arc::new(Self {
382 upid,
383 abort_requested: AtomicBool::new(false),
384 data: Mutex::new(WorkerTaskData {
385 logger,
386 progress: 0.0,
387 abort_listeners: vec![],
388 }),
389 });
390
391 let mut hash = WORKER_TASK_LIST.lock().unwrap();
392
393 hash.insert(task_id, worker.clone());
394 super::set_worker_count(hash.len());
395
396 Ok(worker)
397 }
398
399 /// Spawn a new tokio task/future.
400 pub fn spawn<F, T>(
401 worker_type: &str,
402 worker_id: Option<String>,
403 username: &str,
404 to_stdout: bool,
405 f: F,
406 ) -> Result<String, Error>
407 where F: Send + 'static + FnOnce(Arc<WorkerTask>) -> T,
408 T: Send + 'static + Future<Output = Result<(), Error>>,
409 {
410 let worker = WorkerTask::new(worker_type, worker_id, username, to_stdout)?;
411 let upid_str = worker.upid.to_string();
412 let f = f(worker.clone());
413 tokio::spawn(async move {
414 let result = f.await;
415 worker.log_result(&result);
416 });
417
418 Ok(upid_str)
419 }
420
421 /// Create a new worker thread.
422 pub fn new_thread<F>(
423 worker_type: &str,
424 worker_id: Option<String>,
425 username: &str,
426 to_stdout: bool,
427 f: F,
428 ) -> Result<String, Error>
429 where F: Send + UnwindSafe + 'static + FnOnce(Arc<WorkerTask>) -> Result<(), Error>
430 {
431 println!("register worker thread");
432
433 let (p, c) = oneshot::channel::<()>();
434
435 let worker = WorkerTask::new(worker_type, worker_id, username, to_stdout)?;
436 let upid_str = worker.upid.to_string();
437
438 let _child = std::thread::Builder::new().name(upid_str.clone()).spawn(move || {
439 let worker1 = worker.clone();
440 let result = match std::panic::catch_unwind(move || f(worker1)) {
441 Ok(r) => r,
442 Err(panic) => {
443 match panic.downcast::<&str>() {
444 Ok(panic_msg) => {
445 Err(format_err!("worker panicked: {}", panic_msg))
446 }
447 Err(_) => {
448 Err(format_err!("worker panicked: unknown type."))
449 }
450 }
451 }
452 };
453
454 worker.log_result(&result);
455 p.send(()).unwrap();
456 });
457
458 tokio::spawn(c.map(|_| ()));
459
460 Ok(upid_str)
461 }
462
463 /// Log task result, remove task from running list
464 pub fn log_result(&self, result: &Result<(), Error>) {
465
466 if let Err(err) = result {
467 self.log(&format!("TASK ERROR: {}", err));
468 } else {
469 self.log("TASK OK");
470 }
471
472 WORKER_TASK_LIST.lock().unwrap().remove(&self.upid.task_id);
473 let _ = update_active_workers(None);
474 super::set_worker_count(WORKER_TASK_LIST.lock().unwrap().len());
475 }
476
477 /// Log a message.
478 pub fn log<S: AsRef<str>>(&self, msg: S) {
479 let mut data = self.data.lock().unwrap();
480 data.logger.log(msg);
481 }
482
483 /// Set progress indicator
484 pub fn progress(&self, progress: f64) {
485 if progress >= 0.0 && progress <= 1.0 {
486 let mut data = self.data.lock().unwrap();
487 data.progress = progress;
488 } else {
489 // fixme: log!("task '{}': ignoring strange value for progress '{}'", self.upid, progress);
490 }
491 }
492
493 /// Request abort
494 pub fn request_abort(&self) {
495 eprintln!("set abort flag for worker {}", self.upid);
496 self.abort_requested.store(true, Ordering::SeqCst);
497 // noitify listeners
498 let mut data = self.data.lock().unwrap();
499 loop {
500 match data.abort_listeners.pop() {
501 None => { break; },
502 Some(ch) => {
503 let _ = ch.send(()); // ignore erros here
504 },
505 }
506 }
507 }
508
509 /// Test if abort was requested.
510 pub fn abort_requested(&self) -> bool {
511 self.abort_requested.load(Ordering::SeqCst)
512 }
513
514 /// Fail if abort was requested.
515 pub fn fail_on_abort(&self) -> Result<(), Error> {
516 if self.abort_requested() {
517 bail!("abort requested - aborting task");
518 }
519 Ok(())
520 }
521
522 /// Get a future which resolves on task abort
523 pub fn abort_future(&self) -> oneshot::Receiver<()> {
524 let (tx, rx) = oneshot::channel::<()>();
525
526 let mut data = self.data.lock().unwrap();
527 if self.abort_requested() {
528 let _ = tx.send(());
529 } else {
530 data.abort_listeners.push(tx);
531 }
532 rx
533 }
534 }