]> git.proxmox.com Git - proxmox-backup.git/blob - src/server/worker_task.rs
191d8a448a484b54380849e774f384bcdced50c8
[proxmox-backup.git] / src / server / worker_task.rs
1 use std::collections::{HashMap, VecDeque};
2 use std::fs::File;
3 use std::path::PathBuf;
4 use std::io::{Read, Write, BufRead, BufReader};
5 use std::panic::UnwindSafe;
6 use std::sync::atomic::{AtomicBool, Ordering};
7 use std::sync::{Arc, Mutex};
8
9 use anyhow::{bail, format_err, Error};
10 use futures::*;
11 use lazy_static::lazy_static;
12 use serde_json::{json, Value};
13 use serde::{Serialize, Deserialize};
14 use tokio::sync::oneshot;
15 use nix::fcntl::OFlag;
16 use once_cell::sync::OnceCell;
17
18 use proxmox::sys::linux::procfs;
19 use proxmox::try_block;
20 use proxmox::tools::fs::{create_path, replace_file, atomic_open_or_create_file, CreateOptions};
21
22 use pbs_tools::logrotate::{LogRotate, LogRotateFiles};
23 use pbs_api_types::UPID;
24 use proxmox_rest_server::{CommandoSocket, FileLogger, FileLogOptions};
25
/// Guard holding the open task-list lock file; the lock obtained via
/// `open_file_locked` is released when the guard (and file) is dropped.
struct TaskListLockGuard(File);
27
/// Paths and file-creation options used for worker-task bookkeeping.
struct WorkerTaskSetup {
    // owner/permission options applied to all created files and dirs
    file_opts: CreateOptions,
    // base directory for task logs ("<basedir>/tasks")
    taskdir: PathBuf,
    // lock file protecting reads/updates of the task list files
    task_lock_fn: PathBuf,
    // file listing (potentially) running tasks
    active_tasks_fn: PathBuf,
    // legacy (pre-1.x) index file, only read for migration
    task_index_fn: PathBuf,
    // append-only archive of finished tasks
    task_archive_fn: PathBuf,
}
36
/// Global setup, initialized exactly once via `init_worker_tasks`.
static WORKER_TASK_SETUP: OnceCell<WorkerTaskSetup> = OnceCell::new();
38
39 fn worker_task_setup() -> Result<&'static WorkerTaskSetup, Error> {
40 WORKER_TASK_SETUP.get()
41 .ok_or_else(|| format_err!("WorkerTask library is not initialized"))
42 }
43
impl WorkerTaskSetup {

    /// Derive all bookkeeping paths below `<basedir>/tasks`.
    fn new(basedir: PathBuf, file_opts: CreateOptions) -> Self {

        let mut taskdir = basedir.clone();
        taskdir.push("tasks");

        let mut task_lock_fn = taskdir.clone();
        task_lock_fn.push(".active.lock");

        let mut active_tasks_fn = taskdir.clone();
        active_tasks_fn.push("active");

        let mut task_index_fn = taskdir.clone();
        task_index_fn.push("index");

        let mut task_archive_fn = taskdir.clone();
        task_archive_fn.push("archive");

        Self {
            file_opts,
            taskdir,
            task_lock_fn,
            active_tasks_fn,
            task_index_fn,
            task_archive_fn,
        }
    }

    /// Lock the task list files (10 second timeout).
    ///
    /// The lock is released when the returned guard is dropped.
    fn lock_task_list_files(&self, exclusive: bool) -> Result<TaskListLockGuard, Error> {
        let options = self.file_opts.clone()
            .perm(nix::sys::stat::Mode::from_bits_truncate(0o660));

        let timeout = std::time::Duration::new(10, 0);

        let file = proxmox::tools::fs::open_file_locked(
            &self.task_lock_fn,
            timeout,
            exclusive,
            options,
        )?;

        Ok(TaskListLockGuard(file))
    }

    /// Log file path for `upid`.
    ///
    /// Logs are sharded into 256 subdirectories (hex of `pstart % 256`)
    /// to keep individual directories small.
    fn log_path(&self, upid: &UPID) -> std::path::PathBuf {
        let mut path = self.taskdir.clone();
        path.push(format!("{:02X}", upid.pstart % 256));
        path.push(upid.to_string());
        path
    }

    // atomically read/update the task list, update status of finished tasks
    // new_upid is added to the list when specified.
    fn update_active_workers(&self, new_upid: Option<&UPID>) -> Result<(), Error> {

        // exclusive lock: we rewrite the active file and append to the archive
        let lock = self.lock_task_list_files(true)?;

        // TODO remove with 1.x
        let mut finish_list: Vec<TaskListInfo> = read_task_file_from_path(&self.task_index_fn)?;
        let had_index_file = !finish_list.is_empty();

        // We use filter_map because one negative case wants to *move* the data into `finish_list`,
        // clippy doesn't quite catch this!
        #[allow(clippy::unnecessary_filter_map)]
        let mut active_list: Vec<TaskListInfo> = read_task_file_from_path(&self.active_tasks_fn)?
            .into_iter()
            .filter_map(|info| {
                if info.state.is_some() {
                    // this can happen when the active file still includes finished tasks
                    finish_list.push(info);
                    return None;
                }

                if !worker_is_active_local(&info.upid) {
                    // println!("Detected stopped task '{}'", &info.upid_str);
                    // worker is gone - recover its final state from the log
                    // file, or mark it Unknown with the current time
                    let now = proxmox::tools::time::epoch_i64();
                    let status = upid_read_status(&info.upid).unwrap_or(TaskState::Unknown { endtime: now });
                    finish_list.push(TaskListInfo {
                        upid: info.upid,
                        upid_str: info.upid_str,
                        state: Some(status)
                    });
                    return None;
                }

                Some(info)
            }).collect();

        if let Some(upid) = new_upid {
            active_list.push(TaskListInfo { upid: upid.clone(), upid_str: upid.to_string(), state: None });
        }

        let active_raw = render_task_list(&active_list);

        let options = self.file_opts.clone()
            .perm(nix::sys::stat::Mode::from_bits_truncate(0o660));

        // atomically replace the active file with the cleaned-up list
        replace_file(
            &self.active_tasks_fn,
            active_raw.as_bytes(),
            options,
        )?;

        // sort finished tasks by end time (entries without state compare by
        // start time) so the archive stays ordered
        finish_list.sort_unstable_by(|a, b| {
            match (&a.state, &b.state) {
                (Some(s1), Some(s2)) => s1.cmp(&s2),
                (Some(_), None) => std::cmp::Ordering::Less,
                (None, Some(_)) => std::cmp::Ordering::Greater,
                _ => a.upid.starttime.cmp(&b.upid.starttime),
            }
        });

        if !finish_list.is_empty() {
            let options = self.file_opts.clone()
                .perm(nix::sys::stat::Mode::from_bits_truncate(0o660));

            // append finished tasks to the archive file
            let mut writer = atomic_open_or_create_file(
                &self.task_archive_fn,
                OFlag::O_APPEND | OFlag::O_RDWR,
                &[],
                options,
            )?;
            for info in &finish_list {
                writer.write_all(render_task_line(&info).as_bytes())?;
            }
        }

        // TODO Remove with 1.x
        // for compatibility, if we had an INDEX file, we do not need it anymore
        if had_index_file {
            let _ = nix::unistd::unlink(&self.task_index_fn);
        }

        drop(lock);

        Ok(())
    }

    // Create task log directory with correct permissions
    fn create_task_log_dirs(&self) -> Result<(), Error> {

        try_block!({
            let dir_opts = self.file_opts.clone()
                .perm(nix::sys::stat::Mode::from_bits_truncate(0o755));

            create_path(&self.taskdir, Some(dir_opts.clone()), Some(dir_opts.clone()))?;
            // fixme:??? create_path(pbs_buildcfg::PROXMOX_BACKUP_RUN_DIR, None, Some(opts))?;
            Ok(())
        }).map_err(|err: Error| format_err!("unable to create task log dir - {}", err))
    }
}
196
197 /// Initialize the WorkerTask library
198 pub fn init_worker_tasks(basedir: PathBuf, file_opts: CreateOptions) -> Result<(), Error> {
199 let setup = WorkerTaskSetup::new(basedir, file_opts);
200 setup.create_task_log_dirs()?;
201 WORKER_TASK_SETUP.set(setup)
202 .map_err(|_| format_err!("init_worker_tasks failed - already initialized"))
203 }
204
205 /// checks if the Task Archive is bigger that 'size_threshold' bytes, and
206 /// rotates it if it is
207 pub fn rotate_task_log_archive(size_threshold: u64, compress: bool, max_files: Option<usize>) -> Result<bool, Error> {
208
209 let setup = worker_task_setup()?;
210
211 let _lock = setup.lock_task_list_files(true)?;
212
213 let mut logrotate = LogRotate::new(&setup.task_archive_fn, compress)
214 .ok_or_else(|| format_err!("could not get archive file names"))?;
215
216 logrotate.rotate(size_threshold, None, max_files)
217 }
218
219
220 /// Path to the worker log file
221 pub fn upid_log_path(upid: &UPID) -> Result<std::path::PathBuf, Error> {
222 let setup = worker_task_setup()?;
223 Ok(setup.log_path(upid))
224 }
225
226 /// Read endtime (time of last log line) and exitstatus from task log file
227 /// If there is not a single line with at valid datetime, we assume the
228 /// starttime to be the endtime
229 pub fn upid_read_status(upid: &UPID) -> Result<TaskState, Error> {
230
231 let setup = worker_task_setup()?;
232
233 let mut status = TaskState::Unknown { endtime: upid.starttime };
234
235 let path = setup.log_path(upid);
236
237 let mut file = File::open(path)?;
238
239 /// speedup - only read tail
240 use std::io::Seek;
241 use std::io::SeekFrom;
242 let _ = file.seek(SeekFrom::End(-8192)); // ignore errors
243
244 let mut data = Vec::with_capacity(8192);
245 file.read_to_end(&mut data)?;
246
247 // strip newlines at the end of the task logs
248 while data.last() == Some(&b'\n') {
249 data.pop();
250 }
251
252 let last_line = match data.iter().rposition(|c| *c == b'\n') {
253 Some(start) if data.len() > (start+1) => &data[start+1..],
254 Some(_) => &data, // should not happen, since we removed all trailing newlines
255 None => &data,
256 };
257
258 let last_line = std::str::from_utf8(last_line)
259 .map_err(|err| format_err!("upid_read_status: utf8 parse failed: {}", err))?;
260
261 let mut iter = last_line.splitn(2, ": ");
262 if let Some(time_str) = iter.next() {
263 if let Ok(endtime) = proxmox::tools::time::parse_rfc3339(time_str) {
264 // set the endtime even if we cannot parse the state
265 status = TaskState::Unknown { endtime };
266 if let Some(rest) = iter.next().and_then(|rest| rest.strip_prefix("TASK ")) {
267 if let Ok(state) = TaskState::from_endtime_and_message(endtime, rest) {
268 status = state;
269 }
270 }
271 }
272 }
273
274 Ok(status)
275 }
276
lazy_static! {
    /// All worker tasks spawned by this process, keyed by `UPID::task_id`.
    static ref WORKER_TASK_LIST: Mutex<HashMap<usize, Arc<WorkerTask>>> = Mutex::new(HashMap::new());
}
280
281 /// checks if the task UPID refers to a worker from this process
282 fn is_local_worker(upid: &UPID) -> bool {
283 upid.pid == crate::server::pid() && upid.pstart == crate::server::pstart()
284 }
285
286 /// Test if the task is still running
287 pub async fn worker_is_active(upid: &UPID) -> Result<bool, Error> {
288 if is_local_worker(upid) {
289 return Ok(WORKER_TASK_LIST.lock().unwrap().contains_key(&upid.task_id));
290 }
291
292 if procfs::check_process_running_pstart(upid.pid, upid.pstart).is_none() {
293 return Ok(false);
294 }
295
296 let sock = crate::server::ctrl_sock_from_pid(upid.pid);
297 let cmd = json!({
298 "command": "worker-task-status",
299 "args": {
300 "upid": upid.to_string(),
301 },
302 });
303 let status = proxmox_rest_server::send_command(sock, &cmd).await?;
304
305 if let Some(active) = status.as_bool() {
306 Ok(active)
307 } else {
308 bail!("got unexpected result {:?} (expected bool)", status);
309 }
310 }
311
312 /// Test if the task is still running (fast but inaccurate implementation)
313 ///
314 /// If the task is spawned from a different process, we simply return if
315 /// that process is still running. This information is good enough to detect
316 /// stale tasks...
317 pub fn worker_is_active_local(upid: &UPID) -> bool {
318 if is_local_worker(upid) {
319 WORKER_TASK_LIST.lock().unwrap().contains_key(&upid.task_id)
320 } else {
321 procfs::check_process_running_pstart(upid.pid, upid.pstart).is_some()
322 }
323 }
324
/// Register the `worker-task-abort` and `worker-task-status` control
/// socket commands; both only operate on workers of this process.
pub fn register_task_control_commands(
    commando_sock: &mut CommandoSocket,
) -> Result<(), Error> {
    // Extract and validate the "upid" argument, rejecting UPIDs that do
    // not belong to this process.
    fn get_upid(args: Option<&Value>) -> Result<UPID, Error> {
        let args = if let Some(args) = args { args } else { bail!("missing args") };
        let upid = match args.get("upid") {
            Some(Value::String(upid)) => upid.parse::<UPID>()?,
            None => bail!("no upid in args"),
            _ => bail!("unable to parse upid"),
        };
        if !is_local_worker(&upid) {
            bail!("upid does not belong to this process");
        }
        Ok(upid)
    }

    commando_sock.register_command("worker-task-abort".into(), move |args| {
        let upid = get_upid(args)?;

        abort_local_worker(upid);

        Ok(Value::Null)
    })?;
    commando_sock.register_command("worker-task-status".into(), move |args| {
        let upid = get_upid(args)?;

        let active = WORKER_TASK_LIST.lock().unwrap().contains_key(&upid.task_id);

        Ok(active.into())
    })?;

    Ok(())
}
358
359 pub fn abort_worker_async(upid: UPID) {
360 tokio::spawn(async move {
361 if let Err(err) = abort_worker(upid).await {
362 eprintln!("abort worker failed - {}", err);
363 }
364 });
365 }
366
367 pub async fn abort_worker(upid: UPID) -> Result<(), Error> {
368
369 let sock = crate::server::ctrl_sock_from_pid(upid.pid);
370 let cmd = json!({
371 "command": "worker-task-abort",
372 "args": {
373 "upid": upid.to_string(),
374 },
375 });
376 proxmox_rest_server::send_command(sock, &cmd).map_ok(|_| ()).await
377 }
378
379 fn parse_worker_status_line(line: &str) -> Result<(String, UPID, Option<TaskState>), Error> {
380
381 let data = line.splitn(3, ' ').collect::<Vec<&str>>();
382
383 let len = data.len();
384
385 match len {
386 1 => Ok((data[0].to_owned(), data[0].parse::<UPID>()?, None)),
387 3 => {
388 let endtime = i64::from_str_radix(data[1], 16)?;
389 let state = TaskState::from_endtime_and_message(endtime, data[2])?;
390 Ok((data[0].to_owned(), data[0].parse::<UPID>()?, Some(state)))
391 }
392 _ => bail!("wrong number of components"),
393 }
394 }
395
/// Task State
///
/// Every variant records the task's end time (unix epoch).
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum TaskState {
    /// The Task ended with an undefined state
    Unknown { endtime: i64 },
    /// The Task ended and there were no errors or warnings
    OK { endtime: i64 },
    /// The Task had 'count' amount of warnings and no errors
    Warning { count: u64, endtime: i64 },
    /// The Task ended with the error described in 'message'
    Error { message: String, endtime: i64 },
}
408
409 impl TaskState {
410 pub fn endtime(&self) -> i64 {
411 match *self {
412 TaskState::Unknown { endtime } => endtime,
413 TaskState::OK { endtime } => endtime,
414 TaskState::Warning { endtime, .. } => endtime,
415 TaskState::Error { endtime, .. } => endtime,
416 }
417 }
418
419 fn result_text(&self) -> String {
420 match self {
421 TaskState::Error { message, .. } => format!("TASK ERROR: {}", message),
422 other => format!("TASK {}", other),
423 }
424 }
425
426 fn from_endtime_and_message(endtime: i64, s: &str) -> Result<Self, Error> {
427 if s == "unknown" {
428 Ok(TaskState::Unknown { endtime })
429 } else if s == "OK" {
430 Ok(TaskState::OK { endtime })
431 } else if let Some(warnings) = s.strip_prefix("WARNINGS: ") {
432 let count: u64 = warnings.parse()?;
433 Ok(TaskState::Warning{ count, endtime })
434 } else if !s.is_empty() {
435 let message = if let Some(err) = s.strip_prefix("ERROR: ") { err } else { s }.to_string();
436 Ok(TaskState::Error{ message, endtime })
437 } else {
438 bail!("unable to parse Task Status '{}'", s);
439 }
440 }
441 }
442
443 impl std::cmp::PartialOrd for TaskState {
444 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
445 Some(self.endtime().cmp(&other.endtime()))
446 }
447 }
448
impl std::cmp::Ord for TaskState {
    /// Order solely by end time (used to sort the task archive).
    ///
    /// NOTE(review): this is inconsistent with the derived `PartialEq` -
    /// two different states with equal end times compare as `Equal` here
    /// but unequal via `==`; do not rely on `cmp() == Equal` implying
    /// equality.
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        self.endtime().cmp(&other.endtime())
    }
}
454
455 impl std::fmt::Display for TaskState {
456 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
457 match self {
458 TaskState::Unknown { .. } => write!(f, "unknown"),
459 TaskState::OK { .. }=> write!(f, "OK"),
460 TaskState::Warning { count, .. } => write!(f, "WARNINGS: {}", count),
461 TaskState::Error { message, .. } => write!(f, "{}", message),
462 }
463 }
464 }
465
/// Task details including parsed UPID
///
/// If there is no `state`, the task is still running.
#[derive(Debug)]
pub struct TaskListInfo {
    /// The parsed UPID
    pub upid: UPID,
    /// UPID string representation (kept verbatim from the task file)
    pub upid_str: String,
    /// Task `(endtime, status)` if already finished
    pub state: Option<TaskState>, // endtime, status
}
478
479 impl Into<pbs_api_types::TaskListItem> for TaskListInfo {
480 fn into(self) -> pbs_api_types::TaskListItem {
481 let (endtime, status) = self
482 .state
483 .map_or_else(|| (None, None), |a| (Some(a.endtime()), Some(a.to_string())));
484
485 pbs_api_types::TaskListItem {
486 upid: self.upid_str,
487 node: "localhost".to_string(),
488 pid: self.upid.pid as i64,
489 pstart: self.upid.pstart,
490 starttime: self.upid.starttime,
491 worker_type: self.upid.worker_type,
492 worker_id: self.upid.worker_id,
493 user: self.upid.auth_id,
494 endtime,
495 status,
496 }
497 }
498 }
499
500 fn render_task_line(info: &TaskListInfo) -> String {
501 let mut raw = String::new();
502 if let Some(status) = &info.state {
503 raw.push_str(&format!("{} {:08X} {}\n", info.upid_str, status.endtime(), status));
504 } else {
505 raw.push_str(&info.upid_str);
506 raw.push('\n');
507 }
508
509 raw
510 }
511
512 fn render_task_list(list: &[TaskListInfo]) -> String {
513 let mut raw = String::new();
514 for info in list {
515 raw.push_str(&render_task_line(&info));
516 }
517 raw
518 }
519
520 // note this is not locked, caller has to make sure it is
521 // this will skip (and log) lines that are not valid status lines
522 fn read_task_file<R: Read>(reader: R) -> Result<Vec<TaskListInfo>, Error>
523 {
524 let reader = BufReader::new(reader);
525 let mut list = Vec::new();
526 for line in reader.lines() {
527 let line = line?;
528 match parse_worker_status_line(&line) {
529 Ok((upid_str, upid, state)) => list.push(TaskListInfo {
530 upid_str,
531 upid,
532 state
533 }),
534 Err(err) => {
535 eprintln!("unable to parse worker status '{}' - {}", line, err);
536 continue;
537 }
538 };
539 }
540
541 Ok(list)
542 }
543
544 // note this is not locked, caller has to make sure it is
545 fn read_task_file_from_path<P>(path: P) -> Result<Vec<TaskListInfo>, Error>
546 where
547 P: AsRef<std::path::Path> + std::fmt::Debug,
548 {
549 let file = match File::open(&path) {
550 Ok(f) => f,
551 Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(Vec::new()),
552 Err(err) => bail!("unable to open task list {:?} - {}", path, err),
553 };
554
555 read_task_file(file)
556 }
557
/// Iterator over active and (optionally) archived tasks, newest first.
pub struct TaskListInfoIterator {
    // tasks still to be yielded (popped from the back)
    list: VecDeque<TaskListInfo>,
    // set once no further source of entries remains
    end: bool,
    // remaining archive files; `None` when iterating active tasks only
    archive: Option<LogRotateFiles>,
    // shared task-list lock held while iterating archived tasks
    lock: Option<TaskListLockGuard>,
}
564
impl TaskListInfoIterator {
    /// Create an iterator over the task list.
    ///
    /// With `active_only`, only currently active tasks are yielded and no
    /// lock is kept; otherwise the archive files are chained in afterwards
    /// and a shared lock is held for the iterator's lifetime.
    pub fn new(active_only: bool) -> Result<Self, Error> {

        let setup = worker_task_setup()?;

        let (read_lock, active_list) = {
            let lock = setup.lock_task_list_files(false)?;
            let active_list = read_task_file_from_path(&setup.active_tasks_fn)?;

            // active file lists finished or vanished tasks -> rewrite it first
            let needs_update = active_list
                .iter()
                .any(|info| info.state.is_some() || !worker_is_active_local(&info.upid));

            // TODO remove with 1.x
            let index_exists = setup.task_index_fn.is_file();

            if needs_update || index_exists {
                // the shared lock must be dropped so update_active_workers()
                // can take the exclusive one; re-acquire and re-read after
                drop(lock);
                setup.update_active_workers(None)?;
                let lock = setup.lock_task_list_files(false)?;
                let active_list = read_task_file_from_path(&setup.active_tasks_fn)?;
                (lock, active_list)
            } else {
                (lock, active_list)
            }
        };

        let archive = if active_only {
            None
        } else {
            let logrotate = LogRotate::new(&setup.task_archive_fn, true)
                .ok_or_else(|| format_err!("could not get archive file names"))?;
            Some(logrotate.files())
        };

        let lock = if active_only { None } else { Some(read_lock) };

        Ok(Self {
            list: active_list.into(),
            end: active_only,
            archive,
            lock,
        })
    }
}
610
impl Iterator for TaskListInfoIterator {
    type Item = Result<TaskListInfo, Error>;

    /// Pop entries from the back of the current list; when it runs dry,
    /// load the next archive file (if any) and continue, otherwise stop
    /// and release the list lock.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            if let Some(element) = self.list.pop_back() {
                return Some(Ok(element));
            } else if self.end {
                return None;
            } else {
                if let Some(mut archive) = self.archive.take() {
                    if let Some(file) = archive.next() {
                        let list = match read_task_file(file) {
                            Ok(list) => list,
                            Err(err) => return Some(Err(err)),
                        };
                        self.list.append(&mut list.into());
                        // put the remaining archive files back for next round
                        self.archive = Some(archive);
                        continue;
                    }
                }

                // no more archive files - stop and drop the shared lock
                self.end = true;
                self.lock.take();
            }
        }
    }
}
639
/// Launch long running worker tasks.
///
/// A worker task can either be a whole thread, or a simply tokio
/// task/future. Each task can `log()` messages, which are stored
/// persistently to files. Task should poll the `abort_requested`
/// flag, and stop execution when requested.
pub struct WorkerTask {
    // global path/permission configuration
    setup: &'static WorkerTaskSetup,
    // unique identifier of this task
    upid: UPID,
    // mutable state: logger, progress, warning count, abort listeners
    data: Mutex<WorkerTaskData>,
    // set once an abort was requested; polled by the task itself
    abort_requested: AtomicBool,
}
652
653 impl std::fmt::Display for WorkerTask {
654
655 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
656 self.upid.fmt(f)
657 }
658 }
659
/// Mutable per-task state, guarded by the `data` mutex in `WorkerTask`.
struct WorkerTaskData {
    // persistent (and optionally stdout) task log writer
    logger: FileLogger,
    progress: f64, // 0..1
    // number of warnings logged so far; drives the final TaskState
    warn_count: u64,
    // channels notified (and drained) when an abort is requested
    pub abort_listeners: Vec<oneshot::Sender<()>>,
}
666
impl WorkerTask {

    /// Create a new worker task: open its (exclusive) log file and
    /// register it in the global task list and the on-disk active file.
    pub fn new(
        worker_type: &str,
        worker_id: Option<String>,
        auth_id: String,
        to_stdout: bool,
    ) -> Result<Arc<Self>, Error> {

        let setup = worker_task_setup()?;

        let upid = UPID::new(worker_type, worker_id, auth_id)?;
        let task_id = upid.task_id;

        let mut path = setup.taskdir.clone();

        // same sharding as WorkerTaskSetup::log_path (& 255 == % 256)
        path.push(format!("{:02X}", upid.pstart & 255));

        let dir_opts = setup.file_opts.clone()
            .perm(nix::sys::stat::Mode::from_bits_truncate(0o755));

        create_path(&path, None, Some(dir_opts))?;

        path.push(upid.to_string());

        let logger_options = FileLogOptions {
            to_stdout,
            exclusive: true,
            prefix_time: true,
            read: true,
            file_opts: setup.file_opts.clone(),
            ..Default::default()
        };
        let logger = FileLogger::new(&path, logger_options)?;

        let worker = Arc::new(Self {
            setup,
            upid: upid.clone(),
            abort_requested: AtomicBool::new(false),
            data: Mutex::new(WorkerTaskData {
                logger,
                progress: 0.0,
                warn_count: 0,
                abort_listeners: vec![],
            }),
        });

        // scope to drop the lock again after inserting
        {
            let mut hash = WORKER_TASK_LIST.lock().unwrap();
            hash.insert(task_id, worker.clone());
            proxmox_rest_server::set_worker_count(hash.len());
        }

        // record the new task in the on-disk active list
        setup.update_active_workers(Some(&upid))?;

        Ok(worker)
    }

    /// Spawn a new tokio task/future.
    ///
    /// Returns the UPID string; the future's result is logged (and the
    /// final task state recorded) when it completes.
    pub fn spawn<F, T>(
        worker_type: &str,
        worker_id: Option<String>,
        auth_id: String,
        to_stdout: bool,
        f: F,
    ) -> Result<String, Error>
    where F: Send + 'static + FnOnce(Arc<WorkerTask>) -> T,
          T: Send + 'static + Future<Output = Result<(), Error>>,
    {
        let worker = WorkerTask::new(worker_type, worker_id, auth_id, to_stdout)?;
        let upid_str = worker.upid.to_string();
        let f = f(worker.clone());
        tokio::spawn(async move {
            let result = f.await;
            worker.log_result(&result);
        });

        Ok(upid_str)
    }

    /// Create a new worker thread.
    ///
    /// Panics inside `f` are caught and turned into a task error; the
    /// panic message is only recovered for `&str` payloads.
    pub fn new_thread<F>(
        worker_type: &str,
        worker_id: Option<String>,
        auth_id: String,
        to_stdout: bool,
        f: F,
    ) -> Result<String, Error>
    where F: Send + UnwindSafe + 'static + FnOnce(Arc<WorkerTask>) -> Result<(), Error>
    {
        let worker = WorkerTask::new(worker_type, worker_id, auth_id, to_stdout)?;
        let upid_str = worker.upid.to_string();

        let _child = std::thread::Builder::new().name(upid_str.clone()).spawn(move || {
            let worker1 = worker.clone();
            let result = match std::panic::catch_unwind(move || f(worker1)) {
                Ok(r) => r,
                Err(panic) => {
                    match panic.downcast::<&str>() {
                        Ok(panic_msg) => {
                            Err(format_err!("worker panicked: {}", panic_msg))
                        }
                        Err(_) => {
                            // NOTE(review): `String` panic payloads (e.g.
                            // from panic! with format args) also land here,
                            // losing the message - confirm if acceptable
                            Err(format_err!("worker panicked: unknown type."))
                        }
                    }
                }
            };

            worker.log_result(&result);
        });

        Ok(upid_str)
    }

    /// create state from self and a result
    ///
    /// Errors win over warnings; the end time is "now".
    pub fn create_state(&self, result: &Result<(), Error>) -> TaskState {
        let warn_count = self.data.lock().unwrap().warn_count;

        let endtime = proxmox::tools::time::epoch_i64();

        if let Err(err) = result {
            TaskState::Error { message: err.to_string(), endtime }
        } else if warn_count > 0 {
            TaskState::Warning { count: warn_count, endtime }
        } else {
            TaskState::OK { endtime }
        }
    }

    /// Log task result, remove task from running list
    pub fn log_result(&self, result: &Result<(), Error>) {
        let state = self.create_state(result);
        // final "TASK ..." line; upid_read_status() parses this back
        self.log(state.result_text());

        WORKER_TASK_LIST.lock().unwrap().remove(&self.upid.task_id);
        // best effort - the task is finished either way
        let _ = self.setup.update_active_workers(None);
        proxmox_rest_server::set_worker_count(WORKER_TASK_LIST.lock().unwrap().len());
    }

    /// Log a message.
    pub fn log<S: AsRef<str>>(&self, msg: S) {
        let mut data = self.data.lock().unwrap();
        data.logger.log(msg);
    }

    /// Log a message as warning (increments the warning counter).
    pub fn warn<S: AsRef<str>>(&self, msg: S) {
        let mut data = self.data.lock().unwrap();
        data.logger.log(format!("WARN: {}", msg.as_ref()));
        data.warn_count += 1;
    }

    /// Set progress indicator (values outside 0..1 are silently ignored)
    pub fn progress(&self, progress: f64) {
        if progress >= 0.0 && progress <= 1.0 {
            let mut data = self.data.lock().unwrap();
            data.progress = progress;
        } else {
            // fixme: log!("task '{}': ignoring strange value for progress '{}'", self.upid, progress);
        }
    }

    /// Request abort
    ///
    /// Sets the abort flag and wakes all registered abort listeners.
    pub fn request_abort(&self) {
        eprintln!("set abort flag for worker {}", self.upid);

        let prev_abort = self.abort_requested.swap(true, Ordering::SeqCst);
        if !prev_abort { // log abort one time
            self.log(format!("received abort request ..."));
        }
        // notify listeners
        let mut data = self.data.lock().unwrap();
        loop {
            match data.abort_listeners.pop() {
                None => { break; },
                Some(ch) => {
                    let _ = ch.send(()); // ignore errors here
                },
            }
        }
    }

    /// Test if abort was requested.
    pub fn abort_requested(&self) -> bool {
        self.abort_requested.load(Ordering::SeqCst)
    }

    /// Fail if abort was requested.
    pub fn fail_on_abort(&self) -> Result<(), Error> {
        if self.abort_requested() {
            bail!("abort requested - aborting task");
        }
        Ok(())
    }

    /// Get a future which resolves on task abort
    ///
    /// Resolves immediately if abort was already requested.
    pub fn abort_future(&self) -> oneshot::Receiver<()> {
        let (tx, rx) = oneshot::channel::<()>();

        let mut data = self.data.lock().unwrap();
        if self.abort_requested() {
            let _ = tx.send(());
        } else {
            data.abort_listeners.push(tx);
        }
        rx
    }

    /// The task's unique process ID.
    pub fn upid(&self) -> &UPID {
        &self.upid
    }
}
881
impl pbs_datastore::task::TaskState for WorkerTask {
    fn check_abort(&self) -> Result<(), Error> {
        self.fail_on_abort()
    }

    /// Route `log` crate levels into the task log.
    fn log(&self, level: log::Level, message: &std::fmt::Arguments) {
        match level {
            // NOTE(review): Error is mapped to warn() just like Warn, so
            // errors only bump the warning counter - confirm intended
            log::Level::Error => self.warn(&message.to_string()),
            log::Level::Warn => self.warn(&message.to_string()),
            log::Level::Info => self.log(&message.to_string()),
            log::Level::Debug => self.log(&format!("DEBUG: {}", message)),
            log::Level::Trace => self.log(&format!("TRACE: {}", message)),
        }
    }
}
897
898 /// Wait for a locally spanned worker task
899 ///
900 /// Note: local workers should print logs to stdout, so there is no
901 /// need to fetch/display logs. We just wait for the worker to finish.
902 pub async fn wait_for_local_worker(upid_str: &str) -> Result<(), Error> {
903
904 let upid: UPID = upid_str.parse()?;
905
906 let sleep_duration = core::time::Duration::new(0, 100_000_000);
907
908 loop {
909 if worker_is_active_local(&upid) {
910 tokio::time::sleep(sleep_duration).await;
911 } else {
912 break;
913 }
914 }
915 Ok(())
916 }
917
918 /// Request abort of a local worker (if existing and running)
919 pub fn abort_local_worker(upid: UPID) {
920 if let Some(ref worker) = WORKER_TASK_LIST.lock().unwrap().get(&upid.task_id) {
921 worker.request_abort();
922 }
923 }