]> git.proxmox.com Git - proxmox-backup.git/blame - src/server/worker_task.rs
tools: file logger: use option struct to control behavior
[proxmox-backup.git] / src / server / worker_task.rs
CommitLineData
e7244387 1use std::collections::{HashMap, VecDeque};
4b01c983 2use std::fs::File;
9a760917 3use std::path::Path;
5ade6c25 4use std::io::{Read, Write, BufRead, BufReader};
d3f4c08f 5use std::panic::UnwindSafe;
18c0df4c
WB
6use std::sync::atomic::{AtomicBool, Ordering};
7use std::sync::{Arc, Mutex};
d3f4c08f 8
f7d4e4b5 9use anyhow::{bail, format_err, Error};
18c0df4c
WB
10use futures::*;
11use lazy_static::lazy_static;
619495b2 12use nix::unistd::Pid;
321070b4 13use serde_json::{json, Value};
4c116baf 14use serde::{Serialize, Deserialize};
18c0df4c 15use tokio::sync::oneshot;
479f6e40 16
619495b2 17use proxmox::sys::linux::procfs;
9ea4bce4 18use proxmox::try_block;
98c259b4 19use proxmox::tools::fs::{create_path, open_file_locked, replace_file, CreateOptions};
e18a6c9e 20
634132fe
DM
21use super::UPID;
22
e7244387 23use crate::tools::logrotate::{LogRotate, LogRotateFiles};
c0df91f8 24use crate::tools::{FileLogger, FileLogOptions};
e7cb4dc5 25use crate::api2::types::Userid;
479f6e40 26
// The path fragments are defined as macros (not only as `const` items)
// because `concat!` only accepts literals; the constants below are built
// from these macros.
macro_rules! PROXMOX_BACKUP_VAR_RUN_DIR_M {
    () => {
        "/run/proxmox-backup"
    };
}
macro_rules! PROXMOX_BACKUP_LOG_DIR_M {
    () => {
        "/var/log/proxmox-backup"
    };
}
macro_rules! PROXMOX_BACKUP_TASK_DIR_M {
    () => {
        concat!(PROXMOX_BACKUP_LOG_DIR_M!(), "/tasks")
    };
}

/// Runtime directory; also used as prefix of the task control socket names.
pub const PROXMOX_BACKUP_VAR_RUN_DIR: &str = PROXMOX_BACKUP_VAR_RUN_DIR_M!();
/// Base log directory.
pub const PROXMOX_BACKUP_LOG_DIR: &str = PROXMOX_BACKUP_LOG_DIR_M!();
/// Directory holding the task logs and the task list files.
pub const PROXMOX_BACKUP_TASK_DIR: &str = PROXMOX_BACKUP_TASK_DIR_M!();
/// Lock file taken (shared or exclusive) while accessing the task list files.
pub const PROXMOX_BACKUP_TASK_LOCK_FN: &str =
    concat!(PROXMOX_BACKUP_TASK_DIR_M!(), "/.active.lock");
/// List of tasks which are (believed to be) still running.
pub const PROXMOX_BACKUP_ACTIVE_TASK_FN: &str = concat!(PROXMOX_BACKUP_TASK_DIR_M!(), "/active");
/// List of the most recently finished tasks.
pub const PROXMOX_BACKUP_INDEX_TASK_FN: &str = concat!(PROXMOX_BACKUP_TASK_DIR_M!(), "/index");
/// Finished tasks which no longer fit into the index file.
pub const PROXMOX_BACKUP_ARCHIVE_TASK_FN: &str = concat!(PROXMOX_BACKUP_TASK_DIR_M!(), "/archive");

/// Maximum number of finished tasks kept in the index file.
const MAX_INDEX_TASKS: usize = 1000;
479f6e40
DM
40
41lazy_static! {
42 static ref WORKER_TASK_LIST: Mutex<HashMap<usize, Arc<WorkerTask>>> = Mutex::new(HashMap::new());
d607b886
DM
43
44 static ref MY_PID: i32 = unsafe { libc::getpid() };
6a0dc4a5 45 static ref MY_PID_PSTART: u64 = procfs::PidStat::read_from_pid(Pid::from_raw(*MY_PID))
619495b2
WB
46 .unwrap()
47 .starttime;
479f6e40
DM
48}
49
634132fe 50/// Test if the task is still running
5751e495
DM
51pub async fn worker_is_active(upid: &UPID) -> Result<bool, Error> {
52 if (upid.pid == *MY_PID) && (upid.pstart == *MY_PID_PSTART) {
53 return Ok(WORKER_TASK_LIST.lock().unwrap().contains_key(&upid.task_id));
54 }
55
56 if !procfs::check_process_running_pstart(upid.pid, upid.pstart).is_some() {
57 return Ok(false);
58 }
59
60 let socketname = format!(
61 "\0{}/proxmox-task-control-{}.sock", PROXMOX_BACKUP_VAR_RUN_DIR, upid.pid);
62
63 let cmd = json!({
64 "command": "status",
65 "upid": upid.to_string(),
66 });
67
68 let status = super::send_command(socketname, cmd).await?;
4494d078 69
5751e495
DM
70 if let Some(active) = status.as_bool() {
71 Ok(active)
72 } else {
73 bail!("got unexpected result {:?} (expected bool)", status);
74 }
75}
76
77/// Test if the task is still running (fast but inaccurate implementation)
78///
79/// If the task is spanned from a different process, we simply return if
80/// that process is still running. This information is good enough to detect
81/// stale tasks...
77ebbefc 82pub fn worker_is_active_local(upid: &UPID) -> bool {
634132fe 83 if (upid.pid == *MY_PID) && (upid.pstart == *MY_PID_PSTART) {
62ee2eb4 84 WORKER_TASK_LIST.lock().unwrap().contains_key(&upid.task_id)
634132fe 85 } else {
62ee2eb4 86 procfs::check_process_running_pstart(upid.pid, upid.pstart).is_some()
479f6e40
DM
87 }
88}
89
d607b886
DM
90pub fn create_task_control_socket() -> Result<(), Error> {
91
92 let socketname = format!(
9b002cbc 93 "\0{}/proxmox-task-control-{}.sock", PROXMOX_BACKUP_VAR_RUN_DIR, *MY_PID);
d607b886 94
9b002cbc 95 let control_future = super::create_control_socket(socketname, |param| {
d607b886 96 let param = param.as_object()
62ee2eb4 97 .ok_or_else(|| format_err!("unable to parse parameters (expected json object)"))?;
321070b4 98 if param.keys().count() != 2 { bail!("wrong number of parameters"); }
d607b886 99
5751e495 100 let command = param["command"].as_str()
62ee2eb4 101 .ok_or_else(|| format_err!("unable to parse parameters (missing command)"))?;
d607b886 102
5751e495
DM
103 // we have only two commands for now
104 if !(command == "abort-task" || command == "status") { bail!("got unknown command '{}'", command); }
d607b886
DM
105
106 let upid_str = param["upid"].as_str()
62ee2eb4 107 .ok_or_else(|| format_err!("unable to parse parameters (missing upid)"))?;
d607b886
DM
108
109 let upid = upid_str.parse::<UPID>()?;
110
111 if !((upid.pid == *MY_PID) && (upid.pstart == *MY_PID_PSTART)) {
112 bail!("upid does not belong to this process");
113 }
114
115 let hash = WORKER_TASK_LIST.lock().unwrap();
5751e495
DM
116
117 match command {
118 "abort-task" => {
119 if let Some(ref worker) = hash.get(&upid.task_id) {
120 worker.request_abort();
121 } else {
122 // assume task is already stopped
123 }
124 Ok(Value::Null)
125 }
126 "status" => {
127 let active = hash.contains_key(&upid.task_id);
128 Ok(active.into())
129 }
130 _ => {
131 bail!("got unknown command '{}'", command);
132 }
d607b886 133 }
d607b886
DM
134 })?;
135
136 tokio::spawn(control_future);
137
138 Ok(())
139}
140
321070b4 141pub fn abort_worker_async(upid: UPID) {
75fef4b4
WB
142 tokio::spawn(async move {
143 if let Err(err) = abort_worker(upid).await {
321070b4
DM
144 eprintln!("abort worker failed - {}", err);
145 }
75fef4b4 146 });
321070b4
DM
147}
148
5751e495 149pub async fn abort_worker(upid: UPID) -> Result<(), Error> {
321070b4
DM
150
151 let target_pid = upid.pid;
152
153 let socketname = format!(
154 "\0{}/proxmox-task-control-{}.sock", PROXMOX_BACKUP_VAR_RUN_DIR, target_pid);
155
156 let cmd = json!({
157 "command": "abort-task",
158 "upid": upid.to_string(),
159 });
160
5751e495 161 super::send_command(socketname, cmd).map_ok(|_| ()).await
321070b4
DM
162}
163
77bd2a46 164fn parse_worker_status_line(line: &str) -> Result<(String, UPID, Option<TaskState>), Error> {
4b01c983
DM
165
166 let data = line.splitn(3, ' ').collect::<Vec<&str>>();
167
168 let len = data.len();
169
170 match len {
171 1 => Ok((data[0].to_owned(), data[0].parse::<UPID>()?, None)),
172 3 => {
173 let endtime = i64::from_str_radix(data[1], 16)?;
77bd2a46
DC
174 let state = TaskState::from_endtime_and_message(endtime, data[2])?;
175 Ok((data[0].to_owned(), data[0].parse::<UPID>()?, Some(state)))
4b01c983
DM
176 }
177 _ => bail!("wrong number of components"),
178 }
179}
180
35950380 181/// Create task log directory with correct permissions
d607b886 182pub fn create_task_log_dirs() -> Result<(), Error> {
35950380
DM
183
184 try_block!({
f74a03da 185 let backup_user = crate::backup::backup_user()?;
35238e23 186 let opts = CreateOptions::new()
f74a03da
DM
187 .owner(backup_user.uid)
188 .group(backup_user.gid);
35950380 189
35238e23
WB
190 create_path(PROXMOX_BACKUP_LOG_DIR, None, Some(opts.clone()))?;
191 create_path(PROXMOX_BACKUP_TASK_DIR, None, Some(opts.clone()))?;
192 create_path(PROXMOX_BACKUP_VAR_RUN_DIR, None, Some(opts))?;
35950380
DM
193 Ok(())
194 }).map_err(|err: Error| format_err!("unable to create task log dir - {}", err))?;
195
196 Ok(())
197}
198
ae197dda
DC
199/// Read endtime (time of last log line) and exitstatus from task log file
200/// If there is not a single line with at valid datetime, we assume the
201/// starttime to be the endtime
77bd2a46 202pub fn upid_read_status(upid: &UPID) -> Result<TaskState, Error> {
56b66645
DM
203
204 let mut status = TaskState::Unknown { endtime: upid.starttime };
4b01c983 205
4494d078 206 let path = upid.log_path();
4b01c983 207
0bfd87bc
DM
208 let mut file = File::open(path)?;
209
210 /// speedup - only read tail
211 use std::io::Seek;
212 use std::io::SeekFrom;
213 let _ = file.seek(SeekFrom::End(-8192)); // ignore errors
214
56b66645
DM
215 let mut data = Vec::with_capacity(8192);
216 file.read_to_end(&mut data)?;
4b01c983 217
a4c11436 218 // task logs should end with newline, we do not want it here
5e39918f 219 if data.len() > 0 && data[data.len()-1] == b'\n' {
a4c11436
DC
220 data.pop();
221 }
222
56b66645
DM
223 let last_line = {
224 let mut start = 0;
a4c11436 225 for pos in (0..data.len()).rev() {
56b66645 226 if data[pos] == b'\n' {
5e39918f 227 start = data.len().min(pos + 1);
56b66645
DM
228 break;
229 }
ae197dda 230 }
56b66645
DM
231 &data[start..]
232 };
233
234 let last_line = std::str::from_utf8(last_line)
235 .map_err(|err| format_err!("upid_read_status: utf8 parse failed: {}", err))?;
236
237 let mut iter = last_line.splitn(2, ": ");
238 if let Some(time_str) = iter.next() {
6a7be83e 239 if let Ok(endtime) = proxmox::tools::time::parse_rfc3339(time_str) {
56b66645 240 if let Some(rest) = iter.next().and_then(|rest| rest.strip_prefix("TASK ")) {
77bd2a46 241 if let Ok(state) = TaskState::from_endtime_and_message(endtime, rest) {
4c116baf 242 status = state;
4b01c983
DM
243 }
244 }
245 }
246 }
247
77bd2a46 248 Ok(status)
4b01c983
DM
249}
250
4c116baf 251/// Task State
77bd2a46 252#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
4c116baf
DC
253pub enum TaskState {
254 /// The Task ended with an undefined state
77bd2a46 255 Unknown { endtime: i64 },
4c116baf 256 /// The Task ended and there were no errors or warnings
77bd2a46 257 OK { endtime: i64 },
4c116baf 258 /// The Task had 'count' amount of warnings and no errors
77bd2a46 259 Warning { count: u64, endtime: i64 },
4c116baf 260 /// The Task ended with the error described in 'message'
77bd2a46 261 Error { message: String, endtime: i64 },
4c116baf
DC
262}
263
264impl TaskState {
77bd2a46
DC
265 pub fn endtime(&self) -> i64 {
266 match *self {
267 TaskState::Unknown { endtime } => endtime,
268 TaskState::OK { endtime } => endtime,
269 TaskState::Warning { endtime, .. } => endtime,
270 TaskState::Error { endtime, .. } => endtime,
4c116baf
DC
271 }
272 }
4c116baf 273
77bd2a46 274 fn result_text(&self) -> String {
4c116baf 275 match self {
77bd2a46
DC
276 TaskState::Error { message, .. } => format!("TASK ERROR: {}", message),
277 other => format!("TASK {}", other),
4c116baf
DC
278 }
279 }
4c116baf 280
77bd2a46 281 fn from_endtime_and_message(endtime: i64, s: &str) -> Result<Self, Error> {
4c116baf 282 if s == "unknown" {
77bd2a46 283 Ok(TaskState::Unknown { endtime })
4c116baf 284 } else if s == "OK" {
77bd2a46 285 Ok(TaskState::OK { endtime })
4c116baf
DC
286 } else if s.starts_with("WARNINGS: ") {
287 let count: u64 = s[10..].parse()?;
77bd2a46 288 Ok(TaskState::Warning{ count, endtime })
4c116baf
DC
289 } else if s.len() > 0 {
290 let message = if s.starts_with("ERROR: ") { &s[7..] } else { s }.to_string();
77bd2a46 291 Ok(TaskState::Error{ message, endtime })
4c116baf
DC
292 } else {
293 bail!("unable to parse Task Status '{}'", s);
294 }
295 }
296}
297
77bd2a46
DC
298impl std::cmp::PartialOrd for TaskState {
299 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
300 Some(self.endtime().cmp(&other.endtime()))
301 }
302}
303
304impl std::cmp::Ord for TaskState {
305 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
306 self.endtime().cmp(&other.endtime())
307 }
308}
309
310impl std::fmt::Display for TaskState {
311 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
312 match self {
313 TaskState::Unknown { .. } => write!(f, "unknown"),
314 TaskState::OK { .. }=> write!(f, "OK"),
315 TaskState::Warning { count, .. } => write!(f, "WARNINGS: {}", count),
316 TaskState::Error { message, .. } => write!(f, "{}", message),
317 }
318 }
319}
320
93aebb38
DM
321/// Task details including parsed UPID
322///
323/// If there is no `state`, the task is still running.
324#[derive(Debug)]
325pub struct TaskListInfo {
326 /// The parsed UPID
327 pub upid: UPID,
328 /// UPID string representation
329 pub upid_str: String,
330 /// Task `(endtime, status)` if already finished
77bd2a46 331 pub state: Option<TaskState>, // endtime, status
93aebb38
DM
332}
333
66f4e6a8
DC
334fn lock_task_list_files(exclusive: bool) -> Result<std::fs::File, Error> {
335 let backup_user = crate::backup::backup_user()?;
336
337 let lock = open_file_locked(PROXMOX_BACKUP_TASK_LOCK_FN, std::time::Duration::new(10, 0), exclusive)?;
338 nix::unistd::chown(PROXMOX_BACKUP_TASK_LOCK_FN, Some(backup_user.uid), Some(backup_user.gid))?;
339
340 Ok(lock)
341}
342
9a760917
DC
343/// checks if the Task Archive is bigger that 'size_threshold' bytes, and
344/// rotates it if it is
345pub fn rotate_task_log_archive(size_threshold: u64, compress: bool, max_files: Option<usize>) -> Result<bool, Error> {
346 let _lock = lock_task_list_files(true)?;
347 let path = Path::new(PROXMOX_BACKUP_ARCHIVE_TASK_FN);
2d81f7b0
DC
348 let metadata = match path.metadata() {
349 Ok(metadata) => metadata,
350 Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(false),
351 Err(err) => bail!("unable to open task archive - {}", err),
352 };
353
9a760917
DC
354 if metadata.len() > size_threshold {
355 let mut logrotate = LogRotate::new(PROXMOX_BACKUP_ARCHIVE_TASK_FN, compress).ok_or_else(|| format_err!("could not get archive file names"))?;
356 let backup_user = crate::backup::backup_user()?;
357 logrotate.rotate(
358 CreateOptions::new()
359 .owner(backup_user.uid)
360 .group(backup_user.gid),
361 max_files,
362 )?;
363 Ok(true)
364 } else {
365 Ok(false)
366 }
367}
368
93aebb38
DM
369// atomically read/update the task list, update status of finished tasks
370// new_upid is added to the list when specified.
c386b06f 371fn update_active_workers(new_upid: Option<&UPID>) -> Result<(), Error> {
4b01c983 372
f74a03da 373 let backup_user = crate::backup::backup_user()?;
35950380 374
66f4e6a8 375 let lock = lock_task_list_files(true)?;
4b01c983 376
784fa1c2
DC
377 let mut finish_list: Vec<TaskListInfo> = read_task_file_from_path(PROXMOX_BACKUP_INDEX_TASK_FN)?;
378 let mut active_list: Vec<TaskListInfo> = read_task_file_from_path(PROXMOX_BACKUP_ACTIVE_TASK_FN)?
379 .into_iter()
380 .filter_map(|info| {
381 if info.state.is_some() {
382 // this can happen when the active file still includes finished tasks
383 finish_list.push(info);
384 return None;
4b01c983 385 }
4b01c983 386
784fa1c2
DC
387 if !worker_is_active_local(&info.upid) {
388 println!("Detected stopped UPID {}", &info.upid_str);
389 let now = proxmox::tools::time::epoch_i64();
390 let status = upid_read_status(&info.upid)
391 .unwrap_or_else(|_| TaskState::Unknown { endtime: now });
392 finish_list.push(TaskListInfo {
393 upid: info.upid,
394 upid_str: info.upid_str,
395 state: Some(status)
396 });
397 return None;
4b01c983 398 }
784fa1c2
DC
399
400 Some(info)
401 }).collect();
4b01c983
DM
402
403 if let Some(upid) = new_upid {
404 active_list.push(TaskListInfo { upid: upid.clone(), upid_str: upid.to_string(), state: None });
405 }
406
784fa1c2 407 let active_raw = render_task_list(&active_list);
4b01c983 408
784fa1c2
DC
409 replace_file(
410 PROXMOX_BACKUP_ACTIVE_TASK_FN,
411 active_raw.as_bytes(),
412 CreateOptions::new()
413 .owner(backup_user.uid)
414 .group(backup_user.gid),
415 )?;
93aebb38 416
784fa1c2 417 finish_list.sort_unstable_by(|a, b| {
4b01c983 418 match (&a.state, &b.state) {
77bd2a46 419 (Some(s1), Some(s2)) => s1.cmp(&s2),
4b01c983
DM
420 (Some(_), None) => std::cmp::Ordering::Less,
421 (None, Some(_)) => std::cmp::Ordering::Greater,
422 _ => a.upid.starttime.cmp(&b.upid.starttime),
423 }
424 });
425
7eebe148
DC
426
427 let start = if finish_list.len() > MAX_INDEX_TASKS {
428 finish_list.len() - MAX_INDEX_TASKS
429 } else {
430 0
431 };
432
784fa1c2 433 let end = (start+MAX_INDEX_TASKS).min(finish_list.len());
7eebe148
DC
434
435 let index_raw = if end > start {
436 render_task_list(&finish_list[start..end])
437 } else {
438 "".to_string()
439 };
4b01c983 440
feaa1ad3 441 replace_file(
784fa1c2
DC
442 PROXMOX_BACKUP_INDEX_TASK_FN,
443 index_raw.as_bytes(),
feaa1ad3 444 CreateOptions::new()
f74a03da
DM
445 .owner(backup_user.uid)
446 .group(backup_user.gid),
feaa1ad3 447 )?;
4b01c983 448
5ade6c25
DC
449 if !finish_list.is_empty() && start > 0 {
450 match std::fs::OpenOptions::new().append(true).create(true).open(PROXMOX_BACKUP_ARCHIVE_TASK_FN) {
451 Ok(mut writer) => {
452 for info in &finish_list[0..start] {
453 writer.write_all(render_task_line(&info).as_bytes())?;
454 }
455 },
456 Err(err) => bail!("could not write task archive - {}", err),
457 }
458
459 nix::unistd::chown(PROXMOX_BACKUP_ARCHIVE_TASK_FN, Some(backup_user.uid), Some(backup_user.gid))?;
460 }
461
4b01c983
DM
462 drop(lock);
463
c386b06f 464 Ok(())
93aebb38 465}
4b01c983 466
bbeb0256
DC
467fn render_task_line(info: &TaskListInfo) -> String {
468 let mut raw = String::new();
469 if let Some(status) = &info.state {
470 raw.push_str(&format!("{} {:08X} {}\n", info.upid_str, status.endtime(), status));
471 } else {
472 raw.push_str(&info.upid_str);
473 raw.push('\n');
474 }
475
476 raw
477}
478
479fn render_task_list(list: &[TaskListInfo]) -> String {
480 let mut raw = String::new();
481 for info in list {
482 raw.push_str(&render_task_line(&info));
483 }
484 raw
485}
486
784fa1c2
DC
487// note this is not locked, caller has to make sure it is
488// this will skip (and log) lines that are not valid status lines
489fn read_task_file<R: Read>(reader: R) -> Result<Vec<TaskListInfo>, Error>
490{
491 let reader = BufReader::new(reader);
492 let mut list = Vec::new();
493 for line in reader.lines() {
494 let line = line?;
495 match parse_worker_status_line(&line) {
496 Ok((upid_str, upid, state)) => list.push(TaskListInfo {
497 upid_str,
498 upid,
499 state
500 }),
501 Err(err) => {
502 eprintln!("unable to parse worker status '{}' - {}", line, err);
503 continue;
504 }
505 };
506 }
507
508 Ok(list)
509}
510
511// note this is not locked, caller has to make sure it is
512fn read_task_file_from_path<P>(path: P) -> Result<Vec<TaskListInfo>, Error>
513where
514 P: AsRef<std::path::Path> + std::fmt::Debug,
515{
516 let file = match File::open(&path) {
517 Ok(f) => f,
518 Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(Vec::new()),
519 Err(err) => bail!("unable to open task list {:?} - {}", path, err),
520 };
521
522 read_task_file(file)
523}
524
e7244387
DC
/// Position of a `TaskListInfoIterator`: the file whose entries were
/// loaded most recently.
enum TaskFile {
    /// Entries of the active file are being handed out
    Active,
    /// Entries of the index file are being handed out
    Index,
    /// Entries of an archive file are being handed out
    Archive,
    /// Nothing left to read
    End,
}
531
532pub struct TaskListInfoIterator {
533 list: VecDeque<TaskListInfo>,
534 file: TaskFile,
535 archive: Option<LogRotateFiles>,
536 lock: Option<File>,
537}
538
539impl TaskListInfoIterator {
540 pub fn new(active_only: bool) -> Result<Self, Error> {
541 let (read_lock, active_list) = {
542 let lock = lock_task_list_files(false)?;
543 let active_list = read_task_file_from_path(PROXMOX_BACKUP_ACTIVE_TASK_FN)?;
544
545 let needs_update = active_list
546 .iter()
df4827f2 547 .any(|info| info.state.is_some() || !worker_is_active_local(&info.upid));
e7244387
DC
548
549 if needs_update {
550 drop(lock);
551 update_active_workers(None)?;
552 let lock = lock_task_list_files(false)?;
553 let active_list = read_task_file_from_path(PROXMOX_BACKUP_ACTIVE_TASK_FN)?;
554 (lock, active_list)
555 } else {
556 (lock, active_list)
557 }
558 };
559
560 let archive = if active_only {
561 None
562 } else {
563 let logrotate = LogRotate::new(PROXMOX_BACKUP_ARCHIVE_TASK_FN, true).ok_or_else(|| format_err!("could not get archive file names"))?;
564 Some(logrotate.files())
565 };
566
567 let file = if active_only { TaskFile::End } else { TaskFile::Active };
568 let lock = if active_only { None } else { Some(read_lock) };
569
570 Ok(Self {
571 list: active_list.into(),
572 file,
573 archive,
574 lock,
575 })
576 }
577}
578
579impl Iterator for TaskListInfoIterator {
580 type Item = Result<TaskListInfo, Error>;
581
582 fn next(&mut self) -> Option<Self::Item> {
583 loop {
584 if let Some(element) = self.list.pop_back() {
585 return Some(Ok(element));
586 } else {
587 match self.file {
588 TaskFile::Active => {
589 let index = match read_task_file_from_path(PROXMOX_BACKUP_INDEX_TASK_FN) {
590 Ok(index) => index,
591 Err(err) => return Some(Err(err)),
592 };
593 self.list.append(&mut index.into());
594 self.file = TaskFile::Index;
595 },
596 TaskFile::Index | TaskFile::Archive => {
597 if let Some(mut archive) = self.archive.take() {
598 if let Some(file) = archive.next() {
599 let list = match read_task_file(file) {
600 Ok(list) => list,
601 Err(err) => return Some(Err(err)),
602 };
603 self.list.append(&mut list.into());
604 self.archive = Some(archive);
605 self.file = TaskFile::Archive;
606 continue;
607 }
608 }
609 self.file = TaskFile::End;
610 self.lock.take();
611 return None;
612 }
613 TaskFile::End => return None,
614 }
615 }
616 }
617 }
618}
619
882594c5
DM
620/// Launch long running worker tasks.
621///
622/// A worker task can either be a whole thread, or a simply tokio
623/// task/future. Each task can `log()` messages, which are stored
624/// persistently to files. Task should poll the `abort_requested`
625/// flag, and stop execution when requested.
479f6e40
DM
626#[derive(Debug)]
627pub struct WorkerTask {
628 upid: UPID,
629 data: Mutex<WorkerTaskData>,
630 abort_requested: AtomicBool,
631}
632
633impl std::fmt::Display for WorkerTask {
634
635 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
636 self.upid.fmt(f)
637 }
638}
639
640#[derive(Debug)]
641struct WorkerTaskData {
642 logger: FileLogger,
643 progress: f64, // 0..1
f6de2c73 644 warn_count: u64,
75bc49be 645 pub abort_listeners: Vec<oneshot::Sender<()>>,
479f6e40
DM
646}
647
648impl Drop for WorkerTask {
649
650 fn drop(&mut self) {
651 println!("unregister worker");
652 }
653}
654
655impl WorkerTask {
656
e7cb4dc5 657 pub fn new(worker_type: &str, worker_id: Option<String>, userid: Userid, to_stdout: bool) -> Result<Arc<Self>, Error> {
479f6e40
DM
658 println!("register worker");
659
e7cb4dc5 660 let upid = UPID::new(worker_type, worker_id, userid)?;
634132fe 661 let task_id = upid.task_id;
479f6e40 662
634132fe 663 let mut path = std::path::PathBuf::from(PROXMOX_BACKUP_TASK_DIR);
35950380 664
479f6e40
DM
665 path.push(format!("{:02X}", upid.pstart % 256));
666
f74a03da 667 let backup_user = crate::backup::backup_user()?;
35950380 668
f74a03da 669 create_path(&path, None, Some(CreateOptions::new().owner(backup_user.uid).group(backup_user.gid)))?;
479f6e40
DM
670
671 path.push(upid.to_string());
672
673 println!("FILE: {:?}", path);
674
c0df91f8
TL
675 let logger_options = FileLogOptions {
676 to_stdout: to_stdout,
677 exclusive: true,
678 read: true,
679 ..Default::default()
680 };
681 let logger = FileLogger::new(&path, logger_options)?;
f74a03da 682 nix::unistd::chown(&path, Some(backup_user.uid), Some(backup_user.gid))?;
479f6e40
DM
683
684 let worker = Arc::new(Self {
05d755b2 685 upid: upid.clone(),
479f6e40
DM
686 abort_requested: AtomicBool::new(false),
687 data: Mutex::new(WorkerTaskData {
688 logger,
689 progress: 0.0,
f6de2c73 690 warn_count: 0,
75bc49be 691 abort_listeners: vec![],
479f6e40
DM
692 }),
693 });
694
05d755b2
DC
695 // scope to drop the lock again after inserting
696 {
697 let mut hash = WORKER_TASK_LIST.lock().unwrap();
698 hash.insert(task_id, worker.clone());
699 super::set_worker_count(hash.len());
700 }
7a630df7 701
05d755b2 702 update_active_workers(Some(&upid))?;
479f6e40
DM
703
704 Ok(worker)
705 }
706
882594c5 707 /// Spawn a new tokio task/future.
660c6846
DM
708 pub fn spawn<F, T>(
709 worker_type: &str,
710 worker_id: Option<String>,
e7cb4dc5 711 userid: Userid,
660c6846
DM
712 to_stdout: bool,
713 f: F,
714 ) -> Result<String, Error>
479f6e40 715 where F: Send + 'static + FnOnce(Arc<WorkerTask>) -> T,
75fef4b4 716 T: Send + 'static + Future<Output = Result<(), Error>>,
479f6e40 717 {
e7cb4dc5 718 let worker = WorkerTask::new(worker_type, worker_id, userid, to_stdout)?;
660c6846 719 let upid_str = worker.upid.to_string();
75fef4b4
WB
720 let f = f(worker.clone());
721 tokio::spawn(async move {
722 let result = f.await;
dd8e744f 723 worker.log_result(&result);
75fef4b4 724 });
479f6e40 725
660c6846 726 Ok(upid_str)
479f6e40
DM
727 }
728
882594c5 729 /// Create a new worker thread.
660c6846
DM
730 pub fn new_thread<F>(
731 worker_type: &str,
732 worker_id: Option<String>,
e7cb4dc5 733 userid: Userid,
660c6846
DM
734 to_stdout: bool,
735 f: F,
736 ) -> Result<String, Error>
d3f4c08f 737 where F: Send + UnwindSafe + 'static + FnOnce(Arc<WorkerTask>) -> Result<(), Error>
479f6e40
DM
738 {
739 println!("register worker thread");
740
e7cb4dc5 741 let worker = WorkerTask::new(worker_type, worker_id, userid, to_stdout)?;
660c6846 742 let upid_str = worker.upid.to_string();
479f6e40 743
217170e1 744 let _child = std::thread::Builder::new().name(upid_str.clone()).spawn(move || {
d3f4c08f
DM
745 let worker1 = worker.clone();
746 let result = match std::panic::catch_unwind(move || f(worker1)) {
747 Ok(r) => r,
748 Err(panic) => {
749 match panic.downcast::<&str>() {
750 Ok(panic_msg) => {
751 Err(format_err!("worker panicked: {}", panic_msg))
752 }
753 Err(_) => {
754 Err(format_err!("worker panicked: unknown type."))
755 }
756 }
757 }
758 };
759
dd8e744f 760 worker.log_result(&result);
479f6e40
DM
761 });
762
660c6846 763 Ok(upid_str)
479f6e40
DM
764 }
765
4c116baf
DC
766 /// create state from self and a result
767 pub fn create_state(&self, result: &Result<(), Error>) -> TaskState {
f6de2c73 768 let warn_count = self.data.lock().unwrap().warn_count;
cef03f41 769
6a7be83e 770 let endtime = proxmox::tools::time::epoch_i64();
77bd2a46 771
4b01c983 772 if let Err(err) = result {
77bd2a46 773 TaskState::Error { message: err.to_string(), endtime }
f6de2c73 774 } else if warn_count > 0 {
77bd2a46 775 TaskState::Warning { count: warn_count, endtime }
4b01c983 776 } else {
77bd2a46 777 TaskState::OK { endtime }
4b01c983 778 }
cef03f41
DC
779 }
780
781 /// Log task result, remove task from running list
782 pub fn log_result(&self, result: &Result<(), Error>) {
4c116baf
DC
783 let state = self.create_state(result);
784 self.log(state.result_text());
418def7a
DM
785
786 WORKER_TASK_LIST.lock().unwrap().remove(&self.upid.task_id);
787 let _ = update_active_workers(None);
788 super::set_worker_count(WORKER_TASK_LIST.lock().unwrap().len());
4b01c983
DM
789 }
790
882594c5 791 /// Log a message.
479f6e40
DM
792 pub fn log<S: AsRef<str>>(&self, msg: S) {
793 let mut data = self.data.lock().unwrap();
794 data.logger.log(msg);
795 }
796
f6de2c73
DC
797 /// Log a message as warning.
798 pub fn warn<S: AsRef<str>>(&self, msg: S) {
799 let mut data = self.data.lock().unwrap();
800 data.logger.log(format!("WARN: {}", msg.as_ref()));
801 data.warn_count += 1;
802 }
803
882594c5 804 /// Set progress indicator
479f6e40
DM
805 pub fn progress(&self, progress: f64) {
806 if progress >= 0.0 && progress <= 1.0 {
807 let mut data = self.data.lock().unwrap();
808 data.progress = progress;
809 } else {
810 // fixme: log!("task '{}': ignoring strange value for progress '{}'", self.upid, progress);
811 }
812 }
813
882594c5 814 /// Request abort
d607b886 815 pub fn request_abort(&self) {
98a181f0 816 eprintln!("set abort flag for worker {}", self.upid);
479f6e40 817 self.abort_requested.store(true, Ordering::SeqCst);
75bc49be
DM
818 // noitify listeners
819 let mut data = self.data.lock().unwrap();
820 loop {
821 match data.abort_listeners.pop() {
822 None => { break; },
823 Some(ch) => {
824 let _ = ch.send(()); // ignore erros here
825 },
826 }
827 }
479f6e40
DM
828 }
829
882594c5 830 /// Test if abort was requested.
479f6e40
DM
831 pub fn abort_requested(&self) -> bool {
832 self.abort_requested.load(Ordering::SeqCst)
833 }
834
882594c5 835 /// Fail if abort was requested.
479f6e40
DM
836 pub fn fail_on_abort(&self) -> Result<(), Error> {
837 if self.abort_requested() {
99641a6b 838 bail!("abort requested - aborting task");
479f6e40
DM
839 }
840 Ok(())
841 }
75bc49be
DM
842
843 /// Get a future which resolves on task abort
844 pub fn abort_future(&self) -> oneshot::Receiver<()> {
845 let (tx, rx) = oneshot::channel::<()>();
846
847 let mut data = self.data.lock().unwrap();
848 if self.abort_requested() {
849 let _ = tx.send(());
850 } else {
851 data.abort_listeners.push(tx);
852 }
853 rx
854 }
4bd2a9e4
DC
855
856 pub fn upid(&self) -> &UPID {
857 &self.upid
858 }
479f6e40 859}
d1993187
WB
860
861impl crate::task::TaskState for WorkerTask {
862 fn check_abort(&self) -> Result<(), Error> {
863 self.fail_on_abort()
864 }
865
866 fn log(&self, level: log::Level, message: &std::fmt::Arguments) {
867 match level {
868 log::Level::Error => self.warn(&message.to_string()),
869 log::Level::Warn => self.warn(&message.to_string()),
870 log::Level::Info => self.log(&message.to_string()),
871 log::Level::Debug => self.log(&format!("DEBUG: {}", message)),
872 log::Level::Trace => self.log(&format!("TRACE: {}", message)),
873 }
874 }
875}