]> git.proxmox.com Git - proxmox-backup.git/blob - src/bin/proxmox-backup-proxy.rs
split out pbs-runtime module
[proxmox-backup.git] / src / bin / proxmox-backup-proxy.rs
1 use std::sync::{Mutex, Arc};
2 use std::path::{Path, PathBuf};
3 use std::os::unix::io::AsRawFd;
4
5 use anyhow::{bail, format_err, Error};
6 use futures::*;
7
8 use openssl::ssl::{SslMethod, SslAcceptor, SslFiletype};
9 use tokio_stream::wrappers::ReceiverStream;
10 use serde_json::Value;
11
12 use proxmox::try_block;
13 use proxmox::api::RpcEnvironmentType;
14 use proxmox::sys::linux::socket::set_tcp_keepalive;
15
16 use proxmox_backup::{
17 backup::DataStore,
18 server::{
19 auth::default_api_auth,
20 WorkerTask,
21 ApiConfig,
22 rest::*,
23 jobstate::{
24 self,
25 Job,
26 },
27 rotate_task_log_archive,
28 },
29 tools::systemd::time::{
30 parse_calendar_event,
31 compute_next_event,
32 },
33 };
34
35 use pbs_buildcfg::configdir;
36
37 use proxmox_backup::api2::types::Authid;
38 use proxmox_backup::server;
39 use proxmox_backup::auth_helpers::*;
40 use proxmox_backup::tools::{
41 PROXMOX_BACKUP_TCP_KEEPALIVE_TIME,
42 daemon,
43 disks::{
44 DiskManage,
45 zfs_pool_stats,
46 get_pool_from_dataset,
47 },
48 logrotate::LogRotate,
49 };
50
51 use proxmox_backup::api2::pull::do_sync_job;
52 use proxmox_backup::api2::tape::backup::do_tape_backup_job;
53 use proxmox_backup::server::do_verification_job;
54 use proxmox_backup::server::do_prune_job;
55
56 fn main() -> Result<(), Error> {
57 proxmox_backup::tools::setup_safe_path_env();
58
59 let backup_uid = proxmox_backup::backup::backup_user()?.uid;
60 let backup_gid = proxmox_backup::backup::backup_group()?.gid;
61 let running_uid = nix::unistd::Uid::effective();
62 let running_gid = nix::unistd::Gid::effective();
63
64 if running_uid != backup_uid || running_gid != backup_gid {
65 bail!("proxy not running as backup user or group (got uid {} gid {})", running_uid, running_gid);
66 }
67
68 pbs_runtime::main(run())
69 }
70
71 async fn run() -> Result<(), Error> {
72 if let Err(err) = syslog::init(
73 syslog::Facility::LOG_DAEMON,
74 log::LevelFilter::Info,
75 Some("proxmox-backup-proxy")) {
76 bail!("unable to inititialize syslog - {}", err);
77 }
78
79 // Note: To debug early connection error use
80 // PROXMOX_DEBUG=1 ./target/release/proxmox-backup-proxy
81 let debug = std::env::var("PROXMOX_DEBUG").is_ok();
82
83 let _ = public_auth_key(); // load with lazy_static
84 let _ = csrf_secret(); // load with lazy_static
85
86 let mut config = ApiConfig::new(
87 pbs_buildcfg::JS_DIR,
88 &proxmox_backup::api2::ROUTER,
89 RpcEnvironmentType::PUBLIC,
90 default_api_auth(),
91 )?;
92
93 config.add_alias("novnc", "/usr/share/novnc-pve");
94 config.add_alias("extjs", "/usr/share/javascript/extjs");
95 config.add_alias("qrcodejs", "/usr/share/javascript/qrcodejs");
96 config.add_alias("fontawesome", "/usr/share/fonts-font-awesome");
97 config.add_alias("xtermjs", "/usr/share/pve-xtermjs");
98 config.add_alias("locale", "/usr/share/pbs-i18n");
99 config.add_alias("widgettoolkit", "/usr/share/javascript/proxmox-widget-toolkit");
100 config.add_alias("docs", "/usr/share/doc/proxmox-backup/html");
101
102 let mut indexpath = PathBuf::from(pbs_buildcfg::JS_DIR);
103 indexpath.push("index.hbs");
104 config.register_template("index", &indexpath)?;
105 config.register_template("console", "/usr/share/pve-xtermjs/index.html.hbs")?;
106
107 let mut commando_sock = server::CommandoSocket::new(server::our_ctrl_sock());
108
109 config.enable_file_log(pbs_buildcfg::API_ACCESS_LOG_FN, &mut commando_sock)?;
110
111 let rest_server = RestServer::new(config);
112
113 //openssl req -x509 -newkey rsa:4096 -keyout /etc/proxmox-backup/proxy.key -out /etc/proxmox-backup/proxy.pem -nodes
114
115 // we build the initial acceptor here as we cannot start if this fails
116 let acceptor = make_tls_acceptor()?;
117 let acceptor = Arc::new(Mutex::new(acceptor));
118
119 // to renew the acceptor we just add a command-socket handler
120 commando_sock.register_command(
121 "reload-certificate".to_string(),
122 {
123 let acceptor = Arc::clone(&acceptor);
124 move |_value| -> Result<_, Error> {
125 log::info!("reloading certificate");
126 match make_tls_acceptor() {
127 Err(err) => log::error!("error reloading certificate: {}", err),
128 Ok(new_acceptor) => {
129 let mut guard = acceptor.lock().unwrap();
130 *guard = new_acceptor;
131 }
132 }
133 Ok(Value::Null)
134 }
135 },
136 )?;
137
138 // to remove references for not configured datastores
139 commando_sock.register_command(
140 "datastore-removed".to_string(),
141 |_value| {
142 if let Err(err) = proxmox_backup::backup::DataStore::remove_unused_datastores() {
143 log::error!("could not refresh datastores: {}", err);
144 }
145 Ok(Value::Null)
146 }
147 )?;
148
149 let server = daemon::create_daemon(
150 ([0,0,0,0,0,0,0,0], 8007).into(),
151 move |listener, ready| {
152
153 let connections = accept_connections(listener, acceptor, debug);
154 let connections = hyper::server::accept::from_stream(ReceiverStream::new(connections));
155
156 Ok(ready
157 .and_then(|_| hyper::Server::builder(connections)
158 .serve(rest_server)
159 .with_graceful_shutdown(server::shutdown_future())
160 .map_err(Error::from)
161 )
162 .map_err(|err| eprintln!("server error: {}", err))
163 .map(|_| ())
164 )
165 },
166 "proxmox-backup-proxy.service",
167 );
168
169 server::write_pid(pbs_buildcfg::PROXMOX_BACKUP_PROXY_PID_FN)?;
170 daemon::systemd_notify(daemon::SystemdNotify::Ready)?;
171
172 let init_result: Result<(), Error> = try_block!({
173 server::register_task_control_commands(&mut commando_sock)?;
174 commando_sock.spawn()?;
175 server::server_state_init()?;
176 Ok(())
177 });
178
179 if let Err(err) = init_result {
180 bail!("unable to start daemon - {}", err);
181 }
182
183 start_task_scheduler();
184 start_stat_generator();
185
186 server.await?;
187 log::info!("server shutting down, waiting for active workers to complete");
188 proxmox_backup::server::last_worker_future().await?;
189 log::info!("done - exit server");
190
191 Ok(())
192 }
193
194 fn make_tls_acceptor() -> Result<SslAcceptor, Error> {
195 let key_path = configdir!("/proxy.key");
196 let cert_path = configdir!("/proxy.pem");
197
198 let mut acceptor = SslAcceptor::mozilla_intermediate_v5(SslMethod::tls()).unwrap();
199 acceptor.set_private_key_file(key_path, SslFiletype::PEM)
200 .map_err(|err| format_err!("unable to read proxy key {} - {}", key_path, err))?;
201 acceptor.set_certificate_chain_file(cert_path)
202 .map_err(|err| format_err!("unable to read proxy cert {} - {}", cert_path, err))?;
203 acceptor.check_private_key().unwrap();
204
205 Ok(acceptor.build())
206 }
207
208 type ClientStreamResult =
209 Result<std::pin::Pin<Box<tokio_openssl::SslStream<tokio::net::TcpStream>>>, Error>;
210 const MAX_PENDING_ACCEPTS: usize = 1024;
211
212 fn accept_connections(
213 listener: tokio::net::TcpListener,
214 acceptor: Arc<Mutex<openssl::ssl::SslAcceptor>>,
215 debug: bool,
216 ) -> tokio::sync::mpsc::Receiver<ClientStreamResult> {
217
218 let (sender, receiver) = tokio::sync::mpsc::channel(MAX_PENDING_ACCEPTS);
219
220 tokio::spawn(accept_connection(listener, acceptor, debug, sender));
221
222 receiver
223 }
224
225 async fn accept_connection(
226 listener: tokio::net::TcpListener,
227 acceptor: Arc<Mutex<openssl::ssl::SslAcceptor>>,
228 debug: bool,
229 sender: tokio::sync::mpsc::Sender<ClientStreamResult>,
230 ) {
231 let accept_counter = Arc::new(());
232
233 loop {
234 let (sock, _addr) = match listener.accept().await {
235 Ok(conn) => conn,
236 Err(err) => {
237 eprintln!("error accepting tcp connection: {}", err);
238 continue;
239 }
240 };
241
242 sock.set_nodelay(true).unwrap();
243 let _ = set_tcp_keepalive(sock.as_raw_fd(), PROXMOX_BACKUP_TCP_KEEPALIVE_TIME);
244
245 let ssl = { // limit acceptor_guard scope
246 // Acceptor can be reloaded using the command socket "reload-certificate" command
247 let acceptor_guard = acceptor.lock().unwrap();
248
249 match openssl::ssl::Ssl::new(acceptor_guard.context()) {
250 Ok(ssl) => ssl,
251 Err(err) => {
252 eprintln!("failed to create Ssl object from Acceptor context - {}", err);
253 continue;
254 },
255 }
256 };
257
258 let stream = match tokio_openssl::SslStream::new(ssl, sock) {
259 Ok(stream) => stream,
260 Err(err) => {
261 eprintln!("failed to create SslStream using ssl and connection socket - {}", err);
262 continue;
263 },
264 };
265
266 let mut stream = Box::pin(stream);
267 let sender = sender.clone();
268
269 if Arc::strong_count(&accept_counter) > MAX_PENDING_ACCEPTS {
270 eprintln!("connection rejected - to many open connections");
271 continue;
272 }
273
274 let accept_counter = Arc::clone(&accept_counter);
275 tokio::spawn(async move {
276 let accept_future = tokio::time::timeout(
277 Duration::new(10, 0), stream.as_mut().accept());
278
279 let result = accept_future.await;
280
281 match result {
282 Ok(Ok(())) => {
283 if sender.send(Ok(stream)).await.is_err() && debug {
284 eprintln!("detect closed connection channel");
285 }
286 }
287 Ok(Err(err)) => {
288 if debug {
289 eprintln!("https handshake failed - {}", err);
290 }
291 }
292 Err(_) => {
293 if debug {
294 eprintln!("https handshake timeout");
295 }
296 }
297 }
298
299 drop(accept_counter); // decrease reference count
300 });
301 }
302 }
303
304 fn start_stat_generator() {
305 let abort_future = server::shutdown_future();
306 let future = Box::pin(run_stat_generator());
307 let task = futures::future::select(future, abort_future);
308 tokio::spawn(task.map(|_| ()));
309 }
310
311 fn start_task_scheduler() {
312 let abort_future = server::shutdown_future();
313 let future = Box::pin(run_task_scheduler());
314 let task = futures::future::select(future, abort_future);
315 tokio::spawn(task.map(|_| ()));
316 }
317
318 use std::time::{SystemTime, Instant, Duration, UNIX_EPOCH};
319
320 fn next_minute() -> Result<Instant, Error> {
321 let now = SystemTime::now();
322 let epoch_now = now.duration_since(UNIX_EPOCH)?;
323 let epoch_next = Duration::from_secs((epoch_now.as_secs()/60 + 1)*60);
324 Ok(Instant::now() + epoch_next - epoch_now)
325 }
326
327 async fn run_task_scheduler() {
328
329 let mut count: usize = 0;
330
331 loop {
332 count += 1;
333
334 let delay_target = match next_minute() { // try to run very minute
335 Ok(d) => d,
336 Err(err) => {
337 eprintln!("task scheduler: compute next minute failed - {}", err);
338 tokio::time::sleep_until(tokio::time::Instant::from_std(Instant::now() + Duration::from_secs(60))).await;
339 continue;
340 }
341 };
342
343 if count > 2 { // wait 1..2 minutes before starting
344 match schedule_tasks().catch_unwind().await {
345 Err(panic) => {
346 match panic.downcast::<&str>() {
347 Ok(msg) => {
348 eprintln!("task scheduler panic: {}", msg);
349 }
350 Err(_) => {
351 eprintln!("task scheduler panic - unknown type");
352 }
353 }
354 }
355 Ok(Err(err)) => {
356 eprintln!("task scheduler failed - {:?}", err);
357 }
358 Ok(Ok(_)) => {}
359 }
360 }
361
362 tokio::time::sleep_until(tokio::time::Instant::from_std(delay_target)).await;
363 }
364 }
365
366 async fn schedule_tasks() -> Result<(), Error> {
367
368 schedule_datastore_garbage_collection().await;
369 schedule_datastore_prune().await;
370 schedule_datastore_sync_jobs().await;
371 schedule_datastore_verify_jobs().await;
372 schedule_tape_backup_jobs().await;
373 schedule_task_log_rotate().await;
374
375 Ok(())
376 }
377
378 async fn schedule_datastore_garbage_collection() {
379
380 use proxmox_backup::config::{
381 datastore::{
382 self,
383 DataStoreConfig,
384 },
385 };
386
387 let config = match datastore::config() {
388 Err(err) => {
389 eprintln!("unable to read datastore config - {}", err);
390 return;
391 }
392 Ok((config, _digest)) => config,
393 };
394
395 for (store, (_, store_config)) in config.sections {
396 let datastore = match DataStore::lookup_datastore(&store) {
397 Ok(datastore) => datastore,
398 Err(err) => {
399 eprintln!("lookup_datastore failed - {}", err);
400 continue;
401 }
402 };
403
404 let store_config: DataStoreConfig = match serde_json::from_value(store_config) {
405 Ok(c) => c,
406 Err(err) => {
407 eprintln!("datastore config from_value failed - {}", err);
408 continue;
409 }
410 };
411
412 let event_str = match store_config.gc_schedule {
413 Some(event_str) => event_str,
414 None => continue,
415 };
416
417 let event = match parse_calendar_event(&event_str) {
418 Ok(event) => event,
419 Err(err) => {
420 eprintln!("unable to parse schedule '{}' - {}", event_str, err);
421 continue;
422 }
423 };
424
425 if datastore.garbage_collection_running() { continue; }
426
427 let worker_type = "garbage_collection";
428
429 let last = match jobstate::last_run_time(worker_type, &store) {
430 Ok(time) => time,
431 Err(err) => {
432 eprintln!("could not get last run time of {} {}: {}", worker_type, store, err);
433 continue;
434 }
435 };
436
437 let next = match compute_next_event(&event, last, false) {
438 Ok(Some(next)) => next,
439 Ok(None) => continue,
440 Err(err) => {
441 eprintln!("compute_next_event for '{}' failed - {}", event_str, err);
442 continue;
443 }
444 };
445
446 let now = proxmox::tools::time::epoch_i64();
447
448 if next > now { continue; }
449
450 let job = match Job::new(worker_type, &store) {
451 Ok(job) => job,
452 Err(_) => continue, // could not get lock
453 };
454
455 let auth_id = Authid::root_auth_id();
456
457 if let Err(err) = crate::server::do_garbage_collection_job(job, datastore, auth_id, Some(event_str), false) {
458 eprintln!("unable to start garbage collection job on datastore {} - {}", store, err);
459 }
460 }
461 }
462
463 async fn schedule_datastore_prune() {
464
465 use proxmox_backup::{
466 backup::{
467 PruneOptions,
468 },
469 config::datastore::{
470 self,
471 DataStoreConfig,
472 },
473 };
474
475 let config = match datastore::config() {
476 Err(err) => {
477 eprintln!("unable to read datastore config - {}", err);
478 return;
479 }
480 Ok((config, _digest)) => config,
481 };
482
483 for (store, (_, store_config)) in config.sections {
484
485 let store_config: DataStoreConfig = match serde_json::from_value(store_config) {
486 Ok(c) => c,
487 Err(err) => {
488 eprintln!("datastore '{}' config from_value failed - {}", store, err);
489 continue;
490 }
491 };
492
493 let event_str = match store_config.prune_schedule {
494 Some(event_str) => event_str,
495 None => continue,
496 };
497
498 let prune_options = PruneOptions {
499 keep_last: store_config.keep_last,
500 keep_hourly: store_config.keep_hourly,
501 keep_daily: store_config.keep_daily,
502 keep_weekly: store_config.keep_weekly,
503 keep_monthly: store_config.keep_monthly,
504 keep_yearly: store_config.keep_yearly,
505 };
506
507 if !prune_options.keeps_something() { // no prune settings - keep all
508 continue;
509 }
510
511 let worker_type = "prune";
512 if check_schedule(worker_type, &event_str, &store) {
513 let job = match Job::new(worker_type, &store) {
514 Ok(job) => job,
515 Err(_) => continue, // could not get lock
516 };
517
518 let auth_id = Authid::root_auth_id().clone();
519 if let Err(err) = do_prune_job(job, prune_options, store.clone(), &auth_id, Some(event_str)) {
520 eprintln!("unable to start datastore prune job {} - {}", &store, err);
521 }
522 };
523 }
524 }
525
526 async fn schedule_datastore_sync_jobs() {
527
528 use proxmox_backup::config::sync::{
529 self,
530 SyncJobConfig,
531 };
532
533 let config = match sync::config() {
534 Err(err) => {
535 eprintln!("unable to read sync job config - {}", err);
536 return;
537 }
538 Ok((config, _digest)) => config,
539 };
540
541 for (job_id, (_, job_config)) in config.sections {
542 let job_config: SyncJobConfig = match serde_json::from_value(job_config) {
543 Ok(c) => c,
544 Err(err) => {
545 eprintln!("sync job config from_value failed - {}", err);
546 continue;
547 }
548 };
549
550 let event_str = match job_config.schedule {
551 Some(ref event_str) => event_str.clone(),
552 None => continue,
553 };
554
555 let worker_type = "syncjob";
556 if check_schedule(worker_type, &event_str, &job_id) {
557 let job = match Job::new(worker_type, &job_id) {
558 Ok(job) => job,
559 Err(_) => continue, // could not get lock
560 };
561
562 let auth_id = Authid::root_auth_id().clone();
563 if let Err(err) = do_sync_job(job, job_config, &auth_id, Some(event_str)) {
564 eprintln!("unable to start datastore sync job {} - {}", &job_id, err);
565 }
566 };
567 }
568 }
569
570 async fn schedule_datastore_verify_jobs() {
571
572 use proxmox_backup::config::verify::{
573 self,
574 VerificationJobConfig,
575 };
576
577 let config = match verify::config() {
578 Err(err) => {
579 eprintln!("unable to read verification job config - {}", err);
580 return;
581 }
582 Ok((config, _digest)) => config,
583 };
584 for (job_id, (_, job_config)) in config.sections {
585 let job_config: VerificationJobConfig = match serde_json::from_value(job_config) {
586 Ok(c) => c,
587 Err(err) => {
588 eprintln!("verification job config from_value failed - {}", err);
589 continue;
590 }
591 };
592 let event_str = match job_config.schedule {
593 Some(ref event_str) => event_str.clone(),
594 None => continue,
595 };
596
597 let worker_type = "verificationjob";
598 let auth_id = Authid::root_auth_id().clone();
599 if check_schedule(worker_type, &event_str, &job_id) {
600 let job = match Job::new(&worker_type, &job_id) {
601 Ok(job) => job,
602 Err(_) => continue, // could not get lock
603 };
604 if let Err(err) = do_verification_job(job, job_config, &auth_id, Some(event_str)) {
605 eprintln!("unable to start datastore verification job {} - {}", &job_id, err);
606 }
607 };
608 }
609 }
610
611 async fn schedule_tape_backup_jobs() {
612
613 use proxmox_backup::config::tape_job::{
614 self,
615 TapeBackupJobConfig,
616 };
617
618 let config = match tape_job::config() {
619 Err(err) => {
620 eprintln!("unable to read tape job config - {}", err);
621 return;
622 }
623 Ok((config, _digest)) => config,
624 };
625 for (job_id, (_, job_config)) in config.sections {
626 let job_config: TapeBackupJobConfig = match serde_json::from_value(job_config) {
627 Ok(c) => c,
628 Err(err) => {
629 eprintln!("tape backup job config from_value failed - {}", err);
630 continue;
631 }
632 };
633 let event_str = match job_config.schedule {
634 Some(ref event_str) => event_str.clone(),
635 None => continue,
636 };
637
638 let worker_type = "tape-backup-job";
639 let auth_id = Authid::root_auth_id().clone();
640 if check_schedule(worker_type, &event_str, &job_id) {
641 let job = match Job::new(&worker_type, &job_id) {
642 Ok(job) => job,
643 Err(_) => continue, // could not get lock
644 };
645 if let Err(err) = do_tape_backup_job(job, job_config.setup, &auth_id, Some(event_str)) {
646 eprintln!("unable to start tape backup job {} - {}", &job_id, err);
647 }
648 };
649 }
650 }
651
652
653 async fn schedule_task_log_rotate() {
654
655 let worker_type = "logrotate";
656 let job_id = "access-log_and_task-archive";
657
658 // schedule daily at 00:00 like normal logrotate
659 let schedule = "00:00";
660
661 if !check_schedule(worker_type, schedule, job_id) {
662 // if we never ran the rotation, schedule instantly
663 match jobstate::JobState::load(worker_type, job_id) {
664 Ok(state) => match state {
665 jobstate::JobState::Created { .. } => {},
666 _ => return,
667 },
668 _ => return,
669 }
670 }
671
672 let mut job = match Job::new(worker_type, job_id) {
673 Ok(job) => job,
674 Err(_) => return, // could not get lock
675 };
676
677 if let Err(err) = WorkerTask::new_thread(
678 worker_type,
679 None,
680 Authid::root_auth_id().clone(),
681 false,
682 move |worker| {
683 job.start(&worker.upid().to_string())?;
684 worker.log("starting task log rotation".to_string());
685
686 let result = try_block!({
687 let max_size = 512 * 1024 - 1; // an entry has ~ 100b, so > 5000 entries/file
688 let max_files = 20; // times twenty files gives > 100000 task entries
689 let has_rotated = rotate_task_log_archive(max_size, true, Some(max_files))?;
690 if has_rotated {
691 worker.log("task log archive was rotated".to_string());
692 } else {
693 worker.log("task log archive was not rotated".to_string());
694 }
695
696 let max_size = 32 * 1024 * 1024 - 1;
697 let max_files = 14;
698 let mut logrotate = LogRotate::new(pbs_buildcfg::API_ACCESS_LOG_FN, true)
699 .ok_or_else(|| format_err!("could not get API access log file names"))?;
700
701 if logrotate.rotate(max_size, None, Some(max_files))? {
702 println!("rotated access log, telling daemons to re-open log file");
703 pbs_runtime::block_on(command_reopen_logfiles())?;
704 worker.log("API access log was rotated".to_string());
705 } else {
706 worker.log("API access log was not rotated".to_string());
707 }
708
709 let mut logrotate = LogRotate::new(pbs_buildcfg::API_AUTH_LOG_FN, true)
710 .ok_or_else(|| format_err!("could not get API auth log file names"))?;
711
712 if logrotate.rotate(max_size, None, Some(max_files))? {
713 worker.log("API authentication log was rotated".to_string());
714 } else {
715 worker.log("API authentication log was not rotated".to_string());
716 }
717
718 Ok(())
719 });
720
721 let status = worker.create_state(&result);
722
723 if let Err(err) = job.finish(status) {
724 eprintln!("could not finish job state for {}: {}", worker_type, err);
725 }
726
727 result
728 },
729 ) {
730 eprintln!("unable to start task log rotation: {}", err);
731 }
732
733 }
734
735 async fn command_reopen_logfiles() -> Result<(), Error> {
736 // only care about the most recent daemon instance for each, proxy & api, as other older ones
737 // should not respond to new requests anyway, but only finish their current one and then exit.
738 let sock = server::our_ctrl_sock();
739 let f1 = server::send_command(sock, "{\"command\":\"api-access-log-reopen\"}\n");
740
741 let pid = server::read_pid(pbs_buildcfg::PROXMOX_BACKUP_API_PID_FN)?;
742 let sock = server::ctrl_sock_from_pid(pid);
743 let f2 = server::send_command(sock, "{\"command\":\"api-access-log-reopen\"}\n");
744
745 match futures::join!(f1, f2) {
746 (Err(e1), Err(e2)) => Err(format_err!("reopen commands failed, proxy: {}; api: {}", e1, e2)),
747 (Err(e1), Ok(_)) => Err(format_err!("reopen commands failed, proxy: {}", e1)),
748 (Ok(_), Err(e2)) => Err(format_err!("reopen commands failed, api: {}", e2)),
749 _ => Ok(()),
750 }
751 }
752
753 async fn run_stat_generator() {
754
755 let mut count = 0;
756 loop {
757 count += 1;
758 let save = if count >= 6 { count = 0; true } else { false };
759
760 let delay_target = Instant::now() + Duration::from_secs(10);
761
762 generate_host_stats(save).await;
763
764 tokio::time::sleep_until(tokio::time::Instant::from_std(delay_target)).await;
765
766 }
767
768 }
769
770 fn rrd_update_gauge(name: &str, value: f64, save: bool) {
771 use proxmox_backup::rrd;
772 if let Err(err) = rrd::update_value(name, value, rrd::DST::Gauge, save) {
773 eprintln!("rrd::update_value '{}' failed - {}", name, err);
774 }
775 }
776
777 fn rrd_update_derive(name: &str, value: f64, save: bool) {
778 use proxmox_backup::rrd;
779 if let Err(err) = rrd::update_value(name, value, rrd::DST::Derive, save) {
780 eprintln!("rrd::update_value '{}' failed - {}", name, err);
781 }
782 }
783
784 async fn generate_host_stats(save: bool) {
785 use proxmox::sys::linux::procfs::{
786 read_meminfo, read_proc_stat, read_proc_net_dev, read_loadavg};
787 use proxmox_backup::config::datastore;
788
789
790 pbs_runtime::block_in_place(move || {
791
792 match read_proc_stat() {
793 Ok(stat) => {
794 rrd_update_gauge("host/cpu", stat.cpu, save);
795 rrd_update_gauge("host/iowait", stat.iowait_percent, save);
796 }
797 Err(err) => {
798 eprintln!("read_proc_stat failed - {}", err);
799 }
800 }
801
802 match read_meminfo() {
803 Ok(meminfo) => {
804 rrd_update_gauge("host/memtotal", meminfo.memtotal as f64, save);
805 rrd_update_gauge("host/memused", meminfo.memused as f64, save);
806 rrd_update_gauge("host/swaptotal", meminfo.swaptotal as f64, save);
807 rrd_update_gauge("host/swapused", meminfo.swapused as f64, save);
808 }
809 Err(err) => {
810 eprintln!("read_meminfo failed - {}", err);
811 }
812 }
813
814 match read_proc_net_dev() {
815 Ok(netdev) => {
816 use proxmox_backup::config::network::is_physical_nic;
817 let mut netin = 0;
818 let mut netout = 0;
819 for item in netdev {
820 if !is_physical_nic(&item.device) { continue; }
821 netin += item.receive;
822 netout += item.send;
823 }
824 rrd_update_derive("host/netin", netin as f64, save);
825 rrd_update_derive("host/netout", netout as f64, save);
826 }
827 Err(err) => {
828 eprintln!("read_prox_net_dev failed - {}", err);
829 }
830 }
831
832 match read_loadavg() {
833 Ok(loadavg) => {
834 rrd_update_gauge("host/loadavg", loadavg.0 as f64, save);
835 }
836 Err(err) => {
837 eprintln!("read_loadavg failed - {}", err);
838 }
839 }
840
841 let disk_manager = DiskManage::new();
842
843 gather_disk_stats(disk_manager.clone(), Path::new("/"), "host", save);
844
845 match datastore::config() {
846 Ok((config, _)) => {
847 let datastore_list: Vec<datastore::DataStoreConfig> =
848 config.convert_to_typed_array("datastore").unwrap_or_default();
849
850 for config in datastore_list {
851
852 let rrd_prefix = format!("datastore/{}", config.name);
853 let path = std::path::Path::new(&config.path);
854 gather_disk_stats(disk_manager.clone(), path, &rrd_prefix, save);
855 }
856 }
857 Err(err) => {
858 eprintln!("read datastore config failed - {}", err);
859 }
860 }
861
862 });
863 }
864
865 fn check_schedule(worker_type: &str, event_str: &str, id: &str) -> bool {
866 let event = match parse_calendar_event(event_str) {
867 Ok(event) => event,
868 Err(err) => {
869 eprintln!("unable to parse schedule '{}' - {}", event_str, err);
870 return false;
871 }
872 };
873
874 let last = match jobstate::last_run_time(worker_type, &id) {
875 Ok(time) => time,
876 Err(err) => {
877 eprintln!("could not get last run time of {} {}: {}", worker_type, id, err);
878 return false;
879 }
880 };
881
882 let next = match compute_next_event(&event, last, false) {
883 Ok(Some(next)) => next,
884 Ok(None) => return false,
885 Err(err) => {
886 eprintln!("compute_next_event for '{}' failed - {}", event_str, err);
887 return false;
888 }
889 };
890
891 let now = proxmox::tools::time::epoch_i64();
892 next <= now
893 }
894
895 fn gather_disk_stats(disk_manager: Arc<DiskManage>, path: &Path, rrd_prefix: &str, save: bool) {
896
897 match proxmox_backup::tools::disks::disk_usage(path) {
898 Ok(status) => {
899 let rrd_key = format!("{}/total", rrd_prefix);
900 rrd_update_gauge(&rrd_key, status.total as f64, save);
901 let rrd_key = format!("{}/used", rrd_prefix);
902 rrd_update_gauge(&rrd_key, status.used as f64, save);
903 }
904 Err(err) => {
905 eprintln!("read disk_usage on {:?} failed - {}", path, err);
906 }
907 }
908
909 match disk_manager.find_mounted_device(path) {
910 Ok(None) => {},
911 Ok(Some((fs_type, device, source))) => {
912 let mut device_stat = None;
913 match fs_type.as_str() {
914 "zfs" => {
915 if let Some(source) = source {
916 let pool = get_pool_from_dataset(&source).unwrap_or(&source);
917 match zfs_pool_stats(pool) {
918 Ok(stat) => device_stat = stat,
919 Err(err) => eprintln!("zfs_pool_stats({:?}) failed - {}", pool, err),
920 }
921 }
922 }
923 _ => {
924 if let Ok(disk) = disk_manager.clone().disk_by_dev_num(device.into_dev_t()) {
925 match disk.read_stat() {
926 Ok(stat) => device_stat = stat,
927 Err(err) => eprintln!("disk.read_stat {:?} failed - {}", path, err),
928 }
929 }
930 }
931 }
932 if let Some(stat) = device_stat {
933 let rrd_key = format!("{}/read_ios", rrd_prefix);
934 rrd_update_derive(&rrd_key, stat.read_ios as f64, save);
935 let rrd_key = format!("{}/read_bytes", rrd_prefix);
936 rrd_update_derive(&rrd_key, (stat.read_sectors*512) as f64, save);
937
938 let rrd_key = format!("{}/write_ios", rrd_prefix);
939 rrd_update_derive(&rrd_key, stat.write_ios as f64, save);
940 let rrd_key = format!("{}/write_bytes", rrd_prefix);
941 rrd_update_derive(&rrd_key, (stat.write_sectors*512) as f64, save);
942
943 let rrd_key = format!("{}/io_ticks", rrd_prefix);
944 rrd_update_derive(&rrd_key, (stat.io_ticks as f64)/1000.0, save);
945 }
946 }
947 Err(err) => {
948 eprintln!("find_mounted_device failed - {}", err);
949 }
950 }
951 }