]> git.proxmox.com Git - proxmox-backup.git/blob - src/bin/proxmox-backup-proxy.rs
cleanup: move rrd cache related code into extra file
[proxmox-backup.git] / src / bin / proxmox-backup-proxy.rs
1 use std::sync::{Mutex, Arc};
2 use std::path::{Path, PathBuf};
3 use std::os::unix::io::AsRawFd;
4 use std::future::Future;
5 use std::pin::Pin;
6
7 use anyhow::{bail, format_err, Error};
8 use futures::*;
9 use http::request::Parts;
10 use http::Response;
11 use hyper::{Body, StatusCode};
12 use hyper::header;
13 use url::form_urlencoded;
14
15 use openssl::ssl::{SslMethod, SslAcceptor, SslFiletype};
16 use tokio_stream::wrappers::ReceiverStream;
17 use serde_json::{json, Value};
18 use http::{Method, HeaderMap};
19
20 use proxmox::sys::linux::socket::set_tcp_keepalive;
21 use proxmox::tools::fs::CreateOptions;
22 use proxmox_lang::try_block;
23 use proxmox_router::{RpcEnvironment, RpcEnvironmentType, UserInformation};
24
25 use pbs_tools::{task_log, task_warn};
26 use pbs_datastore::DataStore;
27
28 use proxmox_rest_server::{
29 rotate_task_log_archive, extract_cookie , AuthError, ApiConfig, RestServer, RestEnvironment,
30 ServerAdapter, WorkerTask, cleanup_old_tasks,
31 };
32
33 use proxmox_backup::rrd_cache::{ rrd_update_gauge, rrd_update_derive, initialize_rrd_cache};
34 use proxmox_backup::{
35 server::{
36 auth::check_pbs_auth,
37 jobstate::{
38 self,
39 Job,
40 },
41 },
42 };
43
44 use pbs_buildcfg::configdir;
45 use proxmox_systemd::time::{compute_next_event, parse_calendar_event};
46 use pbs_tools::logrotate::LogRotate;
47
48 use pbs_api_types::{
49 Authid, TapeBackupJobConfig, VerificationJobConfig, SyncJobConfig, DataStoreConfig,
50 PruneOptions,
51 };
52
53 use proxmox_rest_server::daemon;
54
55 use proxmox_backup::server;
56 use proxmox_backup::auth_helpers::*;
57 use proxmox_backup::tools::{
58 PROXMOX_BACKUP_TCP_KEEPALIVE_TIME,
59 disks::{
60 DiskManage,
61 zfs_pool_stats,
62 get_pool_from_dataset,
63 },
64 };
65
66
67 use proxmox_backup::api2::pull::do_sync_job;
68 use proxmox_backup::api2::tape::backup::do_tape_backup_job;
69 use proxmox_backup::server::do_verification_job;
70 use proxmox_backup::server::do_prune_job;
71
72 fn main() -> Result<(), Error> {
73 proxmox_backup::tools::setup_safe_path_env();
74
75 let backup_uid = pbs_config::backup_user()?.uid;
76 let backup_gid = pbs_config::backup_group()?.gid;
77 let running_uid = nix::unistd::Uid::effective();
78 let running_gid = nix::unistd::Gid::effective();
79
80 if running_uid != backup_uid || running_gid != backup_gid {
81 bail!("proxy not running as backup user or group (got uid {} gid {})", running_uid, running_gid);
82 }
83
84 pbs_runtime::main(run())
85 }
86
87
88 struct ProxmoxBackupProxyAdapter;
89
90 impl ServerAdapter for ProxmoxBackupProxyAdapter {
91
92 fn get_index(
93 &self,
94 env: RestEnvironment,
95 parts: Parts,
96 ) -> Pin<Box<dyn Future<Output = Response<Body>> + Send>> {
97 Box::pin(get_index_future(env, parts))
98 }
99
100 fn check_auth<'a>(
101 &'a self,
102 headers: &'a HeaderMap,
103 method: &'a Method,
104 ) -> Pin<Box<dyn Future<Output = Result<(String, Box<dyn UserInformation + Sync + Send>), AuthError>> + Send + 'a>> {
105 Box::pin(async move {
106 check_pbs_auth(headers, method).await
107 })
108 }
109 }
110
111 fn extract_lang_header(headers: &http::HeaderMap) -> Option<String> {
112 if let Some(Ok(cookie)) = headers.get("COOKIE").map(|v| v.to_str()) {
113 return extract_cookie(cookie, "PBSLangCookie");
114 }
115 None
116 }
117
118 async fn get_index_future(
119 env: RestEnvironment,
120 parts: Parts,
121 ) -> Response<Body> {
122
123 let auth_id = env.get_auth_id();
124 let api = env.api_config();
125 let language = extract_lang_header(&parts.headers);
126
127 // fixme: make all IO async
128
129 let (userid, csrf_token) = match auth_id {
130 Some(auth_id) => {
131 let auth_id = auth_id.parse::<Authid>();
132 match auth_id {
133 Ok(auth_id) if !auth_id.is_token() => {
134 let userid = auth_id.user().clone();
135 let new_csrf_token = assemble_csrf_prevention_token(csrf_secret(), &userid);
136 (Some(userid), Some(new_csrf_token))
137 }
138 _ => (None, None)
139 }
140 }
141 None => (None, None),
142 };
143
144 let nodename = proxmox::tools::nodename();
145 let user = userid.as_ref().map(|u| u.as_str()).unwrap_or("");
146
147 let csrf_token = csrf_token.unwrap_or_else(|| String::from(""));
148
149 let mut debug = false;
150 let mut template_file = "index";
151
152 if let Some(query_str) = parts.uri.query() {
153 for (k, v) in form_urlencoded::parse(query_str.as_bytes()).into_owned() {
154 if k == "debug" && v != "0" && v != "false" {
155 debug = true;
156 } else if k == "console" {
157 template_file = "console";
158 }
159 }
160 }
161
162 let mut lang = String::from("");
163 if let Some(language) = language {
164 if Path::new(&format!("/usr/share/pbs-i18n/pbs-lang-{}.js", language)).exists() {
165 lang = language;
166 }
167 }
168
169 let data = json!({
170 "NodeName": nodename,
171 "UserName": user,
172 "CSRFPreventionToken": csrf_token,
173 "language": lang,
174 "debug": debug,
175 });
176
177 let (ct, index) = match api.render_template(template_file, &data) {
178 Ok(index) => ("text/html", index),
179 Err(err) => ("text/plain", format!("Error rendering template: {}", err)),
180 };
181
182 let mut resp = Response::builder()
183 .status(StatusCode::OK)
184 .header(header::CONTENT_TYPE, ct)
185 .body(index.into())
186 .unwrap();
187
188 if let Some(userid) = userid {
189 resp.extensions_mut().insert(Authid::from((userid, None)));
190 }
191
192 resp
193 }
194
195 async fn run() -> Result<(), Error> {
196 if let Err(err) = syslog::init(
197 syslog::Facility::LOG_DAEMON,
198 log::LevelFilter::Info,
199 Some("proxmox-backup-proxy")) {
200 bail!("unable to inititialize syslog - {}", err);
201 }
202
203 // Note: To debug early connection error use
204 // PROXMOX_DEBUG=1 ./target/release/proxmox-backup-proxy
205 let debug = std::env::var("PROXMOX_DEBUG").is_ok();
206
207 let _ = public_auth_key(); // load with lazy_static
208 let _ = csrf_secret(); // load with lazy_static
209
210 let rrd_cache = initialize_rrd_cache()?;
211 rrd_cache.apply_journal()?;
212
213 let mut config = ApiConfig::new(
214 pbs_buildcfg::JS_DIR,
215 &proxmox_backup::api2::ROUTER,
216 RpcEnvironmentType::PUBLIC,
217 ProxmoxBackupProxyAdapter,
218 )?;
219
220 config.add_alias("novnc", "/usr/share/novnc-pve");
221 config.add_alias("extjs", "/usr/share/javascript/extjs");
222 config.add_alias("qrcodejs", "/usr/share/javascript/qrcodejs");
223 config.add_alias("fontawesome", "/usr/share/fonts-font-awesome");
224 config.add_alias("xtermjs", "/usr/share/pve-xtermjs");
225 config.add_alias("locale", "/usr/share/pbs-i18n");
226 config.add_alias("widgettoolkit", "/usr/share/javascript/proxmox-widget-toolkit");
227 config.add_alias("docs", "/usr/share/doc/proxmox-backup/html");
228
229 let mut indexpath = PathBuf::from(pbs_buildcfg::JS_DIR);
230 indexpath.push("index.hbs");
231 config.register_template("index", &indexpath)?;
232 config.register_template("console", "/usr/share/pve-xtermjs/index.html.hbs")?;
233
234 let backup_user = pbs_config::backup_user()?;
235 let mut commando_sock = proxmox_rest_server::CommandSocket::new(proxmox_rest_server::our_ctrl_sock(), backup_user.gid);
236
237 let dir_opts = CreateOptions::new().owner(backup_user.uid).group(backup_user.gid);
238 let file_opts = CreateOptions::new().owner(backup_user.uid).group(backup_user.gid);
239
240 config.enable_access_log(
241 pbs_buildcfg::API_ACCESS_LOG_FN,
242 Some(dir_opts.clone()),
243 Some(file_opts.clone()),
244 &mut commando_sock,
245 )?;
246
247 config.enable_auth_log(
248 pbs_buildcfg::API_AUTH_LOG_FN,
249 Some(dir_opts.clone()),
250 Some(file_opts.clone()),
251 &mut commando_sock,
252 )?;
253
254 let rest_server = RestServer::new(config);
255 proxmox_rest_server::init_worker_tasks(pbs_buildcfg::PROXMOX_BACKUP_LOG_DIR_M!().into(), file_opts.clone())?;
256
257 //openssl req -x509 -newkey rsa:4096 -keyout /etc/proxmox-backup/proxy.key -out /etc/proxmox-backup/proxy.pem -nodes
258
259 // we build the initial acceptor here as we cannot start if this fails
260 let acceptor = make_tls_acceptor()?;
261 let acceptor = Arc::new(Mutex::new(acceptor));
262
263 // to renew the acceptor we just add a command-socket handler
264 commando_sock.register_command(
265 "reload-certificate".to_string(),
266 {
267 let acceptor = Arc::clone(&acceptor);
268 move |_value| -> Result<_, Error> {
269 log::info!("reloading certificate");
270 match make_tls_acceptor() {
271 Err(err) => log::error!("error reloading certificate: {}", err),
272 Ok(new_acceptor) => {
273 let mut guard = acceptor.lock().unwrap();
274 *guard = new_acceptor;
275 }
276 }
277 Ok(Value::Null)
278 }
279 },
280 )?;
281
282 // to remove references for not configured datastores
283 commando_sock.register_command(
284 "datastore-removed".to_string(),
285 |_value| {
286 if let Err(err) = DataStore::remove_unused_datastores() {
287 log::error!("could not refresh datastores: {}", err);
288 }
289 Ok(Value::Null)
290 }
291 )?;
292
293 let server = daemon::create_daemon(
294 ([0,0,0,0,0,0,0,0], 8007).into(),
295 move |listener| {
296
297 let connections = accept_connections(listener, acceptor, debug);
298 let connections = hyper::server::accept::from_stream(ReceiverStream::new(connections));
299
300 Ok(async {
301 daemon::systemd_notify(daemon::SystemdNotify::Ready)?;
302
303 hyper::Server::builder(connections)
304 .serve(rest_server)
305 .with_graceful_shutdown(proxmox_rest_server::shutdown_future())
306 .map_err(Error::from)
307 .await
308 })
309 },
310 );
311
312 proxmox_rest_server::write_pid(pbs_buildcfg::PROXMOX_BACKUP_PROXY_PID_FN)?;
313
314 let init_result: Result<(), Error> = try_block!({
315 proxmox_rest_server::register_task_control_commands(&mut commando_sock)?;
316 commando_sock.spawn()?;
317 proxmox_rest_server::catch_shutdown_signal()?;
318 proxmox_rest_server::catch_reload_signal()?;
319 Ok(())
320 });
321
322 if let Err(err) = init_result {
323 bail!("unable to start daemon - {}", err);
324 }
325
326 start_task_scheduler();
327 start_stat_generator();
328
329 server.await?;
330 log::info!("server shutting down, waiting for active workers to complete");
331 proxmox_rest_server::last_worker_future().await?;
332 log::info!("done - exit server");
333
334 Ok(())
335 }
336
337 fn make_tls_acceptor() -> Result<SslAcceptor, Error> {
338 let key_path = configdir!("/proxy.key");
339 let cert_path = configdir!("/proxy.pem");
340
341 let mut acceptor = SslAcceptor::mozilla_intermediate_v5(SslMethod::tls()).unwrap();
342 acceptor.set_private_key_file(key_path, SslFiletype::PEM)
343 .map_err(|err| format_err!("unable to read proxy key {} - {}", key_path, err))?;
344 acceptor.set_certificate_chain_file(cert_path)
345 .map_err(|err| format_err!("unable to read proxy cert {} - {}", cert_path, err))?;
346 acceptor.check_private_key().unwrap();
347
348 Ok(acceptor.build())
349 }
350
351 type ClientStreamResult =
352 Result<std::pin::Pin<Box<tokio_openssl::SslStream<tokio::net::TcpStream>>>, Error>;
353 const MAX_PENDING_ACCEPTS: usize = 1024;
354
355 fn accept_connections(
356 listener: tokio::net::TcpListener,
357 acceptor: Arc<Mutex<openssl::ssl::SslAcceptor>>,
358 debug: bool,
359 ) -> tokio::sync::mpsc::Receiver<ClientStreamResult> {
360
361 let (sender, receiver) = tokio::sync::mpsc::channel(MAX_PENDING_ACCEPTS);
362
363 tokio::spawn(accept_connection(listener, acceptor, debug, sender));
364
365 receiver
366 }
367
368 async fn accept_connection(
369 listener: tokio::net::TcpListener,
370 acceptor: Arc<Mutex<openssl::ssl::SslAcceptor>>,
371 debug: bool,
372 sender: tokio::sync::mpsc::Sender<ClientStreamResult>,
373 ) {
374 let accept_counter = Arc::new(());
375
376 loop {
377 let (sock, _addr) = match listener.accept().await {
378 Ok(conn) => conn,
379 Err(err) => {
380 eprintln!("error accepting tcp connection: {}", err);
381 continue;
382 }
383 };
384
385 sock.set_nodelay(true).unwrap();
386 let _ = set_tcp_keepalive(sock.as_raw_fd(), PROXMOX_BACKUP_TCP_KEEPALIVE_TIME);
387
388 let ssl = { // limit acceptor_guard scope
389 // Acceptor can be reloaded using the command socket "reload-certificate" command
390 let acceptor_guard = acceptor.lock().unwrap();
391
392 match openssl::ssl::Ssl::new(acceptor_guard.context()) {
393 Ok(ssl) => ssl,
394 Err(err) => {
395 eprintln!("failed to create Ssl object from Acceptor context - {}", err);
396 continue;
397 },
398 }
399 };
400
401 let stream = match tokio_openssl::SslStream::new(ssl, sock) {
402 Ok(stream) => stream,
403 Err(err) => {
404 eprintln!("failed to create SslStream using ssl and connection socket - {}", err);
405 continue;
406 },
407 };
408
409 let mut stream = Box::pin(stream);
410 let sender = sender.clone();
411
412 if Arc::strong_count(&accept_counter) > MAX_PENDING_ACCEPTS {
413 eprintln!("connection rejected - to many open connections");
414 continue;
415 }
416
417 let accept_counter = Arc::clone(&accept_counter);
418 tokio::spawn(async move {
419 let accept_future = tokio::time::timeout(
420 Duration::new(10, 0), stream.as_mut().accept());
421
422 let result = accept_future.await;
423
424 match result {
425 Ok(Ok(())) => {
426 if sender.send(Ok(stream)).await.is_err() && debug {
427 eprintln!("detect closed connection channel");
428 }
429 }
430 Ok(Err(err)) => {
431 if debug {
432 eprintln!("https handshake failed - {}", err);
433 }
434 }
435 Err(_) => {
436 if debug {
437 eprintln!("https handshake timeout");
438 }
439 }
440 }
441
442 drop(accept_counter); // decrease reference count
443 });
444 }
445 }
446
447 fn start_stat_generator() {
448 let abort_future = proxmox_rest_server::shutdown_future();
449 let future = Box::pin(run_stat_generator());
450 let task = futures::future::select(future, abort_future);
451 tokio::spawn(task.map(|_| ()));
452 }
453
454 fn start_task_scheduler() {
455 let abort_future = proxmox_rest_server::shutdown_future();
456 let future = Box::pin(run_task_scheduler());
457 let task = futures::future::select(future, abort_future);
458 tokio::spawn(task.map(|_| ()));
459 }
460
461 use std::time::{SystemTime, Instant, Duration, UNIX_EPOCH};
462
463 fn next_minute() -> Result<Instant, Error> {
464 let now = SystemTime::now();
465 let epoch_now = now.duration_since(UNIX_EPOCH)?;
466 let epoch_next = Duration::from_secs((epoch_now.as_secs()/60 + 1)*60);
467 Ok(Instant::now() + epoch_next - epoch_now)
468 }
469
470 async fn run_task_scheduler() {
471
472 let mut count: usize = 0;
473
474 loop {
475 count += 1;
476
477 let delay_target = match next_minute() { // try to run very minute
478 Ok(d) => d,
479 Err(err) => {
480 eprintln!("task scheduler: compute next minute failed - {}", err);
481 tokio::time::sleep_until(tokio::time::Instant::from_std(Instant::now() + Duration::from_secs(60))).await;
482 continue;
483 }
484 };
485
486 if count > 2 { // wait 1..2 minutes before starting
487 match schedule_tasks().catch_unwind().await {
488 Err(panic) => {
489 match panic.downcast::<&str>() {
490 Ok(msg) => {
491 eprintln!("task scheduler panic: {}", msg);
492 }
493 Err(_) => {
494 eprintln!("task scheduler panic - unknown type");
495 }
496 }
497 }
498 Ok(Err(err)) => {
499 eprintln!("task scheduler failed - {:?}", err);
500 }
501 Ok(Ok(_)) => {}
502 }
503 }
504
505 tokio::time::sleep_until(tokio::time::Instant::from_std(delay_target)).await;
506 }
507 }
508
509 async fn schedule_tasks() -> Result<(), Error> {
510
511 schedule_datastore_garbage_collection().await;
512 schedule_datastore_prune().await;
513 schedule_datastore_sync_jobs().await;
514 schedule_datastore_verify_jobs().await;
515 schedule_tape_backup_jobs().await;
516 schedule_task_log_rotate().await;
517
518 Ok(())
519 }
520
521 async fn schedule_datastore_garbage_collection() {
522
523 let config = match pbs_config::datastore::config() {
524 Err(err) => {
525 eprintln!("unable to read datastore config - {}", err);
526 return;
527 }
528 Ok((config, _digest)) => config,
529 };
530
531 for (store, (_, store_config)) in config.sections {
532 let datastore = match DataStore::lookup_datastore(&store) {
533 Ok(datastore) => datastore,
534 Err(err) => {
535 eprintln!("lookup_datastore failed - {}", err);
536 continue;
537 }
538 };
539
540 let store_config: DataStoreConfig = match serde_json::from_value(store_config) {
541 Ok(c) => c,
542 Err(err) => {
543 eprintln!("datastore config from_value failed - {}", err);
544 continue;
545 }
546 };
547
548 let event_str = match store_config.gc_schedule {
549 Some(event_str) => event_str,
550 None => continue,
551 };
552
553 let event = match parse_calendar_event(&event_str) {
554 Ok(event) => event,
555 Err(err) => {
556 eprintln!("unable to parse schedule '{}' - {}", event_str, err);
557 continue;
558 }
559 };
560
561 if datastore.garbage_collection_running() { continue; }
562
563 let worker_type = "garbage_collection";
564
565 let last = match jobstate::last_run_time(worker_type, &store) {
566 Ok(time) => time,
567 Err(err) => {
568 eprintln!("could not get last run time of {} {}: {}", worker_type, store, err);
569 continue;
570 }
571 };
572
573 let next = match compute_next_event(&event, last, false) {
574 Ok(Some(next)) => next,
575 Ok(None) => continue,
576 Err(err) => {
577 eprintln!("compute_next_event for '{}' failed - {}", event_str, err);
578 continue;
579 }
580 };
581
582 let now = proxmox_time::epoch_i64();
583
584 if next > now { continue; }
585
586 let job = match Job::new(worker_type, &store) {
587 Ok(job) => job,
588 Err(_) => continue, // could not get lock
589 };
590
591 let auth_id = Authid::root_auth_id();
592
593 if let Err(err) = crate::server::do_garbage_collection_job(job, datastore, auth_id, Some(event_str), false) {
594 eprintln!("unable to start garbage collection job on datastore {} - {}", store, err);
595 }
596 }
597 }
598
599 async fn schedule_datastore_prune() {
600
601 let config = match pbs_config::datastore::config() {
602 Err(err) => {
603 eprintln!("unable to read datastore config - {}", err);
604 return;
605 }
606 Ok((config, _digest)) => config,
607 };
608
609 for (store, (_, store_config)) in config.sections {
610
611 let store_config: DataStoreConfig = match serde_json::from_value(store_config) {
612 Ok(c) => c,
613 Err(err) => {
614 eprintln!("datastore '{}' config from_value failed - {}", store, err);
615 continue;
616 }
617 };
618
619 let event_str = match store_config.prune_schedule {
620 Some(event_str) => event_str,
621 None => continue,
622 };
623
624 let prune_options = PruneOptions {
625 keep_last: store_config.keep_last,
626 keep_hourly: store_config.keep_hourly,
627 keep_daily: store_config.keep_daily,
628 keep_weekly: store_config.keep_weekly,
629 keep_monthly: store_config.keep_monthly,
630 keep_yearly: store_config.keep_yearly,
631 };
632
633 if !pbs_datastore::prune::keeps_something(&prune_options) { // no prune settings - keep all
634 continue;
635 }
636
637 let worker_type = "prune";
638 if check_schedule(worker_type, &event_str, &store) {
639 let job = match Job::new(worker_type, &store) {
640 Ok(job) => job,
641 Err(_) => continue, // could not get lock
642 };
643
644 let auth_id = Authid::root_auth_id().clone();
645 if let Err(err) = do_prune_job(job, prune_options, store.clone(), &auth_id, Some(event_str)) {
646 eprintln!("unable to start datastore prune job {} - {}", &store, err);
647 }
648 };
649 }
650 }
651
652 async fn schedule_datastore_sync_jobs() {
653
654
655 let config = match pbs_config::sync::config() {
656 Err(err) => {
657 eprintln!("unable to read sync job config - {}", err);
658 return;
659 }
660 Ok((config, _digest)) => config,
661 };
662
663 for (job_id, (_, job_config)) in config.sections {
664 let job_config: SyncJobConfig = match serde_json::from_value(job_config) {
665 Ok(c) => c,
666 Err(err) => {
667 eprintln!("sync job config from_value failed - {}", err);
668 continue;
669 }
670 };
671
672 let event_str = match job_config.schedule {
673 Some(ref event_str) => event_str.clone(),
674 None => continue,
675 };
676
677 let worker_type = "syncjob";
678 if check_schedule(worker_type, &event_str, &job_id) {
679 let job = match Job::new(worker_type, &job_id) {
680 Ok(job) => job,
681 Err(_) => continue, // could not get lock
682 };
683
684 let auth_id = Authid::root_auth_id().clone();
685 if let Err(err) = do_sync_job(job, job_config, &auth_id, Some(event_str), false) {
686 eprintln!("unable to start datastore sync job {} - {}", &job_id, err);
687 }
688 };
689 }
690 }
691
692 async fn schedule_datastore_verify_jobs() {
693
694 let config = match pbs_config::verify::config() {
695 Err(err) => {
696 eprintln!("unable to read verification job config - {}", err);
697 return;
698 }
699 Ok((config, _digest)) => config,
700 };
701 for (job_id, (_, job_config)) in config.sections {
702 let job_config: VerificationJobConfig = match serde_json::from_value(job_config) {
703 Ok(c) => c,
704 Err(err) => {
705 eprintln!("verification job config from_value failed - {}", err);
706 continue;
707 }
708 };
709 let event_str = match job_config.schedule {
710 Some(ref event_str) => event_str.clone(),
711 None => continue,
712 };
713
714 let worker_type = "verificationjob";
715 let auth_id = Authid::root_auth_id().clone();
716 if check_schedule(worker_type, &event_str, &job_id) {
717 let job = match Job::new(&worker_type, &job_id) {
718 Ok(job) => job,
719 Err(_) => continue, // could not get lock
720 };
721 if let Err(err) = do_verification_job(job, job_config, &auth_id, Some(event_str), false) {
722 eprintln!("unable to start datastore verification job {} - {}", &job_id, err);
723 }
724 };
725 }
726 }
727
728 async fn schedule_tape_backup_jobs() {
729
730 let config = match pbs_config::tape_job::config() {
731 Err(err) => {
732 eprintln!("unable to read tape job config - {}", err);
733 return;
734 }
735 Ok((config, _digest)) => config,
736 };
737 for (job_id, (_, job_config)) in config.sections {
738 let job_config: TapeBackupJobConfig = match serde_json::from_value(job_config) {
739 Ok(c) => c,
740 Err(err) => {
741 eprintln!("tape backup job config from_value failed - {}", err);
742 continue;
743 }
744 };
745 let event_str = match job_config.schedule {
746 Some(ref event_str) => event_str.clone(),
747 None => continue,
748 };
749
750 let worker_type = "tape-backup-job";
751 let auth_id = Authid::root_auth_id().clone();
752 if check_schedule(worker_type, &event_str, &job_id) {
753 let job = match Job::new(&worker_type, &job_id) {
754 Ok(job) => job,
755 Err(_) => continue, // could not get lock
756 };
757 if let Err(err) = do_tape_backup_job(job, job_config.setup, &auth_id, Some(event_str), false) {
758 eprintln!("unable to start tape backup job {} - {}", &job_id, err);
759 }
760 };
761 }
762 }
763
764
765 async fn schedule_task_log_rotate() {
766
767 let worker_type = "logrotate";
768 let job_id = "access-log_and_task-archive";
769
770 // schedule daily at 00:00 like normal logrotate
771 let schedule = "00:00";
772
773 if !check_schedule(worker_type, schedule, job_id) {
774 // if we never ran the rotation, schedule instantly
775 match jobstate::JobState::load(worker_type, job_id) {
776 Ok(state) => match state {
777 jobstate::JobState::Created { .. } => {},
778 _ => return,
779 },
780 _ => return,
781 }
782 }
783
784 let mut job = match Job::new(worker_type, job_id) {
785 Ok(job) => job,
786 Err(_) => return, // could not get lock
787 };
788
789 if let Err(err) = WorkerTask::new_thread(
790 worker_type,
791 None,
792 Authid::root_auth_id().to_string(),
793 false,
794 move |worker| {
795 job.start(&worker.upid().to_string())?;
796 task_log!(worker, "starting task log rotation");
797
798 let result = try_block!({
799 let max_size = 512 * 1024 - 1; // an entry has ~ 100b, so > 5000 entries/file
800 let max_files = 20; // times twenty files gives > 100000 task entries
801 let has_rotated = rotate_task_log_archive(max_size, true, Some(max_files))?;
802 if has_rotated {
803 task_log!(worker, "task log archive was rotated");
804 } else {
805 task_log!(worker, "task log archive was not rotated");
806 }
807
808 let max_size = 32 * 1024 * 1024 - 1;
809 let max_files = 14;
810 let mut logrotate = LogRotate::new(pbs_buildcfg::API_ACCESS_LOG_FN, true)
811 .ok_or_else(|| format_err!("could not get API access log file names"))?;
812
813 if logrotate.rotate(max_size, None, Some(max_files))? {
814 println!("rotated access log, telling daemons to re-open log file");
815 pbs_runtime::block_on(command_reopen_access_logfiles())?;
816 task_log!(worker, "API access log was rotated");
817 } else {
818 task_log!(worker, "API access log was not rotated");
819 }
820
821 let mut logrotate = LogRotate::new(pbs_buildcfg::API_AUTH_LOG_FN, true)
822 .ok_or_else(|| format_err!("could not get API auth log file names"))?;
823
824 if logrotate.rotate(max_size, None, Some(max_files))? {
825 println!("rotated auth log, telling daemons to re-open log file");
826 pbs_runtime::block_on(command_reopen_auth_logfiles())?;
827 task_log!(worker, "API authentication log was rotated");
828 } else {
829 task_log!(worker, "API authentication log was not rotated");
830 }
831
832 if has_rotated {
833 task_log!(worker, "cleaning up old task logs");
834 if let Err(err) = cleanup_old_tasks(true) {
835 task_warn!(worker, "could not completely cleanup old tasks: {}", err);
836 }
837 }
838
839 Ok(())
840 });
841
842 let status = worker.create_state(&result);
843
844 if let Err(err) = job.finish(status) {
845 eprintln!("could not finish job state for {}: {}", worker_type, err);
846 }
847
848 result
849 },
850 ) {
851 eprintln!("unable to start task log rotation: {}", err);
852 }
853
854 }
855
856 async fn command_reopen_access_logfiles() -> Result<(), Error> {
857 // only care about the most recent daemon instance for each, proxy & api, as other older ones
858 // should not respond to new requests anyway, but only finish their current one and then exit.
859 let sock = proxmox_rest_server::our_ctrl_sock();
860 let f1 = proxmox_rest_server::send_raw_command(sock, "{\"command\":\"api-access-log-reopen\"}\n");
861
862 let pid = proxmox_rest_server::read_pid(pbs_buildcfg::PROXMOX_BACKUP_API_PID_FN)?;
863 let sock = proxmox_rest_server::ctrl_sock_from_pid(pid);
864 let f2 = proxmox_rest_server::send_raw_command(sock, "{\"command\":\"api-access-log-reopen\"}\n");
865
866 match futures::join!(f1, f2) {
867 (Err(e1), Err(e2)) => Err(format_err!("reopen commands failed, proxy: {}; api: {}", e1, e2)),
868 (Err(e1), Ok(_)) => Err(format_err!("reopen commands failed, proxy: {}", e1)),
869 (Ok(_), Err(e2)) => Err(format_err!("reopen commands failed, api: {}", e2)),
870 _ => Ok(()),
871 }
872 }
873
874 async fn command_reopen_auth_logfiles() -> Result<(), Error> {
875 // only care about the most recent daemon instance for each, proxy & api, as other older ones
876 // should not respond to new requests anyway, but only finish their current one and then exit.
877 let sock = proxmox_rest_server::our_ctrl_sock();
878 let f1 = proxmox_rest_server::send_raw_command(sock, "{\"command\":\"api-auth-log-reopen\"}\n");
879
880 let pid = proxmox_rest_server::read_pid(pbs_buildcfg::PROXMOX_BACKUP_API_PID_FN)?;
881 let sock = proxmox_rest_server::ctrl_sock_from_pid(pid);
882 let f2 = proxmox_rest_server::send_raw_command(sock, "{\"command\":\"api-auth-log-reopen\"}\n");
883
884 match futures::join!(f1, f2) {
885 (Err(e1), Err(e2)) => Err(format_err!("reopen commands failed, proxy: {}; api: {}", e1, e2)),
886 (Err(e1), Ok(_)) => Err(format_err!("reopen commands failed, proxy: {}", e1)),
887 (Ok(_), Err(e2)) => Err(format_err!("reopen commands failed, api: {}", e2)),
888 _ => Ok(()),
889 }
890 }
891
892 async fn run_stat_generator() {
893
894 loop {
895 let delay_target = Instant::now() + Duration::from_secs(10);
896
897 generate_host_stats().await;
898
899 tokio::time::sleep_until(tokio::time::Instant::from_std(delay_target)).await;
900
901 }
902
903 }
904
905 async fn generate_host_stats() {
906 match tokio::task::spawn_blocking(generate_host_stats_sync).await {
907 Ok(()) => (),
908 Err(err) => log::error!("generate_host_stats paniced: {}", err),
909 }
910 }
911
912 fn generate_host_stats_sync() {
913 use proxmox::sys::linux::procfs::{
914 read_meminfo, read_proc_stat, read_proc_net_dev, read_loadavg};
915
916 match read_proc_stat() {
917 Ok(stat) => {
918 rrd_update_gauge("host/cpu", stat.cpu);
919 rrd_update_gauge("host/iowait", stat.iowait_percent);
920 }
921 Err(err) => {
922 eprintln!("read_proc_stat failed - {}", err);
923 }
924 }
925
926 match read_meminfo() {
927 Ok(meminfo) => {
928 rrd_update_gauge("host/memtotal", meminfo.memtotal as f64);
929 rrd_update_gauge("host/memused", meminfo.memused as f64);
930 rrd_update_gauge("host/swaptotal", meminfo.swaptotal as f64);
931 rrd_update_gauge("host/swapused", meminfo.swapused as f64);
932 }
933 Err(err) => {
934 eprintln!("read_meminfo failed - {}", err);
935 }
936 }
937
938 match read_proc_net_dev() {
939 Ok(netdev) => {
940 use pbs_config::network::is_physical_nic;
941 let mut netin = 0;
942 let mut netout = 0;
943 for item in netdev {
944 if !is_physical_nic(&item.device) { continue; }
945 netin += item.receive;
946 netout += item.send;
947 }
948 rrd_update_derive("host/netin", netin as f64);
949 rrd_update_derive("host/netout", netout as f64);
950 }
951 Err(err) => {
952 eprintln!("read_prox_net_dev failed - {}", err);
953 }
954 }
955
956 match read_loadavg() {
957 Ok(loadavg) => {
958 rrd_update_gauge("host/loadavg", loadavg.0 as f64);
959 }
960 Err(err) => {
961 eprintln!("read_loadavg failed - {}", err);
962 }
963 }
964
965 let disk_manager = DiskManage::new();
966
967 gather_disk_stats(disk_manager.clone(), Path::new("/"), "host");
968
969 match pbs_config::datastore::config() {
970 Ok((config, _)) => {
971 let datastore_list: Vec<DataStoreConfig> =
972 config.convert_to_typed_array("datastore").unwrap_or_default();
973
974 for config in datastore_list {
975
976 let rrd_prefix = format!("datastore/{}", config.name);
977 let path = std::path::Path::new(&config.path);
978 gather_disk_stats(disk_manager.clone(), path, &rrd_prefix);
979 }
980 }
981 Err(err) => {
982 eprintln!("read datastore config failed - {}", err);
983 }
984 }
985 }
986
987 fn check_schedule(worker_type: &str, event_str: &str, id: &str) -> bool {
988 let event = match parse_calendar_event(event_str) {
989 Ok(event) => event,
990 Err(err) => {
991 eprintln!("unable to parse schedule '{}' - {}", event_str, err);
992 return false;
993 }
994 };
995
996 let last = match jobstate::last_run_time(worker_type, &id) {
997 Ok(time) => time,
998 Err(err) => {
999 eprintln!("could not get last run time of {} {}: {}", worker_type, id, err);
1000 return false;
1001 }
1002 };
1003
1004 let next = match compute_next_event(&event, last, false) {
1005 Ok(Some(next)) => next,
1006 Ok(None) => return false,
1007 Err(err) => {
1008 eprintln!("compute_next_event for '{}' failed - {}", event_str, err);
1009 return false;
1010 }
1011 };
1012
1013 let now = proxmox_time::epoch_i64();
1014 next <= now
1015 }
1016
1017 fn gather_disk_stats(disk_manager: Arc<DiskManage>, path: &Path, rrd_prefix: &str) {
1018
1019 match proxmox_backup::tools::disks::disk_usage(path) {
1020 Ok(status) => {
1021 let rrd_key = format!("{}/total", rrd_prefix);
1022 rrd_update_gauge(&rrd_key, status.total as f64);
1023 let rrd_key = format!("{}/used", rrd_prefix);
1024 rrd_update_gauge(&rrd_key, status.used as f64);
1025 }
1026 Err(err) => {
1027 eprintln!("read disk_usage on {:?} failed - {}", path, err);
1028 }
1029 }
1030
1031 match disk_manager.find_mounted_device(path) {
1032 Ok(None) => {},
1033 Ok(Some((fs_type, device, source))) => {
1034 let mut device_stat = None;
1035 match fs_type.as_str() {
1036 "zfs" => {
1037 if let Some(source) = source {
1038 let pool = get_pool_from_dataset(&source).unwrap_or(&source);
1039 match zfs_pool_stats(pool) {
1040 Ok(stat) => device_stat = stat,
1041 Err(err) => eprintln!("zfs_pool_stats({:?}) failed - {}", pool, err),
1042 }
1043 }
1044 }
1045 _ => {
1046 if let Ok(disk) = disk_manager.clone().disk_by_dev_num(device.into_dev_t()) {
1047 match disk.read_stat() {
1048 Ok(stat) => device_stat = stat,
1049 Err(err) => eprintln!("disk.read_stat {:?} failed - {}", path, err),
1050 }
1051 }
1052 }
1053 }
1054 if let Some(stat) = device_stat {
1055 let rrd_key = format!("{}/read_ios", rrd_prefix);
1056 rrd_update_derive(&rrd_key, stat.read_ios as f64);
1057 let rrd_key = format!("{}/read_bytes", rrd_prefix);
1058 rrd_update_derive(&rrd_key, (stat.read_sectors*512) as f64);
1059
1060 let rrd_key = format!("{}/write_ios", rrd_prefix);
1061 rrd_update_derive(&rrd_key, stat.write_ios as f64);
1062 let rrd_key = format!("{}/write_bytes", rrd_prefix);
1063 rrd_update_derive(&rrd_key, (stat.write_sectors*512) as f64);
1064
1065 let rrd_key = format!("{}/io_ticks", rrd_prefix);
1066 rrd_update_derive(&rrd_key, (stat.io_ticks as f64)/1000.0);
1067 }
1068 }
1069 Err(err) => {
1070 eprintln!("find_mounted_device failed - {}", err);
1071 }
1072 }
1073 }