]> git.proxmox.com Git - proxmox-backup.git/blob - src/bin/proxmox-backup-proxy.rs
datastore: add check for maintenance in lookup
[proxmox-backup.git] / src / bin / proxmox-backup-proxy.rs
1 use std::future::Future;
2 use std::os::unix::io::AsRawFd;
3 use std::path::{Path, PathBuf};
4 use std::pin::Pin;
5 use std::sync::{Arc, Mutex};
6
7 use anyhow::{bail, format_err, Error};
8 use futures::*;
9 use http::request::Parts;
10 use http::Response;
11 use hyper::header;
12 use hyper::{Body, StatusCode};
13 use url::form_urlencoded;
14
15 use http::{HeaderMap, Method};
16 use openssl::ssl::{SslAcceptor, SslFiletype, SslMethod};
17 use serde_json::{json, Value};
18 use tokio_stream::wrappers::ReceiverStream;
19
20 use proxmox_http::client::{RateLimitedStream, ShareableRateLimit};
21 use proxmox_lang::try_block;
22 use proxmox_router::{RpcEnvironment, RpcEnvironmentType, UserInformation};
23 use proxmox_sys::fs::CreateOptions;
24 use proxmox_sys::linux::socket::set_tcp_keepalive;
25 use proxmox_sys::logrotate::LogRotate;
26 use proxmox_sys::{task_log, task_warn};
27
28 use pbs_datastore::DataStore;
29
30 use proxmox_rest_server::{
31 cleanup_old_tasks, cookie_from_header, rotate_task_log_archive, ApiConfig, AuthError,
32 RestEnvironment, RestServer, ServerAdapter, WorkerTask,
33 };
34
35 use proxmox_backup::rrd_cache::{
36 initialize_rrd_cache, rrd_sync_journal, rrd_update_derive, rrd_update_gauge,
37 };
38 use proxmox_backup::{
39 server::{
40 auth::check_pbs_auth,
41 jobstate::{self, Job},
42 },
43 traffic_control_cache::TRAFFIC_CONTROL_CACHE,
44 };
45
46 use pbs_buildcfg::configdir;
47 use proxmox_time::CalendarEvent;
48
49 use pbs_api_types::{
50 Authid, DataStoreConfig, PruneOptions, SyncJobConfig, TapeBackupJobConfig,
51 VerificationJobConfig, Operation
52 };
53
54 use proxmox_rest_server::daemon;
55
56 use proxmox_backup::auth_helpers::*;
57 use proxmox_backup::server;
58 use proxmox_backup::tools::{
59 disks::{zfs_dataset_stats, DiskManage},
60 PROXMOX_BACKUP_TCP_KEEPALIVE_TIME,
61 };
62
63 use proxmox_backup::api2::pull::do_sync_job;
64 use proxmox_backup::api2::tape::backup::do_tape_backup_job;
65 use proxmox_backup::server::do_prune_job;
66 use proxmox_backup::server::do_verification_job;
67
68 fn main() -> Result<(), Error> {
69 pbs_tools::setup_libc_malloc_opts();
70
71 proxmox_backup::tools::setup_safe_path_env();
72
73 let backup_uid = pbs_config::backup_user()?.uid;
74 let backup_gid = pbs_config::backup_group()?.gid;
75 let running_uid = nix::unistd::Uid::effective();
76 let running_gid = nix::unistd::Gid::effective();
77
78 if running_uid != backup_uid || running_gid != backup_gid {
79 bail!(
80 "proxy not running as backup user or group (got uid {} gid {})",
81 running_uid,
82 running_gid
83 );
84 }
85
86 proxmox_async::runtime::main(run())
87 }
88
89 struct ProxmoxBackupProxyAdapter;
90
91 impl ServerAdapter for ProxmoxBackupProxyAdapter {
92 fn get_index(
93 &self,
94 env: RestEnvironment,
95 parts: Parts,
96 ) -> Pin<Box<dyn Future<Output = Response<Body>> + Send>> {
97 Box::pin(get_index_future(env, parts))
98 }
99
100 fn check_auth<'a>(
101 &'a self,
102 headers: &'a HeaderMap,
103 method: &'a Method,
104 ) -> Pin<Box<dyn Future<Output = Result<(String, Box<dyn UserInformation + Sync + Send>), AuthError>> + Send + 'a>> {
105 Box::pin(async move {
106 check_pbs_auth(headers, method).await
107 })
108 }
109 }
110
111 /// check for a cookie with the user-preferred language, fallback to the config one if not set or
112 /// not existing
113 fn get_language(headers: &http::HeaderMap) -> String {
114 let exists = |l: &str| Path::new(&format!("/usr/share/pbs-i18n/pbs-lang-{}.js", l)).exists();
115
116 match cookie_from_header(headers, "PBSLangCookie") {
117 Some(cookie_lang) if exists(&cookie_lang) => cookie_lang,
118 _ => match proxmox_backup::config::node::config().map(|(cfg, _)| cfg.default_lang) {
119 Ok(Some(default_lang)) if exists(&default_lang) => default_lang,
120 _ => String::from(""),
121 },
122 }
123 }
124
125 async fn get_index_future(env: RestEnvironment, parts: Parts) -> Response<Body> {
126 let auth_id = env.get_auth_id();
127 let api = env.api_config();
128
129 // fixme: make all IO async
130
131 let (userid, csrf_token) = match auth_id {
132 Some(auth_id) => {
133 let auth_id = auth_id.parse::<Authid>();
134 match auth_id {
135 Ok(auth_id) if !auth_id.is_token() => {
136 let userid = auth_id.user().clone();
137 let new_csrf_token = assemble_csrf_prevention_token(csrf_secret(), &userid);
138 (Some(userid), Some(new_csrf_token))
139 }
140 _ => (None, None),
141 }
142 }
143 None => (None, None),
144 };
145
146 let nodename = proxmox_sys::nodename();
147 let user = userid.as_ref().map(|u| u.as_str()).unwrap_or("");
148
149 let csrf_token = csrf_token.unwrap_or_else(|| String::from(""));
150
151 let mut debug = false;
152 let mut template_file = "index";
153
154 if let Some(query_str) = parts.uri.query() {
155 for (k, v) in form_urlencoded::parse(query_str.as_bytes()).into_owned() {
156 if k == "debug" && v != "0" && v != "false" {
157 debug = true;
158 } else if k == "console" {
159 template_file = "console";
160 }
161 }
162 }
163
164 let data = json!({
165 "NodeName": nodename,
166 "UserName": user,
167 "CSRFPreventionToken": csrf_token,
168 "language": get_language(&parts.headers),
169 "debug": debug,
170 });
171
172 let (ct, index) = match api.render_template(template_file, &data) {
173 Ok(index) => ("text/html", index),
174 Err(err) => ("text/plain", format!("Error rendering template: {}", err)),
175 };
176
177 let mut resp = Response::builder()
178 .status(StatusCode::OK)
179 .header(header::CONTENT_TYPE, ct)
180 .body(index.into())
181 .unwrap();
182
183 if let Some(userid) = userid {
184 resp.extensions_mut().insert(Authid::from((userid, None)));
185 }
186
187 resp
188 }
189
190 async fn run() -> Result<(), Error> {
191 // Note: To debug early connection error use
192 // PROXMOX_DEBUG=1 ./target/release/proxmox-backup-proxy
193 let debug = std::env::var("PROXMOX_DEBUG").is_ok();
194
195 if let Err(err) = syslog::init(
196 syslog::Facility::LOG_DAEMON,
197 if debug { log::LevelFilter::Debug } else { log::LevelFilter::Info },
198 Some("proxmox-backup-proxy"),
199 ) {
200 bail!("unable to inititialize syslog - {}", err);
201 }
202
203 let _ = public_auth_key(); // load with lazy_static
204 let _ = csrf_secret(); // load with lazy_static
205
206 let rrd_cache = initialize_rrd_cache()?;
207 rrd_cache.apply_journal()?;
208
209 let mut config = ApiConfig::new(
210 pbs_buildcfg::JS_DIR,
211 &proxmox_backup::api2::ROUTER,
212 RpcEnvironmentType::PUBLIC,
213 ProxmoxBackupProxyAdapter,
214 )?;
215
216 config.add_alias("novnc", "/usr/share/novnc-pve");
217 config.add_alias("extjs", "/usr/share/javascript/extjs");
218 config.add_alias("qrcodejs", "/usr/share/javascript/qrcodejs");
219 config.add_alias("fontawesome", "/usr/share/fonts-font-awesome");
220 config.add_alias("xtermjs", "/usr/share/pve-xtermjs");
221 config.add_alias("locale", "/usr/share/pbs-i18n");
222 config.add_alias(
223 "widgettoolkit",
224 "/usr/share/javascript/proxmox-widget-toolkit",
225 );
226 config.add_alias("docs", "/usr/share/doc/proxmox-backup/html");
227
228 let mut indexpath = PathBuf::from(pbs_buildcfg::JS_DIR);
229 indexpath.push("index.hbs");
230 config.register_template("index", &indexpath)?;
231 config.register_template("console", "/usr/share/pve-xtermjs/index.html.hbs")?;
232
233 let backup_user = pbs_config::backup_user()?;
234 let mut commando_sock = proxmox_rest_server::CommandSocket::new(
235 proxmox_rest_server::our_ctrl_sock(),
236 backup_user.gid,
237 );
238
239 let dir_opts = CreateOptions::new()
240 .owner(backup_user.uid)
241 .group(backup_user.gid);
242 let file_opts = CreateOptions::new()
243 .owner(backup_user.uid)
244 .group(backup_user.gid);
245
246 config.enable_access_log(
247 pbs_buildcfg::API_ACCESS_LOG_FN,
248 Some(dir_opts.clone()),
249 Some(file_opts.clone()),
250 &mut commando_sock,
251 )?;
252
253 config.enable_auth_log(
254 pbs_buildcfg::API_AUTH_LOG_FN,
255 Some(dir_opts.clone()),
256 Some(file_opts.clone()),
257 &mut commando_sock,
258 )?;
259
260 let rest_server = RestServer::new(config);
261 proxmox_rest_server::init_worker_tasks(
262 pbs_buildcfg::PROXMOX_BACKUP_LOG_DIR_M!().into(),
263 file_opts.clone(),
264 )?;
265
266 //openssl req -x509 -newkey rsa:4096 -keyout /etc/proxmox-backup/proxy.key -out /etc/proxmox-backup/proxy.pem -nodes
267
268 // we build the initial acceptor here as we cannot start if this fails
269 let acceptor = make_tls_acceptor()?;
270 let acceptor = Arc::new(Mutex::new(acceptor));
271
272 // to renew the acceptor we just add a command-socket handler
273 commando_sock.register_command("reload-certificate".to_string(), {
274 let acceptor = Arc::clone(&acceptor);
275 move |_value| -> Result<_, Error> {
276 log::info!("reloading certificate");
277 match make_tls_acceptor() {
278 Err(err) => log::error!("error reloading certificate: {}", err),
279 Ok(new_acceptor) => {
280 let mut guard = acceptor.lock().unwrap();
281 *guard = new_acceptor;
282 }
283 }
284 Ok(Value::Null)
285 }
286 })?;
287
288 // to remove references for not configured datastores
289 commando_sock.register_command("datastore-removed".to_string(), |_value| {
290 if let Err(err) = DataStore::remove_unused_datastores() {
291 log::error!("could not refresh datastores: {}", err);
292 }
293 Ok(Value::Null)
294 })?;
295
296 let server = daemon::create_daemon(([0, 0, 0, 0, 0, 0, 0, 0], 8007).into(), move |listener| {
297 let connections = accept_connections(listener, acceptor, debug);
298 let connections = hyper::server::accept::from_stream(ReceiverStream::new(connections));
299
300 Ok(async {
301 daemon::systemd_notify(daemon::SystemdNotify::Ready)?;
302
303 hyper::Server::builder(connections)
304 .serve(rest_server)
305 .with_graceful_shutdown(proxmox_rest_server::shutdown_future())
306 .map_err(Error::from)
307 .await
308 })
309 });
310
311 proxmox_rest_server::write_pid(pbs_buildcfg::PROXMOX_BACKUP_PROXY_PID_FN)?;
312
313 let init_result: Result<(), Error> = try_block!({
314 proxmox_rest_server::register_task_control_commands(&mut commando_sock)?;
315 commando_sock.spawn()?;
316 proxmox_rest_server::catch_shutdown_signal()?;
317 proxmox_rest_server::catch_reload_signal()?;
318 Ok(())
319 });
320
321 if let Err(err) = init_result {
322 bail!("unable to start daemon - {}", err);
323 }
324
325 start_task_scheduler();
326 start_stat_generator();
327 start_traffic_control_updater();
328
329 server.await?;
330 log::info!("server shutting down, waiting for active workers to complete");
331 proxmox_rest_server::last_worker_future().await?;
332 log::info!("done - exit server");
333
334 Ok(())
335 }
336
337 fn make_tls_acceptor() -> Result<SslAcceptor, Error> {
338 let key_path = configdir!("/proxy.key");
339 let cert_path = configdir!("/proxy.pem");
340
341 let (config, _) = proxmox_backup::config::node::config()?;
342 let ciphers_tls_1_3 = config.ciphers_tls_1_3;
343 let ciphers_tls_1_2 = config.ciphers_tls_1_2;
344
345 let mut acceptor = SslAcceptor::mozilla_intermediate_v5(SslMethod::tls()).unwrap();
346 if let Some(ciphers) = ciphers_tls_1_3.as_deref() {
347 acceptor.set_ciphersuites(ciphers)?;
348 }
349 if let Some(ciphers) = ciphers_tls_1_2.as_deref() {
350 acceptor.set_cipher_list(ciphers)?;
351 }
352 acceptor
353 .set_private_key_file(key_path, SslFiletype::PEM)
354 .map_err(|err| format_err!("unable to read proxy key {} - {}", key_path, err))?;
355 acceptor
356 .set_certificate_chain_file(cert_path)
357 .map_err(|err| format_err!("unable to read proxy cert {} - {}", cert_path, err))?;
358 acceptor.set_options(openssl::ssl::SslOptions::NO_RENEGOTIATION);
359 acceptor.check_private_key().unwrap();
360
361 Ok(acceptor.build())
362 }
363
364 type ClientStreamResult = Result<
365 std::pin::Pin<Box<tokio_openssl::SslStream<RateLimitedStream<tokio::net::TcpStream>>>>,
366 Error,
367 >;
368 const MAX_PENDING_ACCEPTS: usize = 1024;
369
370 fn accept_connections(
371 listener: tokio::net::TcpListener,
372 acceptor: Arc<Mutex<openssl::ssl::SslAcceptor>>,
373 debug: bool,
374 ) -> tokio::sync::mpsc::Receiver<ClientStreamResult> {
375 let (sender, receiver) = tokio::sync::mpsc::channel(MAX_PENDING_ACCEPTS);
376
377 tokio::spawn(accept_connection(listener, acceptor, debug, sender));
378
379 receiver
380 }
381
382 async fn accept_connection(
383 listener: tokio::net::TcpListener,
384 acceptor: Arc<Mutex<openssl::ssl::SslAcceptor>>,
385 debug: bool,
386 sender: tokio::sync::mpsc::Sender<ClientStreamResult>,
387 ) {
388 let accept_counter = Arc::new(());
389
390 loop {
391 let (sock, peer) = match listener.accept().await {
392 Ok(conn) => conn,
393 Err(err) => {
394 eprintln!("error accepting tcp connection: {}", err);
395 continue;
396 }
397 };
398
399 sock.set_nodelay(true).unwrap();
400 let _ = set_tcp_keepalive(sock.as_raw_fd(), PROXMOX_BACKUP_TCP_KEEPALIVE_TIME);
401
402 let sock =
403 RateLimitedStream::with_limiter_update_cb(sock, move || lookup_rate_limiter(peer));
404
405 let ssl = {
406 // limit acceptor_guard scope
407 // Acceptor can be reloaded using the command socket "reload-certificate" command
408 let acceptor_guard = acceptor.lock().unwrap();
409
410 match openssl::ssl::Ssl::new(acceptor_guard.context()) {
411 Ok(ssl) => ssl,
412 Err(err) => {
413 eprintln!(
414 "failed to create Ssl object from Acceptor context - {}",
415 err
416 );
417 continue;
418 }
419 }
420 };
421
422 let stream = match tokio_openssl::SslStream::new(ssl, sock) {
423 Ok(stream) => stream,
424 Err(err) => {
425 eprintln!(
426 "failed to create SslStream using ssl and connection socket - {}",
427 err
428 );
429 continue;
430 }
431 };
432
433 let mut stream = Box::pin(stream);
434 let sender = sender.clone();
435
436 if Arc::strong_count(&accept_counter) > MAX_PENDING_ACCEPTS {
437 eprintln!("connection rejected - to many open connections");
438 continue;
439 }
440
441 let accept_counter = Arc::clone(&accept_counter);
442 tokio::spawn(async move {
443 let accept_future =
444 tokio::time::timeout(Duration::new(10, 0), stream.as_mut().accept());
445
446 let result = accept_future.await;
447
448 match result {
449 Ok(Ok(())) => {
450 if sender.send(Ok(stream)).await.is_err() && debug {
451 eprintln!("detect closed connection channel");
452 }
453 }
454 Ok(Err(err)) => {
455 if debug {
456 eprintln!("https handshake failed - {}", err);
457 }
458 }
459 Err(_) => {
460 if debug {
461 eprintln!("https handshake timeout");
462 }
463 }
464 }
465
466 drop(accept_counter); // decrease reference count
467 });
468 }
469 }
470
471 fn start_stat_generator() {
472 let abort_future = proxmox_rest_server::shutdown_future();
473 let future = Box::pin(run_stat_generator());
474 let task = futures::future::select(future, abort_future);
475 tokio::spawn(task.map(|_| ()));
476 }
477
478 fn start_task_scheduler() {
479 let abort_future = proxmox_rest_server::shutdown_future();
480 let future = Box::pin(run_task_scheduler());
481 let task = futures::future::select(future, abort_future);
482 tokio::spawn(task.map(|_| ()));
483 }
484
485 fn start_traffic_control_updater() {
486 let abort_future = proxmox_rest_server::shutdown_future();
487 let future = Box::pin(run_traffic_control_updater());
488 let task = futures::future::select(future, abort_future);
489 tokio::spawn(task.map(|_| ()));
490 }
491
492 use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
493
494 fn next_minute() -> Result<Instant, Error> {
495 let now = SystemTime::now();
496 let epoch_now = now.duration_since(UNIX_EPOCH)?;
497 let epoch_next = Duration::from_secs((epoch_now.as_secs() / 60 + 1) * 60);
498 Ok(Instant::now() + epoch_next - epoch_now)
499 }
500
501 async fn run_task_scheduler() {
502 let mut count: usize = 0;
503
504 loop {
505 count += 1;
506
507 let delay_target = match next_minute() {
508 // try to run very minute
509 Ok(d) => d,
510 Err(err) => {
511 eprintln!("task scheduler: compute next minute failed - {}", err);
512 tokio::time::sleep_until(tokio::time::Instant::from_std(
513 Instant::now() + Duration::from_secs(60),
514 ))
515 .await;
516 continue;
517 }
518 };
519
520 if count > 2 {
521 // wait 1..2 minutes before starting
522 match schedule_tasks().catch_unwind().await {
523 Err(panic) => match panic.downcast::<&str>() {
524 Ok(msg) => {
525 eprintln!("task scheduler panic: {}", msg);
526 }
527 Err(_) => {
528 eprintln!("task scheduler panic - unknown type");
529 }
530 },
531 Ok(Err(err)) => {
532 eprintln!("task scheduler failed - {:?}", err);
533 }
534 Ok(Ok(_)) => {}
535 }
536 }
537
538 tokio::time::sleep_until(tokio::time::Instant::from_std(delay_target)).await;
539 }
540 }
541
542 async fn schedule_tasks() -> Result<(), Error> {
543 schedule_datastore_garbage_collection().await;
544 schedule_datastore_prune().await;
545 schedule_datastore_sync_jobs().await;
546 schedule_datastore_verify_jobs().await;
547 schedule_tape_backup_jobs().await;
548 schedule_task_log_rotate().await;
549
550 Ok(())
551 }
552
553 async fn schedule_datastore_garbage_collection() {
554 let config = match pbs_config::datastore::config() {
555 Err(err) => {
556 eprintln!("unable to read datastore config - {}", err);
557 return;
558 }
559 Ok((config, _digest)) => config,
560 };
561
562 for (store, (_, store_config)) in config.sections {
563 let datastore = match DataStore::lookup_datastore(&store, Some(Operation::Write)) {
564 Ok(datastore) => datastore,
565 Err(err) => {
566 eprintln!("lookup_datastore failed - {}", err);
567 continue;
568 }
569 };
570
571 let store_config: DataStoreConfig = match serde_json::from_value(store_config) {
572 Ok(c) => c,
573 Err(err) => {
574 eprintln!("datastore config from_value failed - {}", err);
575 continue;
576 }
577 };
578
579 let event_str = match store_config.gc_schedule {
580 Some(event_str) => event_str,
581 None => continue,
582 };
583
584 let event: CalendarEvent = match event_str.parse() {
585 Ok(event) => event,
586 Err(err) => {
587 eprintln!("unable to parse schedule '{}' - {}", event_str, err);
588 continue;
589 }
590 };
591
592 if datastore.garbage_collection_running() {
593 continue;
594 }
595
596 let worker_type = "garbage_collection";
597
598 let last = match jobstate::last_run_time(worker_type, &store) {
599 Ok(time) => time,
600 Err(err) => {
601 eprintln!(
602 "could not get last run time of {} {}: {}",
603 worker_type, store, err
604 );
605 continue;
606 }
607 };
608
609 let next = match event.compute_next_event(last) {
610 Ok(Some(next)) => next,
611 Ok(None) => continue,
612 Err(err) => {
613 eprintln!("compute_next_event for '{}' failed - {}", event_str, err);
614 continue;
615 }
616 };
617
618 let now = proxmox_time::epoch_i64();
619
620 if next > now {
621 continue;
622 }
623
624 let job = match Job::new(worker_type, &store) {
625 Ok(job) => job,
626 Err(_) => continue, // could not get lock
627 };
628
629 let auth_id = Authid::root_auth_id();
630
631 if let Err(err) = crate::server::do_garbage_collection_job(
632 job,
633 datastore,
634 auth_id,
635 Some(event_str),
636 false,
637 ) {
638 eprintln!(
639 "unable to start garbage collection job on datastore {} - {}",
640 store, err
641 );
642 }
643 }
644 }
645
646 async fn schedule_datastore_prune() {
647 let config = match pbs_config::datastore::config() {
648 Err(err) => {
649 eprintln!("unable to read datastore config - {}", err);
650 return;
651 }
652 Ok((config, _digest)) => config,
653 };
654
655 for (store, (_, store_config)) in config.sections {
656 let store_config: DataStoreConfig = match serde_json::from_value(store_config) {
657 Ok(c) => c,
658 Err(err) => {
659 eprintln!("datastore '{}' config from_value failed - {}", store, err);
660 continue;
661 }
662 };
663
664 let event_str = match store_config.prune_schedule {
665 Some(event_str) => event_str,
666 None => continue,
667 };
668
669 let prune_options = PruneOptions {
670 keep_last: store_config.keep_last,
671 keep_hourly: store_config.keep_hourly,
672 keep_daily: store_config.keep_daily,
673 keep_weekly: store_config.keep_weekly,
674 keep_monthly: store_config.keep_monthly,
675 keep_yearly: store_config.keep_yearly,
676 };
677
678 if !pbs_datastore::prune::keeps_something(&prune_options) {
679 // no prune settings - keep all
680 continue;
681 }
682
683 let worker_type = "prune";
684 if check_schedule(worker_type, &event_str, &store) {
685 let job = match Job::new(worker_type, &store) {
686 Ok(job) => job,
687 Err(_) => continue, // could not get lock
688 };
689
690 let auth_id = Authid::root_auth_id().clone();
691 if let Err(err) =
692 do_prune_job(job, prune_options, store.clone(), &auth_id, Some(event_str))
693 {
694 eprintln!("unable to start datastore prune job {} - {}", &store, err);
695 }
696 };
697 }
698 }
699
700 async fn schedule_datastore_sync_jobs() {
701 let config = match pbs_config::sync::config() {
702 Err(err) => {
703 eprintln!("unable to read sync job config - {}", err);
704 return;
705 }
706 Ok((config, _digest)) => config,
707 };
708
709 for (job_id, (_, job_config)) in config.sections {
710 let job_config: SyncJobConfig = match serde_json::from_value(job_config) {
711 Ok(c) => c,
712 Err(err) => {
713 eprintln!("sync job config from_value failed - {}", err);
714 continue;
715 }
716 };
717
718 let event_str = match job_config.schedule {
719 Some(ref event_str) => event_str.clone(),
720 None => continue,
721 };
722
723 let worker_type = "syncjob";
724 if check_schedule(worker_type, &event_str, &job_id) {
725 let job = match Job::new(worker_type, &job_id) {
726 Ok(job) => job,
727 Err(_) => continue, // could not get lock
728 };
729
730 let auth_id = Authid::root_auth_id().clone();
731 if let Err(err) = do_sync_job(job, job_config, &auth_id, Some(event_str), false) {
732 eprintln!("unable to start datastore sync job {} - {}", &job_id, err);
733 }
734 };
735 }
736 }
737
738 async fn schedule_datastore_verify_jobs() {
739 let config = match pbs_config::verify::config() {
740 Err(err) => {
741 eprintln!("unable to read verification job config - {}", err);
742 return;
743 }
744 Ok((config, _digest)) => config,
745 };
746 for (job_id, (_, job_config)) in config.sections {
747 let job_config: VerificationJobConfig = match serde_json::from_value(job_config) {
748 Ok(c) => c,
749 Err(err) => {
750 eprintln!("verification job config from_value failed - {}", err);
751 continue;
752 }
753 };
754 let event_str = match job_config.schedule {
755 Some(ref event_str) => event_str.clone(),
756 None => continue,
757 };
758
759 let worker_type = "verificationjob";
760 let auth_id = Authid::root_auth_id().clone();
761 if check_schedule(worker_type, &event_str, &job_id) {
762 let job = match Job::new(worker_type, &job_id) {
763 Ok(job) => job,
764 Err(_) => continue, // could not get lock
765 };
766 if let Err(err) = do_verification_job(job, job_config, &auth_id, Some(event_str), false)
767 {
768 eprintln!(
769 "unable to start datastore verification job {} - {}",
770 &job_id, err
771 );
772 }
773 };
774 }
775 }
776
777 async fn schedule_tape_backup_jobs() {
778 let config = match pbs_config::tape_job::config() {
779 Err(err) => {
780 eprintln!("unable to read tape job config - {}", err);
781 return;
782 }
783 Ok((config, _digest)) => config,
784 };
785 for (job_id, (_, job_config)) in config.sections {
786 let job_config: TapeBackupJobConfig = match serde_json::from_value(job_config) {
787 Ok(c) => c,
788 Err(err) => {
789 eprintln!("tape backup job config from_value failed - {}", err);
790 continue;
791 }
792 };
793 let event_str = match job_config.schedule {
794 Some(ref event_str) => event_str.clone(),
795 None => continue,
796 };
797
798 let worker_type = "tape-backup-job";
799 let auth_id = Authid::root_auth_id().clone();
800 if check_schedule(worker_type, &event_str, &job_id) {
801 let job = match Job::new(worker_type, &job_id) {
802 Ok(job) => job,
803 Err(_) => continue, // could not get lock
804 };
805 if let Err(err) =
806 do_tape_backup_job(job, job_config.setup, &auth_id, Some(event_str), false)
807 {
808 eprintln!("unable to start tape backup job {} - {}", &job_id, err);
809 }
810 };
811 }
812 }
813
814 async fn schedule_task_log_rotate() {
815 let worker_type = "logrotate";
816 let job_id = "access-log_and_task-archive";
817
818 // schedule daily at 00:00 like normal logrotate
819 let schedule = "00:00";
820
821 if !check_schedule(worker_type, schedule, job_id) {
822 // if we never ran the rotation, schedule instantly
823 match jobstate::JobState::load(worker_type, job_id) {
824 Ok(state) => match state {
825 jobstate::JobState::Created { .. } => {}
826 _ => return,
827 },
828 _ => return,
829 }
830 }
831
832 let mut job = match Job::new(worker_type, job_id) {
833 Ok(job) => job,
834 Err(_) => return, // could not get lock
835 };
836
837 if let Err(err) = WorkerTask::new_thread(
838 worker_type,
839 None,
840 Authid::root_auth_id().to_string(),
841 false,
842 move |worker| {
843 job.start(&worker.upid().to_string())?;
844 task_log!(worker, "starting task log rotation");
845
846 let result = try_block!({
847 let max_size = 512 * 1024 - 1; // an entry has ~ 100b, so > 5000 entries/file
848 let max_files = 20; // times twenty files gives > 100000 task entries
849
850 let max_days = proxmox_backup::config::node::config()
851 .map(|(cfg, _)| cfg.task_log_max_days)
852 .ok()
853 .flatten();
854
855 let user = pbs_config::backup_user()?;
856 let options = proxmox_sys::fs::CreateOptions::new()
857 .owner(user.uid)
858 .group(user.gid);
859
860 let has_rotated = rotate_task_log_archive(
861 max_size,
862 true,
863 Some(max_files),
864 max_days,
865 Some(options.clone()),
866 )?;
867
868 if has_rotated {
869 task_log!(worker, "task log archive was rotated");
870 } else {
871 task_log!(worker, "task log archive was not rotated");
872 }
873
874 let max_size = 32 * 1024 * 1024 - 1;
875 let max_files = 14;
876
877 let mut logrotate = LogRotate::new(
878 pbs_buildcfg::API_ACCESS_LOG_FN,
879 true,
880 Some(max_files),
881 Some(options.clone()),
882 )?;
883
884 if logrotate.rotate(max_size)? {
885 println!("rotated access log, telling daemons to re-open log file");
886 proxmox_async::runtime::block_on(command_reopen_access_logfiles())?;
887 task_log!(worker, "API access log was rotated");
888 } else {
889 task_log!(worker, "API access log was not rotated");
890 }
891
892 let mut logrotate = LogRotate::new(
893 pbs_buildcfg::API_AUTH_LOG_FN,
894 true,
895 Some(max_files),
896 Some(options),
897 )?;
898
899 if logrotate.rotate(max_size)? {
900 println!("rotated auth log, telling daemons to re-open log file");
901 proxmox_async::runtime::block_on(command_reopen_auth_logfiles())?;
902 task_log!(worker, "API authentication log was rotated");
903 } else {
904 task_log!(worker, "API authentication log was not rotated");
905 }
906
907 if has_rotated {
908 task_log!(worker, "cleaning up old task logs");
909 if let Err(err) = cleanup_old_tasks(&worker, true) {
910 task_warn!(worker, "could not completely cleanup old tasks: {}", err);
911 }
912 }
913
914 Ok(())
915 });
916
917 let status = worker.create_state(&result);
918
919 if let Err(err) = job.finish(status) {
920 eprintln!("could not finish job state for {}: {}", worker_type, err);
921 }
922
923 result
924 },
925 ) {
926 eprintln!("unable to start task log rotation: {}", err);
927 }
928 }
929
930 async fn command_reopen_access_logfiles() -> Result<(), Error> {
931 // only care about the most recent daemon instance for each, proxy & api, as other older ones
932 // should not respond to new requests anyway, but only finish their current one and then exit.
933 let sock = proxmox_rest_server::our_ctrl_sock();
934 let f1 =
935 proxmox_rest_server::send_raw_command(sock, "{\"command\":\"api-access-log-reopen\"}\n");
936
937 let pid = proxmox_rest_server::read_pid(pbs_buildcfg::PROXMOX_BACKUP_API_PID_FN)?;
938 let sock = proxmox_rest_server::ctrl_sock_from_pid(pid);
939 let f2 =
940 proxmox_rest_server::send_raw_command(sock, "{\"command\":\"api-access-log-reopen\"}\n");
941
942 match futures::join!(f1, f2) {
943 (Err(e1), Err(e2)) => Err(format_err!(
944 "reopen commands failed, proxy: {}; api: {}",
945 e1,
946 e2
947 )),
948 (Err(e1), Ok(_)) => Err(format_err!("reopen commands failed, proxy: {}", e1)),
949 (Ok(_), Err(e2)) => Err(format_err!("reopen commands failed, api: {}", e2)),
950 _ => Ok(()),
951 }
952 }
953
954 async fn command_reopen_auth_logfiles() -> Result<(), Error> {
955 // only care about the most recent daemon instance for each, proxy & api, as other older ones
956 // should not respond to new requests anyway, but only finish their current one and then exit.
957 let sock = proxmox_rest_server::our_ctrl_sock();
958 let f1 = proxmox_rest_server::send_raw_command(sock, "{\"command\":\"api-auth-log-reopen\"}\n");
959
960 let pid = proxmox_rest_server::read_pid(pbs_buildcfg::PROXMOX_BACKUP_API_PID_FN)?;
961 let sock = proxmox_rest_server::ctrl_sock_from_pid(pid);
962 let f2 = proxmox_rest_server::send_raw_command(sock, "{\"command\":\"api-auth-log-reopen\"}\n");
963
964 match futures::join!(f1, f2) {
965 (Err(e1), Err(e2)) => Err(format_err!(
966 "reopen commands failed, proxy: {}; api: {}",
967 e1,
968 e2
969 )),
970 (Err(e1), Ok(_)) => Err(format_err!("reopen commands failed, proxy: {}", e1)),
971 (Ok(_), Err(e2)) => Err(format_err!("reopen commands failed, api: {}", e2)),
972 _ => Ok(()),
973 }
974 }
975
976 async fn run_stat_generator() {
977 loop {
978 let delay_target = Instant::now() + Duration::from_secs(10);
979
980 generate_host_stats().await;
981
982 rrd_sync_journal();
983
984 tokio::time::sleep_until(tokio::time::Instant::from_std(delay_target)).await;
985 }
986 }
987
988 async fn generate_host_stats() {
989 match tokio::task::spawn_blocking(generate_host_stats_sync).await {
990 Ok(()) => (),
991 Err(err) => log::error!("generate_host_stats paniced: {}", err),
992 }
993 }
994
995 fn generate_host_stats_sync() {
996 use proxmox_sys::linux::procfs::{
997 read_loadavg, read_meminfo, read_proc_net_dev, read_proc_stat,
998 };
999
1000 match read_proc_stat() {
1001 Ok(stat) => {
1002 rrd_update_gauge("host/cpu", stat.cpu);
1003 rrd_update_gauge("host/iowait", stat.iowait_percent);
1004 }
1005 Err(err) => {
1006 eprintln!("read_proc_stat failed - {}", err);
1007 }
1008 }
1009
1010 match read_meminfo() {
1011 Ok(meminfo) => {
1012 rrd_update_gauge("host/memtotal", meminfo.memtotal as f64);
1013 rrd_update_gauge("host/memused", meminfo.memused as f64);
1014 rrd_update_gauge("host/swaptotal", meminfo.swaptotal as f64);
1015 rrd_update_gauge("host/swapused", meminfo.swapused as f64);
1016 }
1017 Err(err) => {
1018 eprintln!("read_meminfo failed - {}", err);
1019 }
1020 }
1021
1022 match read_proc_net_dev() {
1023 Ok(netdev) => {
1024 use pbs_config::network::is_physical_nic;
1025 let mut netin = 0;
1026 let mut netout = 0;
1027 for item in netdev {
1028 if !is_physical_nic(&item.device) {
1029 continue;
1030 }
1031 netin += item.receive;
1032 netout += item.send;
1033 }
1034 rrd_update_derive("host/netin", netin as f64);
1035 rrd_update_derive("host/netout", netout as f64);
1036 }
1037 Err(err) => {
1038 eprintln!("read_prox_net_dev failed - {}", err);
1039 }
1040 }
1041
1042 match read_loadavg() {
1043 Ok(loadavg) => {
1044 rrd_update_gauge("host/loadavg", loadavg.0 as f64);
1045 }
1046 Err(err) => {
1047 eprintln!("read_loadavg failed - {}", err);
1048 }
1049 }
1050
1051 let disk_manager = DiskManage::new();
1052
1053 gather_disk_stats(disk_manager.clone(), Path::new("/"), "host");
1054
1055 match pbs_config::datastore::config() {
1056 Ok((config, _)) => {
1057 let datastore_list: Vec<DataStoreConfig> = config
1058 .convert_to_typed_array("datastore")
1059 .unwrap_or_default();
1060
1061 for config in datastore_list {
1062 let rrd_prefix = format!("datastore/{}", config.name);
1063 let path = std::path::Path::new(&config.path);
1064 gather_disk_stats(disk_manager.clone(), path, &rrd_prefix);
1065 }
1066 }
1067 Err(err) => {
1068 eprintln!("read datastore config failed - {}", err);
1069 }
1070 }
1071 }
1072
1073 fn check_schedule(worker_type: &str, event_str: &str, id: &str) -> bool {
1074 let event: CalendarEvent = match event_str.parse() {
1075 Ok(event) => event,
1076 Err(err) => {
1077 eprintln!("unable to parse schedule '{}' - {}", event_str, err);
1078 return false;
1079 }
1080 };
1081
1082 let last = match jobstate::last_run_time(worker_type, id) {
1083 Ok(time) => time,
1084 Err(err) => {
1085 eprintln!(
1086 "could not get last run time of {} {}: {}",
1087 worker_type, id, err
1088 );
1089 return false;
1090 }
1091 };
1092
1093 let next = match event.compute_next_event(last) {
1094 Ok(Some(next)) => next,
1095 Ok(None) => return false,
1096 Err(err) => {
1097 eprintln!("compute_next_event for '{}' failed - {}", event_str, err);
1098 return false;
1099 }
1100 };
1101
1102 let now = proxmox_time::epoch_i64();
1103 next <= now
1104 }
1105
1106 fn gather_disk_stats(disk_manager: Arc<DiskManage>, path: &Path, rrd_prefix: &str) {
1107 match proxmox_backup::tools::disks::disk_usage(path) {
1108 Ok(status) => {
1109 let rrd_key = format!("{}/total", rrd_prefix);
1110 rrd_update_gauge(&rrd_key, status.total as f64);
1111 let rrd_key = format!("{}/used", rrd_prefix);
1112 rrd_update_gauge(&rrd_key, status.used as f64);
1113 }
1114 Err(err) => {
1115 eprintln!("read disk_usage on {:?} failed - {}", path, err);
1116 }
1117 }
1118
1119 match disk_manager.find_mounted_device(path) {
1120 Ok(None) => {}
1121 Ok(Some((fs_type, device, source))) => {
1122 let mut device_stat = None;
1123 match (fs_type.as_str(), source) {
1124 ("zfs", Some(source)) => match source.into_string() {
1125 Ok(dataset) => match zfs_dataset_stats(&dataset) {
1126 Ok(stat) => device_stat = Some(stat),
1127 Err(err) => eprintln!("zfs_dataset_stats({:?}) failed - {}", dataset, err),
1128 },
1129 Err(source) => {
1130 eprintln!("zfs_pool_stats({:?}) failed - invalid characters", source)
1131 }
1132 },
1133 _ => {
1134 if let Ok(disk) = disk_manager.clone().disk_by_dev_num(device.into_dev_t()) {
1135 match disk.read_stat() {
1136 Ok(stat) => device_stat = stat,
1137 Err(err) => eprintln!("disk.read_stat {:?} failed - {}", path, err),
1138 }
1139 }
1140 }
1141 }
1142 if let Some(stat) = device_stat {
1143 let rrd_key = format!("{}/read_ios", rrd_prefix);
1144 rrd_update_derive(&rrd_key, stat.read_ios as f64);
1145 let rrd_key = format!("{}/read_bytes", rrd_prefix);
1146 rrd_update_derive(&rrd_key, (stat.read_sectors * 512) as f64);
1147
1148 let rrd_key = format!("{}/write_ios", rrd_prefix);
1149 rrd_update_derive(&rrd_key, stat.write_ios as f64);
1150 let rrd_key = format!("{}/write_bytes", rrd_prefix);
1151 rrd_update_derive(&rrd_key, (stat.write_sectors * 512) as f64);
1152
1153 let rrd_key = format!("{}/io_ticks", rrd_prefix);
1154 rrd_update_derive(&rrd_key, (stat.io_ticks as f64) / 1000.0);
1155 }
1156 }
1157 Err(err) => {
1158 eprintln!("find_mounted_device failed - {}", err);
1159 }
1160 }
1161 }
1162
1163 // Rate Limiter lookup
1164
1165 // Test WITH
1166 // proxmox-backup-client restore vm/201/2021-10-22T09:55:56Z drive-scsi0.img img1.img --repository localhost:store2
1167
1168 async fn run_traffic_control_updater() {
1169 loop {
1170 let delay_target = Instant::now() + Duration::from_secs(1);
1171
1172 {
1173 let mut cache = TRAFFIC_CONTROL_CACHE.lock().unwrap();
1174 cache.compute_current_rates();
1175 }
1176
1177 tokio::time::sleep_until(tokio::time::Instant::from_std(delay_target)).await;
1178 }
1179 }
1180
1181 fn lookup_rate_limiter(
1182 peer: std::net::SocketAddr,
1183 ) -> (
1184 Option<Arc<dyn ShareableRateLimit>>,
1185 Option<Arc<dyn ShareableRateLimit>>,
1186 ) {
1187 let mut cache = TRAFFIC_CONTROL_CACHE.lock().unwrap();
1188
1189 let now = proxmox_time::epoch_i64();
1190
1191 cache.reload(now);
1192
1193 let (_rule_name, read_limiter, write_limiter) = cache.lookup_rate_limiter(peer, now);
1194
1195 (read_limiter, write_limiter)
1196 }