]> git.proxmox.com Git - proxmox-backup.git/blob - src/bin/proxmox-backup-proxy.rs
134dc7b724b8be7423e70f4f2212be8ca9e8edce
[proxmox-backup.git] / src / bin / proxmox-backup-proxy.rs
1 use std::sync::{Mutex, Arc};
2 use std::path::{Path, PathBuf};
3 use std::os::unix::io::AsRawFd;
4 use std::future::Future;
5 use std::pin::Pin;
6
7 use anyhow::{bail, format_err, Error};
8 use futures::*;
9 use http::request::Parts;
10 use http::Response;
11 use hyper::{Body, StatusCode};
12 use hyper::header;
13 use url::form_urlencoded;
14
15 use openssl::ssl::{SslMethod, SslAcceptor, SslFiletype};
16 use tokio_stream::wrappers::ReceiverStream;
17 use serde_json::{json, Value};
18 use http::{Method, HeaderMap};
19
20 use proxmox::sys::linux::socket::set_tcp_keepalive;
21 use proxmox::tools::fs::CreateOptions;
22 use proxmox_lang::try_block;
23 use proxmox_router::{RpcEnvironment, RpcEnvironmentType, UserInformation};
24 use proxmox_http::client::{RateLimitedStream, ShareableRateLimit};
25 use proxmox_sys::{task_log, task_warn};
26 use proxmox_sys::logrotate::LogRotate;
27
28 use pbs_datastore::DataStore;
29
30 use proxmox_rest_server::{
31 rotate_task_log_archive, extract_cookie , AuthError, ApiConfig, RestServer, RestEnvironment,
32 ServerAdapter, WorkerTask, cleanup_old_tasks,
33 };
34
35 use proxmox_backup::rrd_cache::{
36 initialize_rrd_cache, rrd_update_gauge, rrd_update_derive, rrd_sync_journal,
37 };
38 use proxmox_backup::{
39 TRAFFIC_CONTROL_CACHE,
40 server::{
41 auth::check_pbs_auth,
42 jobstate::{
43 self,
44 Job,
45 },
46 },
47 };
48
49 use pbs_buildcfg::configdir;
50 use proxmox_time::{compute_next_event, parse_calendar_event};
51
52 use pbs_api_types::{
53 Authid, TapeBackupJobConfig, VerificationJobConfig, SyncJobConfig, DataStoreConfig,
54 PruneOptions,
55 };
56
57 use proxmox_rest_server::daemon;
58
59 use proxmox_backup::server;
60 use proxmox_backup::auth_helpers::*;
61 use proxmox_backup::tools::{
62 PROXMOX_BACKUP_TCP_KEEPALIVE_TIME,
63 disks::{
64 DiskManage,
65 zfs_pool_stats,
66 get_pool_from_dataset,
67 },
68 };
69
70
71 use proxmox_backup::api2::pull::do_sync_job;
72 use proxmox_backup::api2::tape::backup::do_tape_backup_job;
73 use proxmox_backup::server::do_verification_job;
74 use proxmox_backup::server::do_prune_job;
75
76 fn main() -> Result<(), Error> {
77 proxmox_backup::tools::setup_safe_path_env();
78
79 let backup_uid = pbs_config::backup_user()?.uid;
80 let backup_gid = pbs_config::backup_group()?.gid;
81 let running_uid = nix::unistd::Uid::effective();
82 let running_gid = nix::unistd::Gid::effective();
83
84 if running_uid != backup_uid || running_gid != backup_gid {
85 bail!("proxy not running as backup user or group (got uid {} gid {})", running_uid, running_gid);
86 }
87
88 pbs_runtime::main(run())
89 }
90
91
92 struct ProxmoxBackupProxyAdapter;
93
94 impl ServerAdapter for ProxmoxBackupProxyAdapter {
95
96 fn get_index(
97 &self,
98 env: RestEnvironment,
99 parts: Parts,
100 ) -> Pin<Box<dyn Future<Output = Response<Body>> + Send>> {
101 Box::pin(get_index_future(env, parts))
102 }
103
104 fn check_auth<'a>(
105 &'a self,
106 headers: &'a HeaderMap,
107 method: &'a Method,
108 ) -> Pin<Box<dyn Future<Output = Result<(String, Box<dyn UserInformation + Sync + Send>), AuthError>> + Send + 'a>> {
109 Box::pin(async move {
110 check_pbs_auth(headers, method).await
111 })
112 }
113 }
114
115 fn extract_lang_header(headers: &http::HeaderMap) -> Option<String> {
116 if let Some(Ok(cookie)) = headers.get("COOKIE").map(|v| v.to_str()) {
117 return extract_cookie(cookie, "PBSLangCookie");
118 }
119 None
120 }
121
122 async fn get_index_future(
123 env: RestEnvironment,
124 parts: Parts,
125 ) -> Response<Body> {
126
127 let auth_id = env.get_auth_id();
128 let api = env.api_config();
129 let language = extract_lang_header(&parts.headers);
130
131 // fixme: make all IO async
132
133 let (userid, csrf_token) = match auth_id {
134 Some(auth_id) => {
135 let auth_id = auth_id.parse::<Authid>();
136 match auth_id {
137 Ok(auth_id) if !auth_id.is_token() => {
138 let userid = auth_id.user().clone();
139 let new_csrf_token = assemble_csrf_prevention_token(csrf_secret(), &userid);
140 (Some(userid), Some(new_csrf_token))
141 }
142 _ => (None, None)
143 }
144 }
145 None => (None, None),
146 };
147
148 let nodename = proxmox::tools::nodename();
149 let user = userid.as_ref().map(|u| u.as_str()).unwrap_or("");
150
151 let csrf_token = csrf_token.unwrap_or_else(|| String::from(""));
152
153 let mut debug = false;
154 let mut template_file = "index";
155
156 if let Some(query_str) = parts.uri.query() {
157 for (k, v) in form_urlencoded::parse(query_str.as_bytes()).into_owned() {
158 if k == "debug" && v != "0" && v != "false" {
159 debug = true;
160 } else if k == "console" {
161 template_file = "console";
162 }
163 }
164 }
165
166 let mut lang = String::from("");
167 if let Some(language) = language {
168 if Path::new(&format!("/usr/share/pbs-i18n/pbs-lang-{}.js", language)).exists() {
169 lang = language;
170 }
171 }
172
173 let data = json!({
174 "NodeName": nodename,
175 "UserName": user,
176 "CSRFPreventionToken": csrf_token,
177 "language": lang,
178 "debug": debug,
179 });
180
181 let (ct, index) = match api.render_template(template_file, &data) {
182 Ok(index) => ("text/html", index),
183 Err(err) => ("text/plain", format!("Error rendering template: {}", err)),
184 };
185
186 let mut resp = Response::builder()
187 .status(StatusCode::OK)
188 .header(header::CONTENT_TYPE, ct)
189 .body(index.into())
190 .unwrap();
191
192 if let Some(userid) = userid {
193 resp.extensions_mut().insert(Authid::from((userid, None)));
194 }
195
196 resp
197 }
198
199 async fn run() -> Result<(), Error> {
200 if let Err(err) = syslog::init(
201 syslog::Facility::LOG_DAEMON,
202 log::LevelFilter::Info,
203 Some("proxmox-backup-proxy")) {
204 bail!("unable to inititialize syslog - {}", err);
205 }
206
207 // Note: To debug early connection error use
208 // PROXMOX_DEBUG=1 ./target/release/proxmox-backup-proxy
209 let debug = std::env::var("PROXMOX_DEBUG").is_ok();
210
211 let _ = public_auth_key(); // load with lazy_static
212 let _ = csrf_secret(); // load with lazy_static
213
214 let rrd_cache = initialize_rrd_cache()?;
215 rrd_cache.apply_journal()?;
216
217 let mut config = ApiConfig::new(
218 pbs_buildcfg::JS_DIR,
219 &proxmox_backup::api2::ROUTER,
220 RpcEnvironmentType::PUBLIC,
221 ProxmoxBackupProxyAdapter,
222 )?;
223
224 config.add_alias("novnc", "/usr/share/novnc-pve");
225 config.add_alias("extjs", "/usr/share/javascript/extjs");
226 config.add_alias("qrcodejs", "/usr/share/javascript/qrcodejs");
227 config.add_alias("fontawesome", "/usr/share/fonts-font-awesome");
228 config.add_alias("xtermjs", "/usr/share/pve-xtermjs");
229 config.add_alias("locale", "/usr/share/pbs-i18n");
230 config.add_alias("widgettoolkit", "/usr/share/javascript/proxmox-widget-toolkit");
231 config.add_alias("docs", "/usr/share/doc/proxmox-backup/html");
232
233 let mut indexpath = PathBuf::from(pbs_buildcfg::JS_DIR);
234 indexpath.push("index.hbs");
235 config.register_template("index", &indexpath)?;
236 config.register_template("console", "/usr/share/pve-xtermjs/index.html.hbs")?;
237
238 let backup_user = pbs_config::backup_user()?;
239 let mut commando_sock = proxmox_rest_server::CommandSocket::new(proxmox_rest_server::our_ctrl_sock(), backup_user.gid);
240
241 let dir_opts = CreateOptions::new().owner(backup_user.uid).group(backup_user.gid);
242 let file_opts = CreateOptions::new().owner(backup_user.uid).group(backup_user.gid);
243
244 config.enable_access_log(
245 pbs_buildcfg::API_ACCESS_LOG_FN,
246 Some(dir_opts.clone()),
247 Some(file_opts.clone()),
248 &mut commando_sock,
249 )?;
250
251 config.enable_auth_log(
252 pbs_buildcfg::API_AUTH_LOG_FN,
253 Some(dir_opts.clone()),
254 Some(file_opts.clone()),
255 &mut commando_sock,
256 )?;
257
258 let rest_server = RestServer::new(config);
259 proxmox_rest_server::init_worker_tasks(pbs_buildcfg::PROXMOX_BACKUP_LOG_DIR_M!().into(), file_opts.clone())?;
260
261 //openssl req -x509 -newkey rsa:4096 -keyout /etc/proxmox-backup/proxy.key -out /etc/proxmox-backup/proxy.pem -nodes
262
263 // we build the initial acceptor here as we cannot start if this fails
264 let acceptor = make_tls_acceptor()?;
265 let acceptor = Arc::new(Mutex::new(acceptor));
266
267 // to renew the acceptor we just add a command-socket handler
268 commando_sock.register_command(
269 "reload-certificate".to_string(),
270 {
271 let acceptor = Arc::clone(&acceptor);
272 move |_value| -> Result<_, Error> {
273 log::info!("reloading certificate");
274 match make_tls_acceptor() {
275 Err(err) => log::error!("error reloading certificate: {}", err),
276 Ok(new_acceptor) => {
277 let mut guard = acceptor.lock().unwrap();
278 *guard = new_acceptor;
279 }
280 }
281 Ok(Value::Null)
282 }
283 },
284 )?;
285
286 // to remove references for not configured datastores
287 commando_sock.register_command(
288 "datastore-removed".to_string(),
289 |_value| {
290 if let Err(err) = DataStore::remove_unused_datastores() {
291 log::error!("could not refresh datastores: {}", err);
292 }
293 Ok(Value::Null)
294 }
295 )?;
296
297 let server = daemon::create_daemon(
298 ([0,0,0,0,0,0,0,0], 8007).into(),
299 move |listener| {
300
301 let connections = accept_connections(listener, acceptor, debug);
302 let connections = hyper::server::accept::from_stream(ReceiverStream::new(connections));
303
304 Ok(async {
305 daemon::systemd_notify(daemon::SystemdNotify::Ready)?;
306
307 hyper::Server::builder(connections)
308 .serve(rest_server)
309 .with_graceful_shutdown(proxmox_rest_server::shutdown_future())
310 .map_err(Error::from)
311 .await
312 })
313 },
314 );
315
316 proxmox_rest_server::write_pid(pbs_buildcfg::PROXMOX_BACKUP_PROXY_PID_FN)?;
317
318 let init_result: Result<(), Error> = try_block!({
319 proxmox_rest_server::register_task_control_commands(&mut commando_sock)?;
320 commando_sock.spawn()?;
321 proxmox_rest_server::catch_shutdown_signal()?;
322 proxmox_rest_server::catch_reload_signal()?;
323 Ok(())
324 });
325
326 if let Err(err) = init_result {
327 bail!("unable to start daemon - {}", err);
328 }
329
330 start_task_scheduler();
331 start_stat_generator();
332 start_traffic_control_updater();
333
334 server.await?;
335 log::info!("server shutting down, waiting for active workers to complete");
336 proxmox_rest_server::last_worker_future().await?;
337 log::info!("done - exit server");
338
339 Ok(())
340 }
341
342 fn make_tls_acceptor() -> Result<SslAcceptor, Error> {
343 let key_path = configdir!("/proxy.key");
344 let cert_path = configdir!("/proxy.pem");
345
346 let mut acceptor = SslAcceptor::mozilla_intermediate_v5(SslMethod::tls()).unwrap();
347 acceptor.set_private_key_file(key_path, SslFiletype::PEM)
348 .map_err(|err| format_err!("unable to read proxy key {} - {}", key_path, err))?;
349 acceptor.set_certificate_chain_file(cert_path)
350 .map_err(|err| format_err!("unable to read proxy cert {} - {}", cert_path, err))?;
351 acceptor.check_private_key().unwrap();
352
353 Ok(acceptor.build())
354 }
355
356 type ClientStreamResult =
357 Result<std::pin::Pin<Box<tokio_openssl::SslStream<RateLimitedStream<tokio::net::TcpStream>>>>, Error>;
358 const MAX_PENDING_ACCEPTS: usize = 1024;
359
360 fn accept_connections(
361 listener: tokio::net::TcpListener,
362 acceptor: Arc<Mutex<openssl::ssl::SslAcceptor>>,
363 debug: bool,
364 ) -> tokio::sync::mpsc::Receiver<ClientStreamResult> {
365
366 let (sender, receiver) = tokio::sync::mpsc::channel(MAX_PENDING_ACCEPTS);
367
368 tokio::spawn(accept_connection(listener, acceptor, debug, sender));
369
370 receiver
371 }
372
373 async fn accept_connection(
374 listener: tokio::net::TcpListener,
375 acceptor: Arc<Mutex<openssl::ssl::SslAcceptor>>,
376 debug: bool,
377 sender: tokio::sync::mpsc::Sender<ClientStreamResult>,
378 ) {
379 let accept_counter = Arc::new(());
380
381 loop {
382 let (sock, _addr) = match listener.accept().await {
383 Ok(conn) => conn,
384 Err(err) => {
385 eprintln!("error accepting tcp connection: {}", err);
386 continue;
387 }
388 };
389
390 sock.set_nodelay(true).unwrap();
391 let _ = set_tcp_keepalive(sock.as_raw_fd(), PROXMOX_BACKUP_TCP_KEEPALIVE_TIME);
392
393 let peer = sock.peer_addr().ok();
394 let sock = RateLimitedStream::with_limiter_update_cb(sock, move || lookup_rate_limiter(peer));
395
396 let ssl = { // limit acceptor_guard scope
397 // Acceptor can be reloaded using the command socket "reload-certificate" command
398 let acceptor_guard = acceptor.lock().unwrap();
399
400 match openssl::ssl::Ssl::new(acceptor_guard.context()) {
401 Ok(ssl) => ssl,
402 Err(err) => {
403 eprintln!("failed to create Ssl object from Acceptor context - {}", err);
404 continue;
405 },
406 }
407 };
408
409 let stream = match tokio_openssl::SslStream::new(ssl, sock) {
410 Ok(stream) => stream,
411 Err(err) => {
412 eprintln!("failed to create SslStream using ssl and connection socket - {}", err);
413 continue;
414 },
415 };
416
417 let mut stream = Box::pin(stream);
418 let sender = sender.clone();
419
420 if Arc::strong_count(&accept_counter) > MAX_PENDING_ACCEPTS {
421 eprintln!("connection rejected - to many open connections");
422 continue;
423 }
424
425 let accept_counter = Arc::clone(&accept_counter);
426 tokio::spawn(async move {
427 let accept_future = tokio::time::timeout(
428 Duration::new(10, 0), stream.as_mut().accept());
429
430 let result = accept_future.await;
431
432 match result {
433 Ok(Ok(())) => {
434 if sender.send(Ok(stream)).await.is_err() && debug {
435 eprintln!("detect closed connection channel");
436 }
437 }
438 Ok(Err(err)) => {
439 if debug {
440 eprintln!("https handshake failed - {}", err);
441 }
442 }
443 Err(_) => {
444 if debug {
445 eprintln!("https handshake timeout");
446 }
447 }
448 }
449
450 drop(accept_counter); // decrease reference count
451 });
452 }
453 }
454
455 fn start_stat_generator() {
456 let abort_future = proxmox_rest_server::shutdown_future();
457 let future = Box::pin(run_stat_generator());
458 let task = futures::future::select(future, abort_future);
459 tokio::spawn(task.map(|_| ()));
460 }
461
462 fn start_task_scheduler() {
463 let abort_future = proxmox_rest_server::shutdown_future();
464 let future = Box::pin(run_task_scheduler());
465 let task = futures::future::select(future, abort_future);
466 tokio::spawn(task.map(|_| ()));
467 }
468
469 fn start_traffic_control_updater() {
470 let abort_future = proxmox_rest_server::shutdown_future();
471 let future = Box::pin(run_traffic_control_updater());
472 let task = futures::future::select(future, abort_future);
473 tokio::spawn(task.map(|_| ()));
474 }
475
476 use std::time::{SystemTime, Instant, Duration, UNIX_EPOCH};
477
/// Returns the [`Instant`] at which the next full wall-clock minute starts.
///
/// The result is always in the future and at most 60 seconds away.
///
/// # Errors
///
/// Fails if the system clock is set to a time before the Unix epoch.
fn next_minute() -> Result<Instant, Error> {
    let now = SystemTime::now();
    let epoch_now = now.duration_since(UNIX_EPOCH)?;
    let epoch_next = Duration::from_secs((epoch_now.as_secs()/60 + 1)*60);
    // Compute the (sub-minute) remainder first; adding the full epoch offset to an
    // `Instant` would create a huge intermediate value and `Instant + Duration` panics
    // if the result is not representable.
    let remaining = epoch_next - epoch_now;
    Ok(Instant::now() + remaining)
}
484
485 async fn run_task_scheduler() {
486
487 let mut count: usize = 0;
488
489 loop {
490 count += 1;
491
492 let delay_target = match next_minute() { // try to run very minute
493 Ok(d) => d,
494 Err(err) => {
495 eprintln!("task scheduler: compute next minute failed - {}", err);
496 tokio::time::sleep_until(tokio::time::Instant::from_std(Instant::now() + Duration::from_secs(60))).await;
497 continue;
498 }
499 };
500
501 if count > 2 { // wait 1..2 minutes before starting
502 match schedule_tasks().catch_unwind().await {
503 Err(panic) => {
504 match panic.downcast::<&str>() {
505 Ok(msg) => {
506 eprintln!("task scheduler panic: {}", msg);
507 }
508 Err(_) => {
509 eprintln!("task scheduler panic - unknown type");
510 }
511 }
512 }
513 Ok(Err(err)) => {
514 eprintln!("task scheduler failed - {:?}", err);
515 }
516 Ok(Ok(_)) => {}
517 }
518 }
519
520 tokio::time::sleep_until(tokio::time::Instant::from_std(delay_target)).await;
521 }
522 }
523
524 async fn schedule_tasks() -> Result<(), Error> {
525
526 schedule_datastore_garbage_collection().await;
527 schedule_datastore_prune().await;
528 schedule_datastore_sync_jobs().await;
529 schedule_datastore_verify_jobs().await;
530 schedule_tape_backup_jobs().await;
531 schedule_task_log_rotate().await;
532
533 Ok(())
534 }
535
536 async fn schedule_datastore_garbage_collection() {
537
538 let config = match pbs_config::datastore::config() {
539 Err(err) => {
540 eprintln!("unable to read datastore config - {}", err);
541 return;
542 }
543 Ok((config, _digest)) => config,
544 };
545
546 for (store, (_, store_config)) in config.sections {
547 let datastore = match DataStore::lookup_datastore(&store) {
548 Ok(datastore) => datastore,
549 Err(err) => {
550 eprintln!("lookup_datastore failed - {}", err);
551 continue;
552 }
553 };
554
555 let store_config: DataStoreConfig = match serde_json::from_value(store_config) {
556 Ok(c) => c,
557 Err(err) => {
558 eprintln!("datastore config from_value failed - {}", err);
559 continue;
560 }
561 };
562
563 let event_str = match store_config.gc_schedule {
564 Some(event_str) => event_str,
565 None => continue,
566 };
567
568 let event = match parse_calendar_event(&event_str) {
569 Ok(event) => event,
570 Err(err) => {
571 eprintln!("unable to parse schedule '{}' - {}", event_str, err);
572 continue;
573 }
574 };
575
576 if datastore.garbage_collection_running() { continue; }
577
578 let worker_type = "garbage_collection";
579
580 let last = match jobstate::last_run_time(worker_type, &store) {
581 Ok(time) => time,
582 Err(err) => {
583 eprintln!("could not get last run time of {} {}: {}", worker_type, store, err);
584 continue;
585 }
586 };
587
588 let next = match compute_next_event(&event, last, false) {
589 Ok(Some(next)) => next,
590 Ok(None) => continue,
591 Err(err) => {
592 eprintln!("compute_next_event for '{}' failed - {}", event_str, err);
593 continue;
594 }
595 };
596
597 let now = proxmox_time::epoch_i64();
598
599 if next > now { continue; }
600
601 let job = match Job::new(worker_type, &store) {
602 Ok(job) => job,
603 Err(_) => continue, // could not get lock
604 };
605
606 let auth_id = Authid::root_auth_id();
607
608 if let Err(err) = crate::server::do_garbage_collection_job(job, datastore, auth_id, Some(event_str), false) {
609 eprintln!("unable to start garbage collection job on datastore {} - {}", store, err);
610 }
611 }
612 }
613
614 async fn schedule_datastore_prune() {
615
616 let config = match pbs_config::datastore::config() {
617 Err(err) => {
618 eprintln!("unable to read datastore config - {}", err);
619 return;
620 }
621 Ok((config, _digest)) => config,
622 };
623
624 for (store, (_, store_config)) in config.sections {
625
626 let store_config: DataStoreConfig = match serde_json::from_value(store_config) {
627 Ok(c) => c,
628 Err(err) => {
629 eprintln!("datastore '{}' config from_value failed - {}", store, err);
630 continue;
631 }
632 };
633
634 let event_str = match store_config.prune_schedule {
635 Some(event_str) => event_str,
636 None => continue,
637 };
638
639 let prune_options = PruneOptions {
640 keep_last: store_config.keep_last,
641 keep_hourly: store_config.keep_hourly,
642 keep_daily: store_config.keep_daily,
643 keep_weekly: store_config.keep_weekly,
644 keep_monthly: store_config.keep_monthly,
645 keep_yearly: store_config.keep_yearly,
646 };
647
648 if !pbs_datastore::prune::keeps_something(&prune_options) { // no prune settings - keep all
649 continue;
650 }
651
652 let worker_type = "prune";
653 if check_schedule(worker_type, &event_str, &store) {
654 let job = match Job::new(worker_type, &store) {
655 Ok(job) => job,
656 Err(_) => continue, // could not get lock
657 };
658
659 let auth_id = Authid::root_auth_id().clone();
660 if let Err(err) = do_prune_job(job, prune_options, store.clone(), &auth_id, Some(event_str)) {
661 eprintln!("unable to start datastore prune job {} - {}", &store, err);
662 }
663 };
664 }
665 }
666
667 async fn schedule_datastore_sync_jobs() {
668
669
670 let config = match pbs_config::sync::config() {
671 Err(err) => {
672 eprintln!("unable to read sync job config - {}", err);
673 return;
674 }
675 Ok((config, _digest)) => config,
676 };
677
678 for (job_id, (_, job_config)) in config.sections {
679 let job_config: SyncJobConfig = match serde_json::from_value(job_config) {
680 Ok(c) => c,
681 Err(err) => {
682 eprintln!("sync job config from_value failed - {}", err);
683 continue;
684 }
685 };
686
687 let event_str = match job_config.schedule {
688 Some(ref event_str) => event_str.clone(),
689 None => continue,
690 };
691
692 let worker_type = "syncjob";
693 if check_schedule(worker_type, &event_str, &job_id) {
694 let job = match Job::new(worker_type, &job_id) {
695 Ok(job) => job,
696 Err(_) => continue, // could not get lock
697 };
698
699 let auth_id = Authid::root_auth_id().clone();
700 if let Err(err) = do_sync_job(job, job_config, &auth_id, Some(event_str), false) {
701 eprintln!("unable to start datastore sync job {} - {}", &job_id, err);
702 }
703 };
704 }
705 }
706
707 async fn schedule_datastore_verify_jobs() {
708
709 let config = match pbs_config::verify::config() {
710 Err(err) => {
711 eprintln!("unable to read verification job config - {}", err);
712 return;
713 }
714 Ok((config, _digest)) => config,
715 };
716 for (job_id, (_, job_config)) in config.sections {
717 let job_config: VerificationJobConfig = match serde_json::from_value(job_config) {
718 Ok(c) => c,
719 Err(err) => {
720 eprintln!("verification job config from_value failed - {}", err);
721 continue;
722 }
723 };
724 let event_str = match job_config.schedule {
725 Some(ref event_str) => event_str.clone(),
726 None => continue,
727 };
728
729 let worker_type = "verificationjob";
730 let auth_id = Authid::root_auth_id().clone();
731 if check_schedule(worker_type, &event_str, &job_id) {
732 let job = match Job::new(&worker_type, &job_id) {
733 Ok(job) => job,
734 Err(_) => continue, // could not get lock
735 };
736 if let Err(err) = do_verification_job(job, job_config, &auth_id, Some(event_str), false) {
737 eprintln!("unable to start datastore verification job {} - {}", &job_id, err);
738 }
739 };
740 }
741 }
742
743 async fn schedule_tape_backup_jobs() {
744
745 let config = match pbs_config::tape_job::config() {
746 Err(err) => {
747 eprintln!("unable to read tape job config - {}", err);
748 return;
749 }
750 Ok((config, _digest)) => config,
751 };
752 for (job_id, (_, job_config)) in config.sections {
753 let job_config: TapeBackupJobConfig = match serde_json::from_value(job_config) {
754 Ok(c) => c,
755 Err(err) => {
756 eprintln!("tape backup job config from_value failed - {}", err);
757 continue;
758 }
759 };
760 let event_str = match job_config.schedule {
761 Some(ref event_str) => event_str.clone(),
762 None => continue,
763 };
764
765 let worker_type = "tape-backup-job";
766 let auth_id = Authid::root_auth_id().clone();
767 if check_schedule(worker_type, &event_str, &job_id) {
768 let job = match Job::new(&worker_type, &job_id) {
769 Ok(job) => job,
770 Err(_) => continue, // could not get lock
771 };
772 if let Err(err) = do_tape_backup_job(job, job_config.setup, &auth_id, Some(event_str), false) {
773 eprintln!("unable to start tape backup job {} - {}", &job_id, err);
774 }
775 };
776 }
777 }
778
779
780 async fn schedule_task_log_rotate() {
781
782 let worker_type = "logrotate";
783 let job_id = "access-log_and_task-archive";
784
785 // schedule daily at 00:00 like normal logrotate
786 let schedule = "00:00";
787
788 if !check_schedule(worker_type, schedule, job_id) {
789 // if we never ran the rotation, schedule instantly
790 match jobstate::JobState::load(worker_type, job_id) {
791 Ok(state) => match state {
792 jobstate::JobState::Created { .. } => {},
793 _ => return,
794 },
795 _ => return,
796 }
797 }
798
799 let mut job = match Job::new(worker_type, job_id) {
800 Ok(job) => job,
801 Err(_) => return, // could not get lock
802 };
803
804 if let Err(err) = WorkerTask::new_thread(
805 worker_type,
806 None,
807 Authid::root_auth_id().to_string(),
808 false,
809 move |worker| {
810 job.start(&worker.upid().to_string())?;
811 task_log!(worker, "starting task log rotation");
812
813 let result = try_block!({
814 let max_size = 512 * 1024 - 1; // an entry has ~ 100b, so > 5000 entries/file
815 let max_files = 20; // times twenty files gives > 100000 task entries
816
817 let user = pbs_config::backup_user()?;
818 let options = proxmox::tools::fs::CreateOptions::new()
819 .owner(user.uid)
820 .group(user.gid);
821
822 let has_rotated = rotate_task_log_archive(
823 max_size,
824 true,
825 Some(max_files),
826 Some(options.clone()),
827 )?;
828
829 if has_rotated {
830 task_log!(worker, "task log archive was rotated");
831 } else {
832 task_log!(worker, "task log archive was not rotated");
833 }
834
835 let max_size = 32 * 1024 * 1024 - 1;
836 let max_files = 14;
837
838
839 let mut logrotate = LogRotate::new(
840 pbs_buildcfg::API_ACCESS_LOG_FN,
841 true,
842 Some(max_files),
843 Some(options.clone()),
844 )?;
845
846 if logrotate.rotate(max_size)? {
847 println!("rotated access log, telling daemons to re-open log file");
848 pbs_runtime::block_on(command_reopen_access_logfiles())?;
849 task_log!(worker, "API access log was rotated");
850 } else {
851 task_log!(worker, "API access log was not rotated");
852 }
853
854 let mut logrotate = LogRotate::new(
855 pbs_buildcfg::API_AUTH_LOG_FN,
856 true,
857 Some(max_files),
858 Some(options),
859 )?;
860
861 if logrotate.rotate(max_size)? {
862 println!("rotated auth log, telling daemons to re-open log file");
863 pbs_runtime::block_on(command_reopen_auth_logfiles())?;
864 task_log!(worker, "API authentication log was rotated");
865 } else {
866 task_log!(worker, "API authentication log was not rotated");
867 }
868
869 if has_rotated {
870 task_log!(worker, "cleaning up old task logs");
871 if let Err(err) = cleanup_old_tasks(true) {
872 task_warn!(worker, "could not completely cleanup old tasks: {}", err);
873 }
874 }
875
876 Ok(())
877 });
878
879 let status = worker.create_state(&result);
880
881 if let Err(err) = job.finish(status) {
882 eprintln!("could not finish job state for {}: {}", worker_type, err);
883 }
884
885 result
886 },
887 ) {
888 eprintln!("unable to start task log rotation: {}", err);
889 }
890
891 }
892
893 async fn command_reopen_access_logfiles() -> Result<(), Error> {
894 // only care about the most recent daemon instance for each, proxy & api, as other older ones
895 // should not respond to new requests anyway, but only finish their current one and then exit.
896 let sock = proxmox_rest_server::our_ctrl_sock();
897 let f1 = proxmox_rest_server::send_raw_command(sock, "{\"command\":\"api-access-log-reopen\"}\n");
898
899 let pid = proxmox_rest_server::read_pid(pbs_buildcfg::PROXMOX_BACKUP_API_PID_FN)?;
900 let sock = proxmox_rest_server::ctrl_sock_from_pid(pid);
901 let f2 = proxmox_rest_server::send_raw_command(sock, "{\"command\":\"api-access-log-reopen\"}\n");
902
903 match futures::join!(f1, f2) {
904 (Err(e1), Err(e2)) => Err(format_err!("reopen commands failed, proxy: {}; api: {}", e1, e2)),
905 (Err(e1), Ok(_)) => Err(format_err!("reopen commands failed, proxy: {}", e1)),
906 (Ok(_), Err(e2)) => Err(format_err!("reopen commands failed, api: {}", e2)),
907 _ => Ok(()),
908 }
909 }
910
911 async fn command_reopen_auth_logfiles() -> Result<(), Error> {
912 // only care about the most recent daemon instance for each, proxy & api, as other older ones
913 // should not respond to new requests anyway, but only finish their current one and then exit.
914 let sock = proxmox_rest_server::our_ctrl_sock();
915 let f1 = proxmox_rest_server::send_raw_command(sock, "{\"command\":\"api-auth-log-reopen\"}\n");
916
917 let pid = proxmox_rest_server::read_pid(pbs_buildcfg::PROXMOX_BACKUP_API_PID_FN)?;
918 let sock = proxmox_rest_server::ctrl_sock_from_pid(pid);
919 let f2 = proxmox_rest_server::send_raw_command(sock, "{\"command\":\"api-auth-log-reopen\"}\n");
920
921 match futures::join!(f1, f2) {
922 (Err(e1), Err(e2)) => Err(format_err!("reopen commands failed, proxy: {}; api: {}", e1, e2)),
923 (Err(e1), Ok(_)) => Err(format_err!("reopen commands failed, proxy: {}", e1)),
924 (Ok(_), Err(e2)) => Err(format_err!("reopen commands failed, api: {}", e2)),
925 _ => Ok(()),
926 }
927 }
928
929 async fn run_stat_generator() {
930
931 loop {
932 let delay_target = Instant::now() + Duration::from_secs(10);
933
934 generate_host_stats().await;
935
936 rrd_sync_journal();
937
938 tokio::time::sleep_until(tokio::time::Instant::from_std(delay_target)).await;
939
940 }
941
942 }
943
944 async fn generate_host_stats() {
945 match tokio::task::spawn_blocking(generate_host_stats_sync).await {
946 Ok(()) => (),
947 Err(err) => log::error!("generate_host_stats paniced: {}", err),
948 }
949 }
950
951 fn generate_host_stats_sync() {
952 use proxmox::sys::linux::procfs::{
953 read_meminfo, read_proc_stat, read_proc_net_dev, read_loadavg};
954
955 match read_proc_stat() {
956 Ok(stat) => {
957 rrd_update_gauge("host/cpu", stat.cpu);
958 rrd_update_gauge("host/iowait", stat.iowait_percent);
959 }
960 Err(err) => {
961 eprintln!("read_proc_stat failed - {}", err);
962 }
963 }
964
965 match read_meminfo() {
966 Ok(meminfo) => {
967 rrd_update_gauge("host/memtotal", meminfo.memtotal as f64);
968 rrd_update_gauge("host/memused", meminfo.memused as f64);
969 rrd_update_gauge("host/swaptotal", meminfo.swaptotal as f64);
970 rrd_update_gauge("host/swapused", meminfo.swapused as f64);
971 }
972 Err(err) => {
973 eprintln!("read_meminfo failed - {}", err);
974 }
975 }
976
977 match read_proc_net_dev() {
978 Ok(netdev) => {
979 use pbs_config::network::is_physical_nic;
980 let mut netin = 0;
981 let mut netout = 0;
982 for item in netdev {
983 if !is_physical_nic(&item.device) { continue; }
984 netin += item.receive;
985 netout += item.send;
986 }
987 rrd_update_derive("host/netin", netin as f64);
988 rrd_update_derive("host/netout", netout as f64);
989 }
990 Err(err) => {
991 eprintln!("read_prox_net_dev failed - {}", err);
992 }
993 }
994
995 match read_loadavg() {
996 Ok(loadavg) => {
997 rrd_update_gauge("host/loadavg", loadavg.0 as f64);
998 }
999 Err(err) => {
1000 eprintln!("read_loadavg failed - {}", err);
1001 }
1002 }
1003
1004 let disk_manager = DiskManage::new();
1005
1006 gather_disk_stats(disk_manager.clone(), Path::new("/"), "host");
1007
1008 match pbs_config::datastore::config() {
1009 Ok((config, _)) => {
1010 let datastore_list: Vec<DataStoreConfig> =
1011 config.convert_to_typed_array("datastore").unwrap_or_default();
1012
1013 for config in datastore_list {
1014
1015 let rrd_prefix = format!("datastore/{}", config.name);
1016 let path = std::path::Path::new(&config.path);
1017 gather_disk_stats(disk_manager.clone(), path, &rrd_prefix);
1018 }
1019 }
1020 Err(err) => {
1021 eprintln!("read datastore config failed - {}", err);
1022 }
1023 }
1024 }
1025
1026 fn check_schedule(worker_type: &str, event_str: &str, id: &str) -> bool {
1027 let event = match parse_calendar_event(event_str) {
1028 Ok(event) => event,
1029 Err(err) => {
1030 eprintln!("unable to parse schedule '{}' - {}", event_str, err);
1031 return false;
1032 }
1033 };
1034
1035 let last = match jobstate::last_run_time(worker_type, &id) {
1036 Ok(time) => time,
1037 Err(err) => {
1038 eprintln!("could not get last run time of {} {}: {}", worker_type, id, err);
1039 return false;
1040 }
1041 };
1042
1043 let next = match compute_next_event(&event, last, false) {
1044 Ok(Some(next)) => next,
1045 Ok(None) => return false,
1046 Err(err) => {
1047 eprintln!("compute_next_event for '{}' failed - {}", event_str, err);
1048 return false;
1049 }
1050 };
1051
1052 let now = proxmox_time::epoch_i64();
1053 next <= now
1054 }
1055
1056 fn gather_disk_stats(disk_manager: Arc<DiskManage>, path: &Path, rrd_prefix: &str) {
1057
1058 match proxmox_backup::tools::disks::disk_usage(path) {
1059 Ok(status) => {
1060 let rrd_key = format!("{}/total", rrd_prefix);
1061 rrd_update_gauge(&rrd_key, status.total as f64);
1062 let rrd_key = format!("{}/used", rrd_prefix);
1063 rrd_update_gauge(&rrd_key, status.used as f64);
1064 }
1065 Err(err) => {
1066 eprintln!("read disk_usage on {:?} failed - {}", path, err);
1067 }
1068 }
1069
1070 match disk_manager.find_mounted_device(path) {
1071 Ok(None) => {},
1072 Ok(Some((fs_type, device, source))) => {
1073 let mut device_stat = None;
1074 match fs_type.as_str() {
1075 "zfs" => {
1076 if let Some(source) = source {
1077 let pool = get_pool_from_dataset(&source).unwrap_or(&source);
1078 match zfs_pool_stats(pool) {
1079 Ok(stat) => device_stat = stat,
1080 Err(err) => eprintln!("zfs_pool_stats({:?}) failed - {}", pool, err),
1081 }
1082 }
1083 }
1084 _ => {
1085 if let Ok(disk) = disk_manager.clone().disk_by_dev_num(device.into_dev_t()) {
1086 match disk.read_stat() {
1087 Ok(stat) => device_stat = stat,
1088 Err(err) => eprintln!("disk.read_stat {:?} failed - {}", path, err),
1089 }
1090 }
1091 }
1092 }
1093 if let Some(stat) = device_stat {
1094 let rrd_key = format!("{}/read_ios", rrd_prefix);
1095 rrd_update_derive(&rrd_key, stat.read_ios as f64);
1096 let rrd_key = format!("{}/read_bytes", rrd_prefix);
1097 rrd_update_derive(&rrd_key, (stat.read_sectors*512) as f64);
1098
1099 let rrd_key = format!("{}/write_ios", rrd_prefix);
1100 rrd_update_derive(&rrd_key, stat.write_ios as f64);
1101 let rrd_key = format!("{}/write_bytes", rrd_prefix);
1102 rrd_update_derive(&rrd_key, (stat.write_sectors*512) as f64);
1103
1104 let rrd_key = format!("{}/io_ticks", rrd_prefix);
1105 rrd_update_derive(&rrd_key, (stat.io_ticks as f64)/1000.0);
1106 }
1107 }
1108 Err(err) => {
1109 eprintln!("find_mounted_device failed - {}", err);
1110 }
1111 }
1112 }
1113
1114 // Rate Limiter lookup
1115
1116 // Test WITH
1117 // proxmox-backup-client restore vm/201/2021-10-22T09:55:56Z drive-scsi0.img img1.img --repository localhost:store2
1118
1119 async fn run_traffic_control_updater() {
1120
1121 loop {
1122 let delay_target = Instant::now() + Duration::from_secs(1);
1123
1124 {
1125 let mut cache = TRAFFIC_CONTROL_CACHE.lock().unwrap();
1126 cache.compute_current_rates();
1127 }
1128
1129 tokio::time::sleep_until(tokio::time::Instant::from_std(delay_target)).await;
1130 }
1131
1132 }
1133
1134 fn lookup_rate_limiter(
1135 peer: Option<std::net::SocketAddr>,
1136 ) -> (Option<Arc<dyn ShareableRateLimit>>, Option<Arc<dyn ShareableRateLimit>>) {
1137 let mut cache = TRAFFIC_CONTROL_CACHE.lock().unwrap();
1138
1139 let now = proxmox_time::epoch_i64();
1140
1141 cache.reload(now);
1142
1143 let (_rule_name, read_limiter, write_limiter) = cache.lookup_rate_limiter(peer, now);
1144
1145 (read_limiter, write_limiter)
1146 }