]>
Commit | Line | Data |
---|---|---|
1 | use std::sync::{Mutex, Arc}; | |
2 | use std::path::{Path, PathBuf}; | |
3 | use std::os::unix::io::AsRawFd; | |
4 | use std::future::Future; | |
5 | use std::pin::Pin; | |
6 | ||
7 | use anyhow::{bail, format_err, Error}; | |
8 | use futures::*; | |
9 | use http::request::Parts; | |
10 | use http::Response; | |
11 | use hyper::{Body, StatusCode}; | |
12 | use hyper::header; | |
13 | use url::form_urlencoded; | |
14 | ||
15 | use openssl::ssl::{SslMethod, SslAcceptor, SslFiletype}; | |
16 | use tokio_stream::wrappers::ReceiverStream; | |
17 | use serde_json::{json, Value}; | |
18 | use http::{Method, HeaderMap}; | |
19 | ||
20 | use proxmox::sys::linux::socket::set_tcp_keepalive; | |
21 | use proxmox::tools::fs::CreateOptions; | |
22 | use proxmox_lang::try_block; | |
23 | use proxmox_router::{RpcEnvironment, RpcEnvironmentType, UserInformation}; | |
24 | use proxmox_http::client::{RateLimiter, RateLimitedStream}; | |
25 | ||
26 | use pbs_tools::{task_log, task_warn}; | |
27 | use pbs_datastore::DataStore; | |
28 | ||
29 | use proxmox_rest_server::{ | |
30 | rotate_task_log_archive, extract_cookie , AuthError, ApiConfig, RestServer, RestEnvironment, | |
31 | ServerAdapter, WorkerTask, cleanup_old_tasks, | |
32 | }; | |
33 | ||
34 | use proxmox_backup::rrd_cache::{ | |
35 | initialize_rrd_cache, rrd_update_gauge, rrd_update_derive, rrd_sync_journal, | |
36 | }; | |
37 | use proxmox_backup::{ | |
38 | server::{ | |
39 | auth::check_pbs_auth, | |
40 | jobstate::{ | |
41 | self, | |
42 | Job, | |
43 | }, | |
44 | }, | |
45 | }; | |
46 | ||
47 | use pbs_buildcfg::configdir; | |
48 | use proxmox_systemd::time::{compute_next_event, parse_calendar_event}; | |
49 | use pbs_tools::logrotate::LogRotate; | |
50 | ||
51 | use pbs_api_types::{ | |
52 | Authid, TapeBackupJobConfig, VerificationJobConfig, SyncJobConfig, DataStoreConfig, | |
53 | PruneOptions, | |
54 | }; | |
55 | ||
56 | use proxmox_rest_server::daemon; | |
57 | ||
58 | use proxmox_backup::server; | |
59 | use proxmox_backup::auth_helpers::*; | |
60 | use proxmox_backup::tools::{ | |
61 | PROXMOX_BACKUP_TCP_KEEPALIVE_TIME, | |
62 | disks::{ | |
63 | DiskManage, | |
64 | zfs_pool_stats, | |
65 | get_pool_from_dataset, | |
66 | }, | |
67 | }; | |
68 | ||
69 | ||
70 | use proxmox_backup::api2::pull::do_sync_job; | |
71 | use proxmox_backup::api2::tape::backup::do_tape_backup_job; | |
72 | use proxmox_backup::server::do_verification_job; | |
73 | use proxmox_backup::server::do_prune_job; | |
74 | use proxmox_backup::TrafficControlCache; | |
75 | ||
76 | fn main() -> Result<(), Error> { | |
77 | proxmox_backup::tools::setup_safe_path_env(); | |
78 | ||
79 | let backup_uid = pbs_config::backup_user()?.uid; | |
80 | let backup_gid = pbs_config::backup_group()?.gid; | |
81 | let running_uid = nix::unistd::Uid::effective(); | |
82 | let running_gid = nix::unistd::Gid::effective(); | |
83 | ||
84 | if running_uid != backup_uid || running_gid != backup_gid { | |
85 | bail!("proxy not running as backup user or group (got uid {} gid {})", running_uid, running_gid); | |
86 | } | |
87 | ||
88 | pbs_runtime::main(run()) | |
89 | } | |
90 | ||
91 | ||
92 | struct ProxmoxBackupProxyAdapter; | |
93 | ||
94 | impl ServerAdapter for ProxmoxBackupProxyAdapter { | |
95 | ||
96 | fn get_index( | |
97 | &self, | |
98 | env: RestEnvironment, | |
99 | parts: Parts, | |
100 | ) -> Pin<Box<dyn Future<Output = Response<Body>> + Send>> { | |
101 | Box::pin(get_index_future(env, parts)) | |
102 | } | |
103 | ||
104 | fn check_auth<'a>( | |
105 | &'a self, | |
106 | headers: &'a HeaderMap, | |
107 | method: &'a Method, | |
108 | ) -> Pin<Box<dyn Future<Output = Result<(String, Box<dyn UserInformation + Sync + Send>), AuthError>> + Send + 'a>> { | |
109 | Box::pin(async move { | |
110 | check_pbs_auth(headers, method).await | |
111 | }) | |
112 | } | |
113 | } | |
114 | ||
115 | fn extract_lang_header(headers: &http::HeaderMap) -> Option<String> { | |
116 | if let Some(Ok(cookie)) = headers.get("COOKIE").map(|v| v.to_str()) { | |
117 | return extract_cookie(cookie, "PBSLangCookie"); | |
118 | } | |
119 | None | |
120 | } | |
121 | ||
122 | async fn get_index_future( | |
123 | env: RestEnvironment, | |
124 | parts: Parts, | |
125 | ) -> Response<Body> { | |
126 | ||
127 | let auth_id = env.get_auth_id(); | |
128 | let api = env.api_config(); | |
129 | let language = extract_lang_header(&parts.headers); | |
130 | ||
131 | // fixme: make all IO async | |
132 | ||
133 | let (userid, csrf_token) = match auth_id { | |
134 | Some(auth_id) => { | |
135 | let auth_id = auth_id.parse::<Authid>(); | |
136 | match auth_id { | |
137 | Ok(auth_id) if !auth_id.is_token() => { | |
138 | let userid = auth_id.user().clone(); | |
139 | let new_csrf_token = assemble_csrf_prevention_token(csrf_secret(), &userid); | |
140 | (Some(userid), Some(new_csrf_token)) | |
141 | } | |
142 | _ => (None, None) | |
143 | } | |
144 | } | |
145 | None => (None, None), | |
146 | }; | |
147 | ||
148 | let nodename = proxmox::tools::nodename(); | |
149 | let user = userid.as_ref().map(|u| u.as_str()).unwrap_or(""); | |
150 | ||
151 | let csrf_token = csrf_token.unwrap_or_else(|| String::from("")); | |
152 | ||
153 | let mut debug = false; | |
154 | let mut template_file = "index"; | |
155 | ||
156 | if let Some(query_str) = parts.uri.query() { | |
157 | for (k, v) in form_urlencoded::parse(query_str.as_bytes()).into_owned() { | |
158 | if k == "debug" && v != "0" && v != "false" { | |
159 | debug = true; | |
160 | } else if k == "console" { | |
161 | template_file = "console"; | |
162 | } | |
163 | } | |
164 | } | |
165 | ||
166 | let mut lang = String::from(""); | |
167 | if let Some(language) = language { | |
168 | if Path::new(&format!("/usr/share/pbs-i18n/pbs-lang-{}.js", language)).exists() { | |
169 | lang = language; | |
170 | } | |
171 | } | |
172 | ||
173 | let data = json!({ | |
174 | "NodeName": nodename, | |
175 | "UserName": user, | |
176 | "CSRFPreventionToken": csrf_token, | |
177 | "language": lang, | |
178 | "debug": debug, | |
179 | }); | |
180 | ||
181 | let (ct, index) = match api.render_template(template_file, &data) { | |
182 | Ok(index) => ("text/html", index), | |
183 | Err(err) => ("text/plain", format!("Error rendering template: {}", err)), | |
184 | }; | |
185 | ||
186 | let mut resp = Response::builder() | |
187 | .status(StatusCode::OK) | |
188 | .header(header::CONTENT_TYPE, ct) | |
189 | .body(index.into()) | |
190 | .unwrap(); | |
191 | ||
192 | if let Some(userid) = userid { | |
193 | resp.extensions_mut().insert(Authid::from((userid, None))); | |
194 | } | |
195 | ||
196 | resp | |
197 | } | |
198 | ||
199 | async fn run() -> Result<(), Error> { | |
200 | if let Err(err) = syslog::init( | |
201 | syslog::Facility::LOG_DAEMON, | |
202 | log::LevelFilter::Info, | |
203 | Some("proxmox-backup-proxy")) { | |
204 | bail!("unable to inititialize syslog - {}", err); | |
205 | } | |
206 | ||
207 | // Note: To debug early connection error use | |
208 | // PROXMOX_DEBUG=1 ./target/release/proxmox-backup-proxy | |
209 | let debug = std::env::var("PROXMOX_DEBUG").is_ok(); | |
210 | ||
211 | let _ = public_auth_key(); // load with lazy_static | |
212 | let _ = csrf_secret(); // load with lazy_static | |
213 | ||
214 | let rrd_cache = initialize_rrd_cache()?; | |
215 | rrd_cache.apply_journal()?; | |
216 | ||
217 | let mut config = ApiConfig::new( | |
218 | pbs_buildcfg::JS_DIR, | |
219 | &proxmox_backup::api2::ROUTER, | |
220 | RpcEnvironmentType::PUBLIC, | |
221 | ProxmoxBackupProxyAdapter, | |
222 | )?; | |
223 | ||
224 | config.add_alias("novnc", "/usr/share/novnc-pve"); | |
225 | config.add_alias("extjs", "/usr/share/javascript/extjs"); | |
226 | config.add_alias("qrcodejs", "/usr/share/javascript/qrcodejs"); | |
227 | config.add_alias("fontawesome", "/usr/share/fonts-font-awesome"); | |
228 | config.add_alias("xtermjs", "/usr/share/pve-xtermjs"); | |
229 | config.add_alias("locale", "/usr/share/pbs-i18n"); | |
230 | config.add_alias("widgettoolkit", "/usr/share/javascript/proxmox-widget-toolkit"); | |
231 | config.add_alias("docs", "/usr/share/doc/proxmox-backup/html"); | |
232 | ||
233 | let mut indexpath = PathBuf::from(pbs_buildcfg::JS_DIR); | |
234 | indexpath.push("index.hbs"); | |
235 | config.register_template("index", &indexpath)?; | |
236 | config.register_template("console", "/usr/share/pve-xtermjs/index.html.hbs")?; | |
237 | ||
238 | let backup_user = pbs_config::backup_user()?; | |
239 | let mut commando_sock = proxmox_rest_server::CommandSocket::new(proxmox_rest_server::our_ctrl_sock(), backup_user.gid); | |
240 | ||
241 | let dir_opts = CreateOptions::new().owner(backup_user.uid).group(backup_user.gid); | |
242 | let file_opts = CreateOptions::new().owner(backup_user.uid).group(backup_user.gid); | |
243 | ||
244 | config.enable_access_log( | |
245 | pbs_buildcfg::API_ACCESS_LOG_FN, | |
246 | Some(dir_opts.clone()), | |
247 | Some(file_opts.clone()), | |
248 | &mut commando_sock, | |
249 | )?; | |
250 | ||
251 | config.enable_auth_log( | |
252 | pbs_buildcfg::API_AUTH_LOG_FN, | |
253 | Some(dir_opts.clone()), | |
254 | Some(file_opts.clone()), | |
255 | &mut commando_sock, | |
256 | )?; | |
257 | ||
258 | let rest_server = RestServer::new(config); | |
259 | proxmox_rest_server::init_worker_tasks(pbs_buildcfg::PROXMOX_BACKUP_LOG_DIR_M!().into(), file_opts.clone())?; | |
260 | ||
261 | //openssl req -x509 -newkey rsa:4096 -keyout /etc/proxmox-backup/proxy.key -out /etc/proxmox-backup/proxy.pem -nodes | |
262 | ||
263 | // we build the initial acceptor here as we cannot start if this fails | |
264 | let acceptor = make_tls_acceptor()?; | |
265 | let acceptor = Arc::new(Mutex::new(acceptor)); | |
266 | ||
267 | // to renew the acceptor we just add a command-socket handler | |
268 | commando_sock.register_command( | |
269 | "reload-certificate".to_string(), | |
270 | { | |
271 | let acceptor = Arc::clone(&acceptor); | |
272 | move |_value| -> Result<_, Error> { | |
273 | log::info!("reloading certificate"); | |
274 | match make_tls_acceptor() { | |
275 | Err(err) => log::error!("error reloading certificate: {}", err), | |
276 | Ok(new_acceptor) => { | |
277 | let mut guard = acceptor.lock().unwrap(); | |
278 | *guard = new_acceptor; | |
279 | } | |
280 | } | |
281 | Ok(Value::Null) | |
282 | } | |
283 | }, | |
284 | )?; | |
285 | ||
286 | // to remove references for not configured datastores | |
287 | commando_sock.register_command( | |
288 | "datastore-removed".to_string(), | |
289 | |_value| { | |
290 | if let Err(err) = DataStore::remove_unused_datastores() { | |
291 | log::error!("could not refresh datastores: {}", err); | |
292 | } | |
293 | Ok(Value::Null) | |
294 | } | |
295 | )?; | |
296 | ||
297 | let server = daemon::create_daemon( | |
298 | ([0,0,0,0,0,0,0,0], 8007).into(), | |
299 | move |listener| { | |
300 | ||
301 | let connections = accept_connections(listener, acceptor, debug); | |
302 | let connections = hyper::server::accept::from_stream(ReceiverStream::new(connections)); | |
303 | ||
304 | Ok(async { | |
305 | daemon::systemd_notify(daemon::SystemdNotify::Ready)?; | |
306 | ||
307 | hyper::Server::builder(connections) | |
308 | .serve(rest_server) | |
309 | .with_graceful_shutdown(proxmox_rest_server::shutdown_future()) | |
310 | .map_err(Error::from) | |
311 | .await | |
312 | }) | |
313 | }, | |
314 | ); | |
315 | ||
316 | proxmox_rest_server::write_pid(pbs_buildcfg::PROXMOX_BACKUP_PROXY_PID_FN)?; | |
317 | ||
318 | let init_result: Result<(), Error> = try_block!({ | |
319 | proxmox_rest_server::register_task_control_commands(&mut commando_sock)?; | |
320 | commando_sock.spawn()?; | |
321 | proxmox_rest_server::catch_shutdown_signal()?; | |
322 | proxmox_rest_server::catch_reload_signal()?; | |
323 | Ok(()) | |
324 | }); | |
325 | ||
326 | if let Err(err) = init_result { | |
327 | bail!("unable to start daemon - {}", err); | |
328 | } | |
329 | ||
330 | start_task_scheduler(); | |
331 | start_stat_generator(); | |
332 | ||
333 | server.await?; | |
334 | log::info!("server shutting down, waiting for active workers to complete"); | |
335 | proxmox_rest_server::last_worker_future().await?; | |
336 | log::info!("done - exit server"); | |
337 | ||
338 | Ok(()) | |
339 | } | |
340 | ||
341 | fn make_tls_acceptor() -> Result<SslAcceptor, Error> { | |
342 | let key_path = configdir!("/proxy.key"); | |
343 | let cert_path = configdir!("/proxy.pem"); | |
344 | ||
345 | let mut acceptor = SslAcceptor::mozilla_intermediate_v5(SslMethod::tls()).unwrap(); | |
346 | acceptor.set_private_key_file(key_path, SslFiletype::PEM) | |
347 | .map_err(|err| format_err!("unable to read proxy key {} - {}", key_path, err))?; | |
348 | acceptor.set_certificate_chain_file(cert_path) | |
349 | .map_err(|err| format_err!("unable to read proxy cert {} - {}", cert_path, err))?; | |
350 | acceptor.check_private_key().unwrap(); | |
351 | ||
352 | Ok(acceptor.build()) | |
353 | } | |
354 | ||
355 | type ClientStreamResult = | |
356 | Result<std::pin::Pin<Box<tokio_openssl::SslStream<RateLimitedStream<tokio::net::TcpStream>>>>, Error>; | |
357 | const MAX_PENDING_ACCEPTS: usize = 1024; | |
358 | ||
359 | fn accept_connections( | |
360 | listener: tokio::net::TcpListener, | |
361 | acceptor: Arc<Mutex<openssl::ssl::SslAcceptor>>, | |
362 | debug: bool, | |
363 | ) -> tokio::sync::mpsc::Receiver<ClientStreamResult> { | |
364 | ||
365 | let (sender, receiver) = tokio::sync::mpsc::channel(MAX_PENDING_ACCEPTS); | |
366 | ||
367 | tokio::spawn(accept_connection(listener, acceptor, debug, sender)); | |
368 | ||
369 | receiver | |
370 | } | |
371 | ||
372 | async fn accept_connection( | |
373 | listener: tokio::net::TcpListener, | |
374 | acceptor: Arc<Mutex<openssl::ssl::SslAcceptor>>, | |
375 | debug: bool, | |
376 | sender: tokio::sync::mpsc::Sender<ClientStreamResult>, | |
377 | ) { | |
378 | let accept_counter = Arc::new(()); | |
379 | ||
380 | loop { | |
381 | let (sock, _addr) = match listener.accept().await { | |
382 | Ok(conn) => conn, | |
383 | Err(err) => { | |
384 | eprintln!("error accepting tcp connection: {}", err); | |
385 | continue; | |
386 | } | |
387 | }; | |
388 | ||
389 | sock.set_nodelay(true).unwrap(); | |
390 | let _ = set_tcp_keepalive(sock.as_raw_fd(), PROXMOX_BACKUP_TCP_KEEPALIVE_TIME); | |
391 | ||
392 | let peer = sock.peer_addr().ok(); | |
393 | let sock = RateLimitedStream::with_limiter_update_cb(sock, move || lookup_rate_limiter(peer)); | |
394 | ||
395 | let ssl = { // limit acceptor_guard scope | |
396 | // Acceptor can be reloaded using the command socket "reload-certificate" command | |
397 | let acceptor_guard = acceptor.lock().unwrap(); | |
398 | ||
399 | match openssl::ssl::Ssl::new(acceptor_guard.context()) { | |
400 | Ok(ssl) => ssl, | |
401 | Err(err) => { | |
402 | eprintln!("failed to create Ssl object from Acceptor context - {}", err); | |
403 | continue; | |
404 | }, | |
405 | } | |
406 | }; | |
407 | ||
408 | let stream = match tokio_openssl::SslStream::new(ssl, sock) { | |
409 | Ok(stream) => stream, | |
410 | Err(err) => { | |
411 | eprintln!("failed to create SslStream using ssl and connection socket - {}", err); | |
412 | continue; | |
413 | }, | |
414 | }; | |
415 | ||
416 | let mut stream = Box::pin(stream); | |
417 | let sender = sender.clone(); | |
418 | ||
419 | if Arc::strong_count(&accept_counter) > MAX_PENDING_ACCEPTS { | |
420 | eprintln!("connection rejected - to many open connections"); | |
421 | continue; | |
422 | } | |
423 | ||
424 | let accept_counter = Arc::clone(&accept_counter); | |
425 | tokio::spawn(async move { | |
426 | let accept_future = tokio::time::timeout( | |
427 | Duration::new(10, 0), stream.as_mut().accept()); | |
428 | ||
429 | let result = accept_future.await; | |
430 | ||
431 | match result { | |
432 | Ok(Ok(())) => { | |
433 | if sender.send(Ok(stream)).await.is_err() && debug { | |
434 | eprintln!("detect closed connection channel"); | |
435 | } | |
436 | } | |
437 | Ok(Err(err)) => { | |
438 | if debug { | |
439 | eprintln!("https handshake failed - {}", err); | |
440 | } | |
441 | } | |
442 | Err(_) => { | |
443 | if debug { | |
444 | eprintln!("https handshake timeout"); | |
445 | } | |
446 | } | |
447 | } | |
448 | ||
449 | drop(accept_counter); // decrease reference count | |
450 | }); | |
451 | } | |
452 | } | |
453 | ||
454 | fn start_stat_generator() { | |
455 | let abort_future = proxmox_rest_server::shutdown_future(); | |
456 | let future = Box::pin(run_stat_generator()); | |
457 | let task = futures::future::select(future, abort_future); | |
458 | tokio::spawn(task.map(|_| ())); | |
459 | } | |
460 | ||
461 | fn start_task_scheduler() { | |
462 | let abort_future = proxmox_rest_server::shutdown_future(); | |
463 | let future = Box::pin(run_task_scheduler()); | |
464 | let task = futures::future::select(future, abort_future); | |
465 | tokio::spawn(task.map(|_| ())); | |
466 | } | |
467 | ||
468 | use std::time::{SystemTime, Instant, Duration, UNIX_EPOCH}; | |
469 | ||
470 | fn next_minute() -> Result<Instant, Error> { | |
471 | let now = SystemTime::now(); | |
472 | let epoch_now = now.duration_since(UNIX_EPOCH)?; | |
473 | let epoch_next = Duration::from_secs((epoch_now.as_secs()/60 + 1)*60); | |
474 | Ok(Instant::now() + epoch_next - epoch_now) | |
475 | } | |
476 | ||
477 | async fn run_task_scheduler() { | |
478 | ||
479 | let mut count: usize = 0; | |
480 | ||
481 | loop { | |
482 | count += 1; | |
483 | ||
484 | let delay_target = match next_minute() { // try to run very minute | |
485 | Ok(d) => d, | |
486 | Err(err) => { | |
487 | eprintln!("task scheduler: compute next minute failed - {}", err); | |
488 | tokio::time::sleep_until(tokio::time::Instant::from_std(Instant::now() + Duration::from_secs(60))).await; | |
489 | continue; | |
490 | } | |
491 | }; | |
492 | ||
493 | if count > 2 { // wait 1..2 minutes before starting | |
494 | match schedule_tasks().catch_unwind().await { | |
495 | Err(panic) => { | |
496 | match panic.downcast::<&str>() { | |
497 | Ok(msg) => { | |
498 | eprintln!("task scheduler panic: {}", msg); | |
499 | } | |
500 | Err(_) => { | |
501 | eprintln!("task scheduler panic - unknown type"); | |
502 | } | |
503 | } | |
504 | } | |
505 | Ok(Err(err)) => { | |
506 | eprintln!("task scheduler failed - {:?}", err); | |
507 | } | |
508 | Ok(Ok(_)) => {} | |
509 | } | |
510 | } | |
511 | ||
512 | tokio::time::sleep_until(tokio::time::Instant::from_std(delay_target)).await; | |
513 | } | |
514 | } | |
515 | ||
516 | async fn schedule_tasks() -> Result<(), Error> { | |
517 | ||
518 | schedule_datastore_garbage_collection().await; | |
519 | schedule_datastore_prune().await; | |
520 | schedule_datastore_sync_jobs().await; | |
521 | schedule_datastore_verify_jobs().await; | |
522 | schedule_tape_backup_jobs().await; | |
523 | schedule_task_log_rotate().await; | |
524 | ||
525 | Ok(()) | |
526 | } | |
527 | ||
528 | async fn schedule_datastore_garbage_collection() { | |
529 | ||
530 | let config = match pbs_config::datastore::config() { | |
531 | Err(err) => { | |
532 | eprintln!("unable to read datastore config - {}", err); | |
533 | return; | |
534 | } | |
535 | Ok((config, _digest)) => config, | |
536 | }; | |
537 | ||
538 | for (store, (_, store_config)) in config.sections { | |
539 | let datastore = match DataStore::lookup_datastore(&store) { | |
540 | Ok(datastore) => datastore, | |
541 | Err(err) => { | |
542 | eprintln!("lookup_datastore failed - {}", err); | |
543 | continue; | |
544 | } | |
545 | }; | |
546 | ||
547 | let store_config: DataStoreConfig = match serde_json::from_value(store_config) { | |
548 | Ok(c) => c, | |
549 | Err(err) => { | |
550 | eprintln!("datastore config from_value failed - {}", err); | |
551 | continue; | |
552 | } | |
553 | }; | |
554 | ||
555 | let event_str = match store_config.gc_schedule { | |
556 | Some(event_str) => event_str, | |
557 | None => continue, | |
558 | }; | |
559 | ||
560 | let event = match parse_calendar_event(&event_str) { | |
561 | Ok(event) => event, | |
562 | Err(err) => { | |
563 | eprintln!("unable to parse schedule '{}' - {}", event_str, err); | |
564 | continue; | |
565 | } | |
566 | }; | |
567 | ||
568 | if datastore.garbage_collection_running() { continue; } | |
569 | ||
570 | let worker_type = "garbage_collection"; | |
571 | ||
572 | let last = match jobstate::last_run_time(worker_type, &store) { | |
573 | Ok(time) => time, | |
574 | Err(err) => { | |
575 | eprintln!("could not get last run time of {} {}: {}", worker_type, store, err); | |
576 | continue; | |
577 | } | |
578 | }; | |
579 | ||
580 | let next = match compute_next_event(&event, last, false) { | |
581 | Ok(Some(next)) => next, | |
582 | Ok(None) => continue, | |
583 | Err(err) => { | |
584 | eprintln!("compute_next_event for '{}' failed - {}", event_str, err); | |
585 | continue; | |
586 | } | |
587 | }; | |
588 | ||
589 | let now = proxmox_time::epoch_i64(); | |
590 | ||
591 | if next > now { continue; } | |
592 | ||
593 | let job = match Job::new(worker_type, &store) { | |
594 | Ok(job) => job, | |
595 | Err(_) => continue, // could not get lock | |
596 | }; | |
597 | ||
598 | let auth_id = Authid::root_auth_id(); | |
599 | ||
600 | if let Err(err) = crate::server::do_garbage_collection_job(job, datastore, auth_id, Some(event_str), false) { | |
601 | eprintln!("unable to start garbage collection job on datastore {} - {}", store, err); | |
602 | } | |
603 | } | |
604 | } | |
605 | ||
606 | async fn schedule_datastore_prune() { | |
607 | ||
608 | let config = match pbs_config::datastore::config() { | |
609 | Err(err) => { | |
610 | eprintln!("unable to read datastore config - {}", err); | |
611 | return; | |
612 | } | |
613 | Ok((config, _digest)) => config, | |
614 | }; | |
615 | ||
616 | for (store, (_, store_config)) in config.sections { | |
617 | ||
618 | let store_config: DataStoreConfig = match serde_json::from_value(store_config) { | |
619 | Ok(c) => c, | |
620 | Err(err) => { | |
621 | eprintln!("datastore '{}' config from_value failed - {}", store, err); | |
622 | continue; | |
623 | } | |
624 | }; | |
625 | ||
626 | let event_str = match store_config.prune_schedule { | |
627 | Some(event_str) => event_str, | |
628 | None => continue, | |
629 | }; | |
630 | ||
631 | let prune_options = PruneOptions { | |
632 | keep_last: store_config.keep_last, | |
633 | keep_hourly: store_config.keep_hourly, | |
634 | keep_daily: store_config.keep_daily, | |
635 | keep_weekly: store_config.keep_weekly, | |
636 | keep_monthly: store_config.keep_monthly, | |
637 | keep_yearly: store_config.keep_yearly, | |
638 | }; | |
639 | ||
640 | if !pbs_datastore::prune::keeps_something(&prune_options) { // no prune settings - keep all | |
641 | continue; | |
642 | } | |
643 | ||
644 | let worker_type = "prune"; | |
645 | if check_schedule(worker_type, &event_str, &store) { | |
646 | let job = match Job::new(worker_type, &store) { | |
647 | Ok(job) => job, | |
648 | Err(_) => continue, // could not get lock | |
649 | }; | |
650 | ||
651 | let auth_id = Authid::root_auth_id().clone(); | |
652 | if let Err(err) = do_prune_job(job, prune_options, store.clone(), &auth_id, Some(event_str)) { | |
653 | eprintln!("unable to start datastore prune job {} - {}", &store, err); | |
654 | } | |
655 | }; | |
656 | } | |
657 | } | |
658 | ||
659 | async fn schedule_datastore_sync_jobs() { | |
660 | ||
661 | ||
662 | let config = match pbs_config::sync::config() { | |
663 | Err(err) => { | |
664 | eprintln!("unable to read sync job config - {}", err); | |
665 | return; | |
666 | } | |
667 | Ok((config, _digest)) => config, | |
668 | }; | |
669 | ||
670 | for (job_id, (_, job_config)) in config.sections { | |
671 | let job_config: SyncJobConfig = match serde_json::from_value(job_config) { | |
672 | Ok(c) => c, | |
673 | Err(err) => { | |
674 | eprintln!("sync job config from_value failed - {}", err); | |
675 | continue; | |
676 | } | |
677 | }; | |
678 | ||
679 | let event_str = match job_config.schedule { | |
680 | Some(ref event_str) => event_str.clone(), | |
681 | None => continue, | |
682 | }; | |
683 | ||
684 | let worker_type = "syncjob"; | |
685 | if check_schedule(worker_type, &event_str, &job_id) { | |
686 | let job = match Job::new(worker_type, &job_id) { | |
687 | Ok(job) => job, | |
688 | Err(_) => continue, // could not get lock | |
689 | }; | |
690 | ||
691 | let auth_id = Authid::root_auth_id().clone(); | |
692 | if let Err(err) = do_sync_job(job, job_config, &auth_id, Some(event_str), false) { | |
693 | eprintln!("unable to start datastore sync job {} - {}", &job_id, err); | |
694 | } | |
695 | }; | |
696 | } | |
697 | } | |
698 | ||
699 | async fn schedule_datastore_verify_jobs() { | |
700 | ||
701 | let config = match pbs_config::verify::config() { | |
702 | Err(err) => { | |
703 | eprintln!("unable to read verification job config - {}", err); | |
704 | return; | |
705 | } | |
706 | Ok((config, _digest)) => config, | |
707 | }; | |
708 | for (job_id, (_, job_config)) in config.sections { | |
709 | let job_config: VerificationJobConfig = match serde_json::from_value(job_config) { | |
710 | Ok(c) => c, | |
711 | Err(err) => { | |
712 | eprintln!("verification job config from_value failed - {}", err); | |
713 | continue; | |
714 | } | |
715 | }; | |
716 | let event_str = match job_config.schedule { | |
717 | Some(ref event_str) => event_str.clone(), | |
718 | None => continue, | |
719 | }; | |
720 | ||
721 | let worker_type = "verificationjob"; | |
722 | let auth_id = Authid::root_auth_id().clone(); | |
723 | if check_schedule(worker_type, &event_str, &job_id) { | |
724 | let job = match Job::new(&worker_type, &job_id) { | |
725 | Ok(job) => job, | |
726 | Err(_) => continue, // could not get lock | |
727 | }; | |
728 | if let Err(err) = do_verification_job(job, job_config, &auth_id, Some(event_str), false) { | |
729 | eprintln!("unable to start datastore verification job {} - {}", &job_id, err); | |
730 | } | |
731 | }; | |
732 | } | |
733 | } | |
734 | ||
735 | async fn schedule_tape_backup_jobs() { | |
736 | ||
737 | let config = match pbs_config::tape_job::config() { | |
738 | Err(err) => { | |
739 | eprintln!("unable to read tape job config - {}", err); | |
740 | return; | |
741 | } | |
742 | Ok((config, _digest)) => config, | |
743 | }; | |
744 | for (job_id, (_, job_config)) in config.sections { | |
745 | let job_config: TapeBackupJobConfig = match serde_json::from_value(job_config) { | |
746 | Ok(c) => c, | |
747 | Err(err) => { | |
748 | eprintln!("tape backup job config from_value failed - {}", err); | |
749 | continue; | |
750 | } | |
751 | }; | |
752 | let event_str = match job_config.schedule { | |
753 | Some(ref event_str) => event_str.clone(), | |
754 | None => continue, | |
755 | }; | |
756 | ||
757 | let worker_type = "tape-backup-job"; | |
758 | let auth_id = Authid::root_auth_id().clone(); | |
759 | if check_schedule(worker_type, &event_str, &job_id) { | |
760 | let job = match Job::new(&worker_type, &job_id) { | |
761 | Ok(job) => job, | |
762 | Err(_) => continue, // could not get lock | |
763 | }; | |
764 | if let Err(err) = do_tape_backup_job(job, job_config.setup, &auth_id, Some(event_str), false) { | |
765 | eprintln!("unable to start tape backup job {} - {}", &job_id, err); | |
766 | } | |
767 | }; | |
768 | } | |
769 | } | |
770 | ||
771 | ||
772 | async fn schedule_task_log_rotate() { | |
773 | ||
774 | let worker_type = "logrotate"; | |
775 | let job_id = "access-log_and_task-archive"; | |
776 | ||
777 | // schedule daily at 00:00 like normal logrotate | |
778 | let schedule = "00:00"; | |
779 | ||
780 | if !check_schedule(worker_type, schedule, job_id) { | |
781 | // if we never ran the rotation, schedule instantly | |
782 | match jobstate::JobState::load(worker_type, job_id) { | |
783 | Ok(state) => match state { | |
784 | jobstate::JobState::Created { .. } => {}, | |
785 | _ => return, | |
786 | }, | |
787 | _ => return, | |
788 | } | |
789 | } | |
790 | ||
791 | let mut job = match Job::new(worker_type, job_id) { | |
792 | Ok(job) => job, | |
793 | Err(_) => return, // could not get lock | |
794 | }; | |
795 | ||
796 | if let Err(err) = WorkerTask::new_thread( | |
797 | worker_type, | |
798 | None, | |
799 | Authid::root_auth_id().to_string(), | |
800 | false, | |
801 | move |worker| { | |
802 | job.start(&worker.upid().to_string())?; | |
803 | task_log!(worker, "starting task log rotation"); | |
804 | ||
805 | let result = try_block!({ | |
806 | let max_size = 512 * 1024 - 1; // an entry has ~ 100b, so > 5000 entries/file | |
807 | let max_files = 20; // times twenty files gives > 100000 task entries | |
808 | let has_rotated = rotate_task_log_archive(max_size, true, Some(max_files))?; | |
809 | if has_rotated { | |
810 | task_log!(worker, "task log archive was rotated"); | |
811 | } else { | |
812 | task_log!(worker, "task log archive was not rotated"); | |
813 | } | |
814 | ||
815 | let max_size = 32 * 1024 * 1024 - 1; | |
816 | let max_files = 14; | |
817 | let mut logrotate = LogRotate::new(pbs_buildcfg::API_ACCESS_LOG_FN, true) | |
818 | .ok_or_else(|| format_err!("could not get API access log file names"))?; | |
819 | ||
820 | if logrotate.rotate(max_size, None, Some(max_files))? { | |
821 | println!("rotated access log, telling daemons to re-open log file"); | |
822 | pbs_runtime::block_on(command_reopen_access_logfiles())?; | |
823 | task_log!(worker, "API access log was rotated"); | |
824 | } else { | |
825 | task_log!(worker, "API access log was not rotated"); | |
826 | } | |
827 | ||
828 | let mut logrotate = LogRotate::new(pbs_buildcfg::API_AUTH_LOG_FN, true) | |
829 | .ok_or_else(|| format_err!("could not get API auth log file names"))?; | |
830 | ||
831 | if logrotate.rotate(max_size, None, Some(max_files))? { | |
832 | println!("rotated auth log, telling daemons to re-open log file"); | |
833 | pbs_runtime::block_on(command_reopen_auth_logfiles())?; | |
834 | task_log!(worker, "API authentication log was rotated"); | |
835 | } else { | |
836 | task_log!(worker, "API authentication log was not rotated"); | |
837 | } | |
838 | ||
839 | if has_rotated { | |
840 | task_log!(worker, "cleaning up old task logs"); | |
841 | if let Err(err) = cleanup_old_tasks(true) { | |
842 | task_warn!(worker, "could not completely cleanup old tasks: {}", err); | |
843 | } | |
844 | } | |
845 | ||
846 | Ok(()) | |
847 | }); | |
848 | ||
849 | let status = worker.create_state(&result); | |
850 | ||
851 | if let Err(err) = job.finish(status) { | |
852 | eprintln!("could not finish job state for {}: {}", worker_type, err); | |
853 | } | |
854 | ||
855 | result | |
856 | }, | |
857 | ) { | |
858 | eprintln!("unable to start task log rotation: {}", err); | |
859 | } | |
860 | ||
861 | } | |
862 | ||
863 | async fn command_reopen_access_logfiles() -> Result<(), Error> { | |
864 | // only care about the most recent daemon instance for each, proxy & api, as other older ones | |
865 | // should not respond to new requests anyway, but only finish their current one and then exit. | |
866 | let sock = proxmox_rest_server::our_ctrl_sock(); | |
867 | let f1 = proxmox_rest_server::send_raw_command(sock, "{\"command\":\"api-access-log-reopen\"}\n"); | |
868 | ||
869 | let pid = proxmox_rest_server::read_pid(pbs_buildcfg::PROXMOX_BACKUP_API_PID_FN)?; | |
870 | let sock = proxmox_rest_server::ctrl_sock_from_pid(pid); | |
871 | let f2 = proxmox_rest_server::send_raw_command(sock, "{\"command\":\"api-access-log-reopen\"}\n"); | |
872 | ||
873 | match futures::join!(f1, f2) { | |
874 | (Err(e1), Err(e2)) => Err(format_err!("reopen commands failed, proxy: {}; api: {}", e1, e2)), | |
875 | (Err(e1), Ok(_)) => Err(format_err!("reopen commands failed, proxy: {}", e1)), | |
876 | (Ok(_), Err(e2)) => Err(format_err!("reopen commands failed, api: {}", e2)), | |
877 | _ => Ok(()), | |
878 | } | |
879 | } | |
880 | ||
881 | async fn command_reopen_auth_logfiles() -> Result<(), Error> { | |
882 | // only care about the most recent daemon instance for each, proxy & api, as other older ones | |
883 | // should not respond to new requests anyway, but only finish their current one and then exit. | |
884 | let sock = proxmox_rest_server::our_ctrl_sock(); | |
885 | let f1 = proxmox_rest_server::send_raw_command(sock, "{\"command\":\"api-auth-log-reopen\"}\n"); | |
886 | ||
887 | let pid = proxmox_rest_server::read_pid(pbs_buildcfg::PROXMOX_BACKUP_API_PID_FN)?; | |
888 | let sock = proxmox_rest_server::ctrl_sock_from_pid(pid); | |
889 | let f2 = proxmox_rest_server::send_raw_command(sock, "{\"command\":\"api-auth-log-reopen\"}\n"); | |
890 | ||
891 | match futures::join!(f1, f2) { | |
892 | (Err(e1), Err(e2)) => Err(format_err!("reopen commands failed, proxy: {}; api: {}", e1, e2)), | |
893 | (Err(e1), Ok(_)) => Err(format_err!("reopen commands failed, proxy: {}", e1)), | |
894 | (Ok(_), Err(e2)) => Err(format_err!("reopen commands failed, api: {}", e2)), | |
895 | _ => Ok(()), | |
896 | } | |
897 | } | |
898 | ||
899 | async fn run_stat_generator() { | |
900 | ||
901 | loop { | |
902 | let delay_target = Instant::now() + Duration::from_secs(10); | |
903 | ||
904 | generate_host_stats().await; | |
905 | ||
906 | rrd_sync_journal(); | |
907 | ||
908 | tokio::time::sleep_until(tokio::time::Instant::from_std(delay_target)).await; | |
909 | ||
910 | } | |
911 | ||
912 | } | |
913 | ||
914 | async fn generate_host_stats() { | |
915 | match tokio::task::spawn_blocking(generate_host_stats_sync).await { | |
916 | Ok(()) => (), | |
917 | Err(err) => log::error!("generate_host_stats paniced: {}", err), | |
918 | } | |
919 | } | |
920 | ||
921 | fn generate_host_stats_sync() { | |
922 | use proxmox::sys::linux::procfs::{ | |
923 | read_meminfo, read_proc_stat, read_proc_net_dev, read_loadavg}; | |
924 | ||
925 | match read_proc_stat() { | |
926 | Ok(stat) => { | |
927 | rrd_update_gauge("host/cpu", stat.cpu); | |
928 | rrd_update_gauge("host/iowait", stat.iowait_percent); | |
929 | } | |
930 | Err(err) => { | |
931 | eprintln!("read_proc_stat failed - {}", err); | |
932 | } | |
933 | } | |
934 | ||
935 | match read_meminfo() { | |
936 | Ok(meminfo) => { | |
937 | rrd_update_gauge("host/memtotal", meminfo.memtotal as f64); | |
938 | rrd_update_gauge("host/memused", meminfo.memused as f64); | |
939 | rrd_update_gauge("host/swaptotal", meminfo.swaptotal as f64); | |
940 | rrd_update_gauge("host/swapused", meminfo.swapused as f64); | |
941 | } | |
942 | Err(err) => { | |
943 | eprintln!("read_meminfo failed - {}", err); | |
944 | } | |
945 | } | |
946 | ||
947 | match read_proc_net_dev() { | |
948 | Ok(netdev) => { | |
949 | use pbs_config::network::is_physical_nic; | |
950 | let mut netin = 0; | |
951 | let mut netout = 0; | |
952 | for item in netdev { | |
953 | if !is_physical_nic(&item.device) { continue; } | |
954 | netin += item.receive; | |
955 | netout += item.send; | |
956 | } | |
957 | rrd_update_derive("host/netin", netin as f64); | |
958 | rrd_update_derive("host/netout", netout as f64); | |
959 | } | |
960 | Err(err) => { | |
961 | eprintln!("read_prox_net_dev failed - {}", err); | |
962 | } | |
963 | } | |
964 | ||
965 | match read_loadavg() { | |
966 | Ok(loadavg) => { | |
967 | rrd_update_gauge("host/loadavg", loadavg.0 as f64); | |
968 | } | |
969 | Err(err) => { | |
970 | eprintln!("read_loadavg failed - {}", err); | |
971 | } | |
972 | } | |
973 | ||
974 | let disk_manager = DiskManage::new(); | |
975 | ||
976 | gather_disk_stats(disk_manager.clone(), Path::new("/"), "host"); | |
977 | ||
978 | match pbs_config::datastore::config() { | |
979 | Ok((config, _)) => { | |
980 | let datastore_list: Vec<DataStoreConfig> = | |
981 | config.convert_to_typed_array("datastore").unwrap_or_default(); | |
982 | ||
983 | for config in datastore_list { | |
984 | ||
985 | let rrd_prefix = format!("datastore/{}", config.name); | |
986 | let path = std::path::Path::new(&config.path); | |
987 | gather_disk_stats(disk_manager.clone(), path, &rrd_prefix); | |
988 | } | |
989 | } | |
990 | Err(err) => { | |
991 | eprintln!("read datastore config failed - {}", err); | |
992 | } | |
993 | } | |
994 | } | |
995 | ||
996 | fn check_schedule(worker_type: &str, event_str: &str, id: &str) -> bool { | |
997 | let event = match parse_calendar_event(event_str) { | |
998 | Ok(event) => event, | |
999 | Err(err) => { | |
1000 | eprintln!("unable to parse schedule '{}' - {}", event_str, err); | |
1001 | return false; | |
1002 | } | |
1003 | }; | |
1004 | ||
1005 | let last = match jobstate::last_run_time(worker_type, &id) { | |
1006 | Ok(time) => time, | |
1007 | Err(err) => { | |
1008 | eprintln!("could not get last run time of {} {}: {}", worker_type, id, err); | |
1009 | return false; | |
1010 | } | |
1011 | }; | |
1012 | ||
1013 | let next = match compute_next_event(&event, last, false) { | |
1014 | Ok(Some(next)) => next, | |
1015 | Ok(None) => return false, | |
1016 | Err(err) => { | |
1017 | eprintln!("compute_next_event for '{}' failed - {}", event_str, err); | |
1018 | return false; | |
1019 | } | |
1020 | }; | |
1021 | ||
1022 | let now = proxmox_time::epoch_i64(); | |
1023 | next <= now | |
1024 | } | |
1025 | ||
1026 | fn gather_disk_stats(disk_manager: Arc<DiskManage>, path: &Path, rrd_prefix: &str) { | |
1027 | ||
1028 | match proxmox_backup::tools::disks::disk_usage(path) { | |
1029 | Ok(status) => { | |
1030 | let rrd_key = format!("{}/total", rrd_prefix); | |
1031 | rrd_update_gauge(&rrd_key, status.total as f64); | |
1032 | let rrd_key = format!("{}/used", rrd_prefix); | |
1033 | rrd_update_gauge(&rrd_key, status.used as f64); | |
1034 | } | |
1035 | Err(err) => { | |
1036 | eprintln!("read disk_usage on {:?} failed - {}", path, err); | |
1037 | } | |
1038 | } | |
1039 | ||
1040 | match disk_manager.find_mounted_device(path) { | |
1041 | Ok(None) => {}, | |
1042 | Ok(Some((fs_type, device, source))) => { | |
1043 | let mut device_stat = None; | |
1044 | match fs_type.as_str() { | |
1045 | "zfs" => { | |
1046 | if let Some(source) = source { | |
1047 | let pool = get_pool_from_dataset(&source).unwrap_or(&source); | |
1048 | match zfs_pool_stats(pool) { | |
1049 | Ok(stat) => device_stat = stat, | |
1050 | Err(err) => eprintln!("zfs_pool_stats({:?}) failed - {}", pool, err), | |
1051 | } | |
1052 | } | |
1053 | } | |
1054 | _ => { | |
1055 | if let Ok(disk) = disk_manager.clone().disk_by_dev_num(device.into_dev_t()) { | |
1056 | match disk.read_stat() { | |
1057 | Ok(stat) => device_stat = stat, | |
1058 | Err(err) => eprintln!("disk.read_stat {:?} failed - {}", path, err), | |
1059 | } | |
1060 | } | |
1061 | } | |
1062 | } | |
1063 | if let Some(stat) = device_stat { | |
1064 | let rrd_key = format!("{}/read_ios", rrd_prefix); | |
1065 | rrd_update_derive(&rrd_key, stat.read_ios as f64); | |
1066 | let rrd_key = format!("{}/read_bytes", rrd_prefix); | |
1067 | rrd_update_derive(&rrd_key, (stat.read_sectors*512) as f64); | |
1068 | ||
1069 | let rrd_key = format!("{}/write_ios", rrd_prefix); | |
1070 | rrd_update_derive(&rrd_key, stat.write_ios as f64); | |
1071 | let rrd_key = format!("{}/write_bytes", rrd_prefix); | |
1072 | rrd_update_derive(&rrd_key, (stat.write_sectors*512) as f64); | |
1073 | ||
1074 | let rrd_key = format!("{}/io_ticks", rrd_prefix); | |
1075 | rrd_update_derive(&rrd_key, (stat.io_ticks as f64)/1000.0); | |
1076 | } | |
1077 | } | |
1078 | Err(err) => { | |
1079 | eprintln!("find_mounted_device failed - {}", err); | |
1080 | } | |
1081 | } | |
1082 | } | |
1083 | ||
1084 | // Rate Limiter lookup | |
1085 | ||
1086 | // Test WITH | |
1087 | // proxmox-backup-client restore vm/201/2021-10-22T09:55:56Z drive-scsi0.img img1.img --repository localhost:store2 | |
1088 | ||
1089 | lazy_static::lazy_static!{ | |
1090 | static ref TRAFFIC_CONTROL_CACHE: Arc<Mutex<TrafficControlCache>> = | |
1091 | Arc::new(Mutex::new(TrafficControlCache::new())); | |
1092 | } | |
1093 | ||
1094 | fn lookup_rate_limiter( | |
1095 | peer: Option<std::net::SocketAddr>, | |
1096 | ) -> (Option<Arc<Mutex<RateLimiter>>>, Option<Arc<Mutex<RateLimiter>>>) { | |
1097 | let mut cache = TRAFFIC_CONTROL_CACHE.lock().unwrap(); | |
1098 | ||
1099 | let now = proxmox_time::epoch_i64(); | |
1100 | ||
1101 | cache.reload(now); | |
1102 | ||
1103 | let (_rule_name, read_limiter, write_limiter) = cache.lookup_rate_limiter(peer, now); | |
1104 | ||
1105 | (read_limiter, write_limiter) | |
1106 | } |