]> git.proxmox.com Git - proxmox-backup.git/blame - src/tools/daemon.rs
move ApiConfig, FileLogger and CommandoSocket to proxmox-rest-server workspace
[proxmox-backup.git] / src / tools / daemon.rs
CommitLineData
dce94d0e
WB
1//! Helpers for daemons/services.
2
3use std::ffi::CString;
083ff3fd 4use std::future::Future;
c08fac4d 5use std::io::{Read, Write};
3ddb1488 6use std::os::raw::{c_char, c_uchar, c_int};
620dccf1 7use std::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd, RawFd};
dce94d0e
WB
8use std::os::unix::ffi::OsStrExt;
9use std::panic::UnwindSafe;
083ff3fd
WB
10use std::pin::Pin;
11use std::task::{Context, Poll};
3ddb1488 12use std::path::PathBuf;
dce94d0e 13
f7d4e4b5 14use anyhow::{bail, format_err, Error};
9e45e03a 15use futures::future::{self, Either};
4422ba2c 16
ca3c3ce9
WB
17use proxmox::tools::io::{ReadExt, WriteExt};
18
620dccf1 19use crate::tools::{fd_change_cloexec, self};
dce94d0e 20
3ddb1488
DM
#[link(name = "systemd")]
extern "C" {
    /// libsystemd binding: requests a new stream file descriptor connected to the journal.
    ///
    /// `identifier` has to point to a NUL-terminated string. Callers in this file treat a
    /// negative return value as failure.
    fn sd_journal_stream_fd(
        identifier: *const c_uchar,
        priority: c_int,
        level_prefix: c_int,
    ) -> c_int;
}
25
dce94d0e 26// Unfortunately FnBox is nightly-only and Box<FnOnce> is unusable, so just use Box<Fn>...
620dccf1 27pub type BoxedStoreFunc = Box<dyn FnMut() -> Result<String, Error> + UnwindSafe + Send>;
dce94d0e
WB
28
29/// Helper trait to "store" something in the environment to be re-used after re-executing the
30/// service on a reload.
e4311382 31pub trait Reloadable: Sized {
dce94d0e 32 fn restore(var: &str) -> Result<Self, Error>;
620dccf1 33 fn get_store_func(&self) -> Result<BoxedStoreFunc, Error>;
dce94d0e
WB
34}
35
36/// Manages things to be stored and reloaded upon reexec.
37/// Anything which should be restorable should be instantiated via this struct's `restore` method,
62ee2eb4 38#[derive(Default)]
e4311382 39pub struct Reloader {
dce94d0e 40 pre_exec: Vec<PreExecEntry>,
3ddb1488 41 self_exe: PathBuf,
dce94d0e
WB
42}
43
44// Currently we only need environment variables for storage, but in theory we could also add
45// variants which need temporary files or pipes...
46struct PreExecEntry {
47 name: &'static str, // Feel free to change to String if necessary...
48 store_fn: BoxedStoreFunc,
49}
50
e4311382 51impl Reloader {
dc2ef2b5
WB
52 pub fn new() -> Result<Self, Error> {
53 Ok(Self {
dce94d0e 54 pre_exec: Vec::new(),
dc2ef2b5 55
3ddb1488
DM
56 // Get the path to our executable as PathBuf
57 self_exe: std::fs::read_link("/proc/self/exe")?,
dc2ef2b5 58 })
dce94d0e
WB
59 }
60
61 /// Restore an object from an environment variable of the given name, or, if none exists, uses
62 /// the function provided in the `or_create` parameter to instantiate the new "first" instance.
63 ///
64 /// Values created via this method will be remembered for later re-execution.
083ff3fd 65 pub async fn restore<T, F, U>(&mut self, name: &'static str, or_create: F) -> Result<T, Error>
dce94d0e 66 where
e4311382 67 T: Reloadable,
083ff3fd
WB
68 F: FnOnce() -> U,
69 U: Future<Output = Result<T, Error>>,
dce94d0e
WB
70 {
71 let res = match std::env::var(name) {
72 Ok(varstr) => T::restore(&varstr)?,
083ff3fd 73 Err(std::env::VarError::NotPresent) => or_create().await?,
dce94d0e
WB
74 Err(_) => bail!("variable {} has invalid value", name),
75 };
76
77 self.pre_exec.push(PreExecEntry {
78 name,
620dccf1 79 store_fn: res.get_store_func()?,
dce94d0e
WB
80 });
81 Ok(res)
82 }
83
84 fn pre_exec(self) -> Result<(), Error> {
620dccf1 85 for mut item in self.pre_exec {
dce94d0e
WB
86 std::env::set_var(item.name, (item.store_fn)()?);
87 }
88 Ok(())
89 }
90
91 pub fn fork_restart(self) -> Result<(), Error> {
dce94d0e
WB
92 // Get our parameters as Vec<CString>
93 let args = std::env::args_os();
94 let mut new_args = Vec::with_capacity(args.len());
95 for arg in args {
96 new_args.push(CString::new(arg.as_bytes())?);
97 }
98
5e5eed5c 99 // Synchronisation pipe:
c08fac4d 100 let (pold, pnew) = super::socketpair()?;
5e5eed5c 101
dce94d0e
WB
102 // Start ourselves in the background:
103 use nix::unistd::{fork, ForkResult};
0c4c6a7b 104 match unsafe { fork() } {
dce94d0e 105 Ok(ForkResult::Child) => {
5e5eed5c 106 // Double fork so systemd can supervise us without nagging...
0c4c6a7b 107 match unsafe { fork() } {
5e5eed5c 108 Ok(ForkResult::Child) => {
c08fac4d 109 std::mem::drop(pold);
5e5eed5c
WB
110 // At this point we call pre-exec helpers. We must be certain that if they fail for
111 // whatever reason we can still call `_exit()`, so use catch_unwind.
112 match std::panic::catch_unwind(move || {
c08fac4d
WB
113 let mut pnew = unsafe {
114 std::fs::File::from_raw_fd(pnew.into_raw_fd())
5e5eed5c
WB
115 };
116 let pid = nix::unistd::Pid::this();
c08fac4d 117 if let Err(e) = unsafe { pnew.write_host_value(pid.as_raw()) } {
5e5eed5c
WB
118 log::error!("failed to send new server PID to parent: {}", e);
119 unsafe {
120 libc::_exit(-1);
121 }
122 }
c08fac4d
WB
123
124 let mut ok = [0u8];
125 if let Err(e) = pnew.read_exact(&mut ok) {
126 log::error!("parent vanished before notifying systemd: {}", e);
127 unsafe {
128 libc::_exit(-1);
129 }
130 }
131 assert_eq!(ok[0], 1, "reload handshake should have sent a 1 byte");
132
133 std::mem::drop(pnew);
3ddb1488
DM
134
135 // Try to reopen STDOUT/STDERR journald streams to get correct PID in logs
136 let ident = CString::new(self.self_exe.file_name().unwrap().as_bytes()).unwrap();
137 let ident = ident.as_bytes();
138 let fd = unsafe { sd_journal_stream_fd(ident.as_ptr(), libc::LOG_INFO, 1) };
139 if fd >= 0 && fd != 1 {
140 let fd = proxmox::tools::fd::Fd(fd); // add drop handler
141 nix::unistd::dup2(fd.as_raw_fd(), 1)?;
142 } else {
143 log::error!("failed to update STDOUT journal redirection ({})", fd);
144 }
145 let fd = unsafe { sd_journal_stream_fd(ident.as_ptr(), libc::LOG_ERR, 1) };
146 if fd >= 0 && fd != 2 {
147 let fd = proxmox::tools::fd::Fd(fd); // add drop handler
148 nix::unistd::dup2(fd.as_raw_fd(), 2)?;
149 } else {
150 log::error!("failed to update STDERR journal redirection ({})", fd);
151 }
152
dc2ef2b5 153 self.do_reexec(new_args)
5e5eed5c
WB
154 })
155 {
dc2ef2b5
WB
156 Ok(Ok(())) => eprintln!("do_reexec returned!"),
157 Ok(Err(err)) => eprintln!("do_reexec failed: {}", err),
5e5eed5c
WB
158 Err(_) => eprintln!("panic in re-exec"),
159 }
160 }
161 Ok(ForkResult::Parent { child }) => {
c08fac4d 162 std::mem::drop((pold, pnew));
5e5eed5c
WB
163 log::debug!("forked off a new server (second pid: {})", child);
164 }
165 Err(e) => log::error!("fork() failed, restart delayed: {}", e),
dce94d0e
WB
166 }
167 // No matter how we managed to get here, this is the time where we bail out quickly:
168 unsafe {
169 libc::_exit(-1)
170 }
171 }
172 Ok(ForkResult::Parent { child }) => {
5e5eed5c 173 log::debug!("forked off a new server (first pid: {}), waiting for 2nd pid", child);
c08fac4d
WB
174 std::mem::drop(pnew);
175 let mut pold = unsafe {
176 std::fs::File::from_raw_fd(pold.into_raw_fd())
5e5eed5c 177 };
c08fac4d 178 let child = nix::unistd::Pid::from_raw(match unsafe { pold.read_le_value() } {
5e5eed5c
WB
179 Ok(v) => v,
180 Err(e) => {
181 log::error!("failed to receive pid of double-forked child process: {}", e);
182 // systemd will complain but won't kill the service...
183 return Ok(());
184 }
185 });
186
d98c9a7a
WB
187 if let Err(e) = systemd_notify(SystemdNotify::MainPid(child)) {
188 log::error!("failed to notify systemd about the new main pid: {}", e);
189 }
c08fac4d
WB
190
191 // notify child that it is now the new main process:
192 if let Err(e) = pold.write_all(&[1u8]) {
193 log::error!("child vanished during reload: {}", e);
194 }
195
dce94d0e
WB
196 Ok(())
197 }
198 Err(e) => {
5e5eed5c 199 log::error!("fork() failed, restart delayed: {}", e);
dce94d0e
WB
200 Ok(())
201 }
202 }
203 }
204
dc2ef2b5 205 fn do_reexec(self, args: Vec<CString>) -> Result<(), Error> {
3ddb1488 206 let exe = CString::new(self.self_exe.as_os_str().as_bytes())?;
dce94d0e
WB
207 self.pre_exec()?;
208 nix::unistd::setsid()?;
8bf4559b 209 let args: Vec<&std::ffi::CStr> = args.iter().map(|s| s.as_ref()).collect();
dce94d0e 210 nix::unistd::execvp(&exe, &args)?;
dc2ef2b5 211 panic!("exec misbehaved");
dce94d0e
WB
212 }
213}
4422ba2c 214
af70c181 215// For now all we need to do is store and reuse a tcp listening socket:
e4311382 216impl Reloadable for tokio::net::TcpListener {
af70c181
WB
217 // NOTE: The socket must not be closed when the store-function is called:
218 // FIXME: We could become "independent" of the TcpListener and its reference to the file
219 // descriptor by `dup()`ing it (and check if the listener still exists via kcmp()?)
620dccf1
WB
220 fn get_store_func(&self) -> Result<BoxedStoreFunc, Error> {
221 let mut fd_opt = Some(tools::Fd(
222 nix::fcntl::fcntl(self.as_raw_fd(), nix::fcntl::FcntlArg::F_DUPFD_CLOEXEC(0))?
223 ));
224 Ok(Box::new(move || {
225 let fd = fd_opt.take().unwrap();
226 fd_change_cloexec(fd.as_raw_fd(), false)?;
227 Ok(fd.into_raw_fd().to_string())
228 }))
af70c181
WB
229 }
230
231 fn restore(var: &str) -> Result<Self, Error> {
232 let fd = var.parse::<u32>()
233 .map_err(|e| format_err!("invalid file descriptor: {}", e))?
234 as RawFd;
235 fd_change_cloexec(fd, true)?;
236 Ok(Self::from_std(
237 unsafe { std::net::TcpListener::from_raw_fd(fd) },
af70c181
WB
238 )?)
239 }
240}
a690ecac 241
083ff3fd
WB
242pub struct NotifyReady;
243
244impl Future for NotifyReady {
245 type Output = Result<(), Error>;
246
247 fn poll(self: Pin<&mut Self>, _cx: &mut Context) -> Poll<Result<(), Error>> {
248 systemd_notify(SystemdNotify::Ready)?;
249 Poll::Ready(Ok(()))
250 }
251}
252
a690ecac
WB
253/// This creates a future representing a daemon which reloads itself when receiving a SIGHUP.
254/// If this is started regularly, a listening socket is created. In this case, the file descriptor
255/// number will be remembered in `PROXMOX_BACKUP_LISTEN_FD`.
256/// If the variable already exists, its contents will instead be used to restore the listening
257/// socket. The finished listening socket is then passed to the `create_service` function which
258/// can be used to setup the TLS and the HTTP daemon.
083ff3fd 259pub async fn create_daemon<F, S>(
a690ecac
WB
260 address: std::net::SocketAddr,
261 create_service: F,
d7c6ad60 262 service_name: &str,
083ff3fd 263) -> Result<(), Error>
a690ecac 264where
083ff3fd 265 F: FnOnce(tokio::net::TcpListener, NotifyReady) -> Result<S, Error>,
9e45e03a 266 S: Future<Output = ()> + Unpin,
a690ecac 267{
dc2ef2b5 268 let mut reloader = Reloader::new()?;
a690ecac
WB
269
270 let listener: tokio::net::TcpListener = reloader.restore(
271 "PROXMOX_BACKUP_LISTEN_FD",
083ff3fd
WB
272 move || async move { Ok(tokio::net::TcpListener::bind(&address).await?) },
273 ).await?;
a690ecac 274
9e45e03a 275 let server_future = create_service(listener, NotifyReady)?;
fd6d2438 276 let shutdown_future = proxmox_rest_server::shutdown_future();
9e45e03a
DC
277
278 let finish_future = match future::select(server_future, shutdown_future).await {
279 Either::Left((_, _)) => {
fd6d2438 280 proxmox_rest_server::request_shutdown(); // make sure we are in shutdown mode
9e45e03a
DC
281 None
282 }
283 Either::Right((_, server_future)) => Some(server_future),
284 };
a690ecac 285
a690ecac
WB
286 let mut reloader = Some(reloader);
287
fd6d2438 288 if proxmox_rest_server::is_reload_request() {
083ff3fd
WB
289 log::info!("daemon reload...");
290 if let Err(e) = systemd_notify(SystemdNotify::Reloading) {
291 log::error!("failed to notify systemd about the state change: {}", e);
292 }
0ec79339 293 wait_service_is_state(service_name, "reloading").await?;
083ff3fd
WB
294 if let Err(e) = reloader.take().unwrap().fork_restart() {
295 log::error!("error during reload: {}", e);
62ee2eb4 296 let _ = systemd_notify(SystemdNotify::Status("error during reload".to_string()));
083ff3fd
WB
297 }
298 } else {
299 log::info!("daemon shutting down...");
300 }
9e45e03a
DC
301
302 if let Some(future) = finish_future {
303 future.await;
304 }
d7c6ad60
DC
305
306 // FIXME: this is a hack, replace with sd_notify_barrier when available
fd6d2438 307 if proxmox_rest_server::is_reload_request() {
0ec79339 308 wait_service_is_not_state(service_name, "reloading").await?;
d7c6ad60
DC
309 }
310
9e45e03a 311 log::info!("daemon shut down...");
083ff3fd 312 Ok(())
a690ecac 313}
9c351a36 314
06c9059d 315// hack, do not use if unsure!
0ec79339
DC
316async fn get_service_state(service: &str) -> Result<String, Error> {
317 let text = match tokio::process::Command::new("systemctl")
318 .args(&["is-active", service])
319 .output()
320 .await
321 {
322 Ok(output) => match String::from_utf8(output.stdout) {
323 Ok(text) => text,
324 Err(err) => bail!("output of 'systemctl is-active' not valid UTF-8 - {}", err),
325 },
326 Err(err) => bail!("executing 'systemctl is-active' failed - {}", err),
327 };
328
329 Ok(text.trim().trim_start().to_string())
330}
331
332async fn wait_service_is_state(service: &str, state: &str) -> Result<(), Error> {
0a8d773a 333 tokio::time::sleep(std::time::Duration::new(1, 0)).await;
0ec79339 334 while get_service_state(service).await? != state {
0a8d773a 335 tokio::time::sleep(std::time::Duration::new(5, 0)).await;
0ec79339
DC
336 }
337 Ok(())
338}
06c9059d 339
0ec79339 340async fn wait_service_is_not_state(service: &str, state: &str) -> Result<(), Error> {
0a8d773a 341 tokio::time::sleep(std::time::Duration::new(1, 0)).await;
0ec79339 342 while get_service_state(service).await? == state {
0a8d773a 343 tokio::time::sleep(std::time::Duration::new(5, 0)).await;
d7c6ad60 344 }
0ec79339 345 Ok(())
d7c6ad60
DC
346}
347
9c351a36
WB
#[link(name = "systemd")]
extern "C" {
    /// libsystemd binding: sends the NUL-terminated `state` string to the service manager.
    ///
    /// The caller in this file treats a negative return value as a negated errno.
    fn sd_notify(unset_environment: c_int, state: *const c_char) -> c_int;
}
352
353pub enum SystemdNotify {
354 Ready,
355 Reloading,
356 Stopping,
357 Status(String),
358 MainPid(nix::unistd::Pid),
359}
360
361pub fn systemd_notify(state: SystemdNotify) -> Result<(), Error> {
362 let message = match state {
363 SystemdNotify::Ready => CString::new("READY=1"),
364 SystemdNotify::Reloading => CString::new("RELOADING=1"),
365 SystemdNotify::Stopping => CString::new("STOPPING=1"),
366 SystemdNotify::Status(msg) => CString::new(format!("STATUS={}", msg)),
367 SystemdNotify::MainPid(pid) => CString::new(format!("MAINPID={}", pid)),
368 }?;
369 let rc = unsafe { sd_notify(0, message.as_ptr()) };
370 if rc < 0 {
371 bail!(
372 "systemd_notify failed: {}",
373 std::io::Error::from_raw_os_error(-rc),
374 );
375 }
376 Ok(())
377}