]>
Commit | Line | Data |
---|---|---|
53daae8e | 1 | //! Helpers to implement restartable daemons/services. |
dce94d0e WB |
2 | |
3 | use std::ffi::CString; | |
083ff3fd | 4 | use std::future::Future; |
c08fac4d | 5 | use std::io::{Read, Write}; |
3ddb1488 | 6 | use std::os::raw::{c_char, c_uchar, c_int}; |
620dccf1 | 7 | use std::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd, RawFd}; |
dce94d0e WB |
8 | use std::os::unix::ffi::OsStrExt; |
9 | use std::panic::UnwindSafe; | |
083ff3fd WB |
10 | use std::pin::Pin; |
11 | use std::task::{Context, Poll}; | |
3ddb1488 | 12 | use std::path::PathBuf; |
dce94d0e | 13 | |
f7d4e4b5 | 14 | use anyhow::{bail, format_err, Error}; |
9e45e03a | 15 | use futures::future::{self, Either}; |
4422ba2c | 16 | |
ca3c3ce9 | 17 | use proxmox::tools::io::{ReadExt, WriteExt}; |
8bca935f | 18 | use proxmox::tools::fd::Fd; |
ca3c3ce9 | 19 | |
8bca935f | 20 | use crate::fd_change_cloexec; |
dce94d0e | 21 | |
3ddb1488 DM |
// Binding to libsystemd's journal stream constructor (see `man sd_journal_stream_fd`).
#[link(name = "systemd")]
extern "C" {
    /// Opens a stream to the journal for `identifier` (a NUL-terminated string) using the given
    /// syslog `priority`. Callers in this file treat a negative return value as failure and a
    /// non-negative one as an open file descriptor.
    fn sd_journal_stream_fd(
        identifier: *const c_uchar,
        priority: c_int,
        level_prefix: c_int,
    ) -> c_int;
}
26 | ||
dce94d0e | 27 | // Unfortunately FnBox is nightly-only and Box<FnOnce> is unusable, so just use Box<Fn>... |
620dccf1 | 28 | pub type BoxedStoreFunc = Box<dyn FnMut() -> Result<String, Error> + UnwindSafe + Send>; |
dce94d0e WB |
29 | |
30 | /// Helper trait to "store" something in the environment to be re-used after re-executing the | |
31 | /// service on a reload. | |
e4311382 | 32 | pub trait Reloadable: Sized { |
dce94d0e | 33 | fn restore(var: &str) -> Result<Self, Error>; |
620dccf1 | 34 | fn get_store_func(&self) -> Result<BoxedStoreFunc, Error>; |
dce94d0e WB |
35 | } |
36 | ||
37 | /// Manages things to be stored and reloaded upon reexec. | |
38 | /// Anything which should be restorable should be instantiated via this struct's `restore` method, | |
62ee2eb4 | 39 | #[derive(Default)] |
e4311382 | 40 | pub struct Reloader { |
dce94d0e | 41 | pre_exec: Vec<PreExecEntry>, |
3ddb1488 | 42 | self_exe: PathBuf, |
dce94d0e WB |
43 | } |
44 | ||
45 | // Currently we only need environment variables for storage, but in theory we could also add | |
46 | // variants which need temporary files or pipes... | |
47 | struct PreExecEntry { | |
48 | name: &'static str, // Feel free to change to String if necessary... | |
49 | store_fn: BoxedStoreFunc, | |
50 | } | |
51 | ||
e4311382 | 52 | impl Reloader { |
dc2ef2b5 WB |
53 | pub fn new() -> Result<Self, Error> { |
54 | Ok(Self { | |
dce94d0e | 55 | pre_exec: Vec::new(), |
dc2ef2b5 | 56 | |
3ddb1488 DM |
57 | // Get the path to our executable as PathBuf |
58 | self_exe: std::fs::read_link("/proc/self/exe")?, | |
dc2ef2b5 | 59 | }) |
dce94d0e WB |
60 | } |
61 | ||
62 | /// Restore an object from an environment variable of the given name, or, if none exists, uses | |
63 | /// the function provided in the `or_create` parameter to instantiate the new "first" instance. | |
64 | /// | |
65 | /// Values created via this method will be remembered for later re-execution. | |
083ff3fd | 66 | pub async fn restore<T, F, U>(&mut self, name: &'static str, or_create: F) -> Result<T, Error> |
dce94d0e | 67 | where |
e4311382 | 68 | T: Reloadable, |
083ff3fd WB |
69 | F: FnOnce() -> U, |
70 | U: Future<Output = Result<T, Error>>, | |
dce94d0e WB |
71 | { |
72 | let res = match std::env::var(name) { | |
73 | Ok(varstr) => T::restore(&varstr)?, | |
083ff3fd | 74 | Err(std::env::VarError::NotPresent) => or_create().await?, |
dce94d0e WB |
75 | Err(_) => bail!("variable {} has invalid value", name), |
76 | }; | |
77 | ||
78 | self.pre_exec.push(PreExecEntry { | |
79 | name, | |
620dccf1 | 80 | store_fn: res.get_store_func()?, |
dce94d0e WB |
81 | }); |
82 | Ok(res) | |
83 | } | |
84 | ||
85 | fn pre_exec(self) -> Result<(), Error> { | |
620dccf1 | 86 | for mut item in self.pre_exec { |
dce94d0e WB |
87 | std::env::set_var(item.name, (item.store_fn)()?); |
88 | } | |
89 | Ok(()) | |
90 | } | |
91 | ||
92 | pub fn fork_restart(self) -> Result<(), Error> { | |
dce94d0e WB |
93 | // Get our parameters as Vec<CString> |
94 | let args = std::env::args_os(); | |
95 | let mut new_args = Vec::with_capacity(args.len()); | |
96 | for arg in args { | |
97 | new_args.push(CString::new(arg.as_bytes())?); | |
98 | } | |
99 | ||
5e5eed5c | 100 | // Synchronisation pipe: |
c08fac4d | 101 | let (pold, pnew) = super::socketpair()?; |
5e5eed5c | 102 | |
dce94d0e WB |
103 | // Start ourselves in the background: |
104 | use nix::unistd::{fork, ForkResult}; | |
0c4c6a7b | 105 | match unsafe { fork() } { |
dce94d0e | 106 | Ok(ForkResult::Child) => { |
5e5eed5c | 107 | // Double fork so systemd can supervise us without nagging... |
0c4c6a7b | 108 | match unsafe { fork() } { |
5e5eed5c | 109 | Ok(ForkResult::Child) => { |
c08fac4d | 110 | std::mem::drop(pold); |
5e5eed5c WB |
111 | // At this point we call pre-exec helpers. We must be certain that if they fail for |
112 | // whatever reason we can still call `_exit()`, so use catch_unwind. | |
113 | match std::panic::catch_unwind(move || { | |
c08fac4d WB |
114 | let mut pnew = unsafe { |
115 | std::fs::File::from_raw_fd(pnew.into_raw_fd()) | |
5e5eed5c WB |
116 | }; |
117 | let pid = nix::unistd::Pid::this(); | |
c08fac4d | 118 | if let Err(e) = unsafe { pnew.write_host_value(pid.as_raw()) } { |
5e5eed5c WB |
119 | log::error!("failed to send new server PID to parent: {}", e); |
120 | unsafe { | |
121 | libc::_exit(-1); | |
122 | } | |
123 | } | |
c08fac4d WB |
124 | |
125 | let mut ok = [0u8]; | |
126 | if let Err(e) = pnew.read_exact(&mut ok) { | |
127 | log::error!("parent vanished before notifying systemd: {}", e); | |
128 | unsafe { | |
129 | libc::_exit(-1); | |
130 | } | |
131 | } | |
132 | assert_eq!(ok[0], 1, "reload handshake should have sent a 1 byte"); | |
133 | ||
134 | std::mem::drop(pnew); | |
3ddb1488 DM |
135 | |
136 | // Try to reopen STDOUT/STDERR journald streams to get correct PID in logs | |
137 | let ident = CString::new(self.self_exe.file_name().unwrap().as_bytes()).unwrap(); | |
138 | let ident = ident.as_bytes(); | |
139 | let fd = unsafe { sd_journal_stream_fd(ident.as_ptr(), libc::LOG_INFO, 1) }; | |
140 | if fd >= 0 && fd != 1 { | |
141 | let fd = proxmox::tools::fd::Fd(fd); // add drop handler | |
142 | nix::unistd::dup2(fd.as_raw_fd(), 1)?; | |
143 | } else { | |
144 | log::error!("failed to update STDOUT journal redirection ({})", fd); | |
145 | } | |
146 | let fd = unsafe { sd_journal_stream_fd(ident.as_ptr(), libc::LOG_ERR, 1) }; | |
147 | if fd >= 0 && fd != 2 { | |
148 | let fd = proxmox::tools::fd::Fd(fd); // add drop handler | |
149 | nix::unistd::dup2(fd.as_raw_fd(), 2)?; | |
150 | } else { | |
151 | log::error!("failed to update STDERR journal redirection ({})", fd); | |
152 | } | |
153 | ||
dc2ef2b5 | 154 | self.do_reexec(new_args) |
5e5eed5c WB |
155 | }) |
156 | { | |
dc2ef2b5 WB |
157 | Ok(Ok(())) => eprintln!("do_reexec returned!"), |
158 | Ok(Err(err)) => eprintln!("do_reexec failed: {}", err), | |
5e5eed5c WB |
159 | Err(_) => eprintln!("panic in re-exec"), |
160 | } | |
161 | } | |
162 | Ok(ForkResult::Parent { child }) => { | |
c08fac4d | 163 | std::mem::drop((pold, pnew)); |
5e5eed5c WB |
164 | log::debug!("forked off a new server (second pid: {})", child); |
165 | } | |
166 | Err(e) => log::error!("fork() failed, restart delayed: {}", e), | |
dce94d0e WB |
167 | } |
168 | // No matter how we managed to get here, this is the time where we bail out quickly: | |
169 | unsafe { | |
170 | libc::_exit(-1) | |
171 | } | |
172 | } | |
173 | Ok(ForkResult::Parent { child }) => { | |
5e5eed5c | 174 | log::debug!("forked off a new server (first pid: {}), waiting for 2nd pid", child); |
c08fac4d WB |
175 | std::mem::drop(pnew); |
176 | let mut pold = unsafe { | |
177 | std::fs::File::from_raw_fd(pold.into_raw_fd()) | |
5e5eed5c | 178 | }; |
c08fac4d | 179 | let child = nix::unistd::Pid::from_raw(match unsafe { pold.read_le_value() } { |
5e5eed5c WB |
180 | Ok(v) => v, |
181 | Err(e) => { | |
182 | log::error!("failed to receive pid of double-forked child process: {}", e); | |
183 | // systemd will complain but won't kill the service... | |
184 | return Ok(()); | |
185 | } | |
186 | }); | |
187 | ||
d98c9a7a WB |
188 | if let Err(e) = systemd_notify(SystemdNotify::MainPid(child)) { |
189 | log::error!("failed to notify systemd about the new main pid: {}", e); | |
190 | } | |
c08fac4d WB |
191 | |
192 | // notify child that it is now the new main process: | |
193 | if let Err(e) = pold.write_all(&[1u8]) { | |
194 | log::error!("child vanished during reload: {}", e); | |
195 | } | |
196 | ||
dce94d0e WB |
197 | Ok(()) |
198 | } | |
199 | Err(e) => { | |
5e5eed5c | 200 | log::error!("fork() failed, restart delayed: {}", e); |
dce94d0e WB |
201 | Ok(()) |
202 | } | |
203 | } | |
204 | } | |
205 | ||
dc2ef2b5 | 206 | fn do_reexec(self, args: Vec<CString>) -> Result<(), Error> { |
3ddb1488 | 207 | let exe = CString::new(self.self_exe.as_os_str().as_bytes())?; |
dce94d0e WB |
208 | self.pre_exec()?; |
209 | nix::unistd::setsid()?; | |
8bf4559b | 210 | let args: Vec<&std::ffi::CStr> = args.iter().map(|s| s.as_ref()).collect(); |
dce94d0e | 211 | nix::unistd::execvp(&exe, &args)?; |
dc2ef2b5 | 212 | panic!("exec misbehaved"); |
dce94d0e WB |
213 | } |
214 | } | |
4422ba2c | 215 | |
af70c181 | 216 | // For now all we need to do is store and reuse a tcp listening socket: |
e4311382 | 217 | impl Reloadable for tokio::net::TcpListener { |
af70c181 WB |
218 | // NOTE: The socket must not be closed when the store-function is called: |
219 | // FIXME: We could become "independent" of the TcpListener and its reference to the file | |
220 | // descriptor by `dup()`ing it (and check if the listener still exists via kcmp()?) | |
620dccf1 | 221 | fn get_store_func(&self) -> Result<BoxedStoreFunc, Error> { |
8bca935f | 222 | let mut fd_opt = Some(Fd( |
620dccf1 WB |
223 | nix::fcntl::fcntl(self.as_raw_fd(), nix::fcntl::FcntlArg::F_DUPFD_CLOEXEC(0))? |
224 | )); | |
225 | Ok(Box::new(move || { | |
226 | let fd = fd_opt.take().unwrap(); | |
227 | fd_change_cloexec(fd.as_raw_fd(), false)?; | |
228 | Ok(fd.into_raw_fd().to_string()) | |
229 | })) | |
af70c181 WB |
230 | } |
231 | ||
232 | fn restore(var: &str) -> Result<Self, Error> { | |
233 | let fd = var.parse::<u32>() | |
234 | .map_err(|e| format_err!("invalid file descriptor: {}", e))? | |
235 | as RawFd; | |
236 | fd_change_cloexec(fd, true)?; | |
237 | Ok(Self::from_std( | |
238 | unsafe { std::net::TcpListener::from_raw_fd(fd) }, | |
af70c181 WB |
239 | )?) |
240 | } | |
241 | } | |
a690ecac | 242 | |
083ff3fd WB |
243 | pub struct NotifyReady; |
244 | ||
245 | impl Future for NotifyReady { | |
246 | type Output = Result<(), Error>; | |
247 | ||
248 | fn poll(self: Pin<&mut Self>, _cx: &mut Context) -> Poll<Result<(), Error>> { | |
249 | systemd_notify(SystemdNotify::Ready)?; | |
250 | Poll::Ready(Ok(())) | |
251 | } | |
252 | } | |
253 | ||
a690ecac WB |
254 | /// This creates a future representing a daemon which reloads itself when receiving a SIGHUP. |
255 | /// If this is started regularly, a listening socket is created. In this case, the file descriptor | |
256 | /// number will be remembered in `PROXMOX_BACKUP_LISTEN_FD`. | |
257 | /// If the variable already exists, its contents will instead be used to restore the listening | |
258 | /// socket. The finished listening socket is then passed to the `create_service` function which | |
259 | /// can be used to setup the TLS and the HTTP daemon. | |
083ff3fd | 260 | pub async fn create_daemon<F, S>( |
a690ecac WB |
261 | address: std::net::SocketAddr, |
262 | create_service: F, | |
d7c6ad60 | 263 | service_name: &str, |
083ff3fd | 264 | ) -> Result<(), Error> |
a690ecac | 265 | where |
083ff3fd | 266 | F: FnOnce(tokio::net::TcpListener, NotifyReady) -> Result<S, Error>, |
9e45e03a | 267 | S: Future<Output = ()> + Unpin, |
a690ecac | 268 | { |
dc2ef2b5 | 269 | let mut reloader = Reloader::new()?; |
a690ecac WB |
270 | |
271 | let listener: tokio::net::TcpListener = reloader.restore( | |
272 | "PROXMOX_BACKUP_LISTEN_FD", | |
083ff3fd WB |
273 | move || async move { Ok(tokio::net::TcpListener::bind(&address).await?) }, |
274 | ).await?; | |
a690ecac | 275 | |
9e45e03a | 276 | let server_future = create_service(listener, NotifyReady)?; |
8bca935f | 277 | let shutdown_future = crate::shutdown_future(); |
9e45e03a DC |
278 | |
279 | let finish_future = match future::select(server_future, shutdown_future).await { | |
280 | Either::Left((_, _)) => { | |
8bca935f | 281 | crate::request_shutdown(); // make sure we are in shutdown mode |
9e45e03a DC |
282 | None |
283 | } | |
284 | Either::Right((_, server_future)) => Some(server_future), | |
285 | }; | |
a690ecac | 286 | |
a690ecac WB |
287 | let mut reloader = Some(reloader); |
288 | ||
8bca935f | 289 | if crate::is_reload_request() { |
083ff3fd WB |
290 | log::info!("daemon reload..."); |
291 | if let Err(e) = systemd_notify(SystemdNotify::Reloading) { | |
292 | log::error!("failed to notify systemd about the state change: {}", e); | |
293 | } | |
0ec79339 | 294 | wait_service_is_state(service_name, "reloading").await?; |
083ff3fd WB |
295 | if let Err(e) = reloader.take().unwrap().fork_restart() { |
296 | log::error!("error during reload: {}", e); | |
62ee2eb4 | 297 | let _ = systemd_notify(SystemdNotify::Status("error during reload".to_string())); |
083ff3fd WB |
298 | } |
299 | } else { | |
300 | log::info!("daemon shutting down..."); | |
301 | } | |
9e45e03a DC |
302 | |
303 | if let Some(future) = finish_future { | |
304 | future.await; | |
305 | } | |
d7c6ad60 DC |
306 | |
307 | // FIXME: this is a hack, replace with sd_notify_barrier when available | |
8bca935f | 308 | if crate::is_reload_request() { |
0ec79339 | 309 | wait_service_is_not_state(service_name, "reloading").await?; |
d7c6ad60 DC |
310 | } |
311 | ||
9e45e03a | 312 | log::info!("daemon shut down..."); |
083ff3fd | 313 | Ok(()) |
a690ecac | 314 | } |
9c351a36 | 315 | |
06c9059d | 316 | // hack, do not use if unsure! |
0ec79339 DC |
317 | async fn get_service_state(service: &str) -> Result<String, Error> { |
318 | let text = match tokio::process::Command::new("systemctl") | |
319 | .args(&["is-active", service]) | |
320 | .output() | |
321 | .await | |
322 | { | |
323 | Ok(output) => match String::from_utf8(output.stdout) { | |
324 | Ok(text) => text, | |
325 | Err(err) => bail!("output of 'systemctl is-active' not valid UTF-8 - {}", err), | |
326 | }, | |
327 | Err(err) => bail!("executing 'systemctl is-active' failed - {}", err), | |
328 | }; | |
329 | ||
330 | Ok(text.trim().trim_start().to_string()) | |
331 | } | |
332 | ||
333 | async fn wait_service_is_state(service: &str, state: &str) -> Result<(), Error> { | |
0a8d773a | 334 | tokio::time::sleep(std::time::Duration::new(1, 0)).await; |
0ec79339 | 335 | while get_service_state(service).await? != state { |
0a8d773a | 336 | tokio::time::sleep(std::time::Duration::new(5, 0)).await; |
0ec79339 DC |
337 | } |
338 | Ok(()) | |
339 | } | |
06c9059d | 340 | |
0ec79339 | 341 | async fn wait_service_is_not_state(service: &str, state: &str) -> Result<(), Error> { |
0a8d773a | 342 | tokio::time::sleep(std::time::Duration::new(1, 0)).await; |
0ec79339 | 343 | while get_service_state(service).await? == state { |
0a8d773a | 344 | tokio::time::sleep(std::time::Duration::new(5, 0)).await; |
d7c6ad60 | 345 | } |
0ec79339 | 346 | Ok(()) |
d7c6ad60 DC |
347 | } |
348 | ||
9c351a36 WB |
// Binding to libsystemd's service notification call (see `man sd_notify`).
#[link(name = "systemd")]
extern "C" {
    /// Sends the NUL-terminated `state` string to the service manager. Callers in this file
    /// treat a negative return value as a negated `errno` code.
    fn sd_notify(
        unset_environment: c_int,
        state: *const c_char,
    ) -> c_int;
}
353 | ||
53daae8e | 354 | /// Systemd sercice startup states (see: ``man sd_notify``) |
9c351a36 WB |
355 | pub enum SystemdNotify { |
356 | Ready, | |
357 | Reloading, | |
358 | Stopping, | |
359 | Status(String), | |
360 | MainPid(nix::unistd::Pid), | |
361 | } | |
362 | ||
53daae8e | 363 | /// Tells systemd the startup state of the service (see: ``man sd_notify``) |
9c351a36 WB |
364 | pub fn systemd_notify(state: SystemdNotify) -> Result<(), Error> { |
365 | let message = match state { | |
366 | SystemdNotify::Ready => CString::new("READY=1"), | |
367 | SystemdNotify::Reloading => CString::new("RELOADING=1"), | |
368 | SystemdNotify::Stopping => CString::new("STOPPING=1"), | |
369 | SystemdNotify::Status(msg) => CString::new(format!("STATUS={}", msg)), | |
370 | SystemdNotify::MainPid(pid) => CString::new(format!("MAINPID={}", pid)), | |
371 | }?; | |
372 | let rc = unsafe { sd_notify(0, message.as_ptr()) }; | |
373 | if rc < 0 { | |
374 | bail!( | |
375 | "systemd_notify failed: {}", | |
376 | std::io::Error::from_raw_os_error(-rc), | |
377 | ); | |
378 | } | |
379 | Ok(()) | |
380 | } |