]>
Commit | Line | Data |
---|---|---|
53daae8e | 1 | //! Helpers to implement restartable daemons/services. |
dce94d0e WB |
2 | |
3 | use std::ffi::CString; | |
083ff3fd | 4 | use std::future::Future; |
c08fac4d | 5 | use std::io::{Read, Write}; |
3ddb1488 | 6 | use std::os::raw::{c_char, c_uchar, c_int}; |
620dccf1 | 7 | use std::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd, RawFd}; |
dce94d0e WB |
8 | use std::os::unix::ffi::OsStrExt; |
9 | use std::panic::UnwindSafe; | |
3ddb1488 | 10 | use std::path::PathBuf; |
dce94d0e | 11 | |
f7d4e4b5 | 12 | use anyhow::{bail, format_err, Error}; |
9e45e03a | 13 | use futures::future::{self, Either}; |
e64f77b7 | 14 | use nix::unistd::{fork, ForkResult}; |
4422ba2c | 15 | |
8bca935f | 16 | use proxmox::tools::fd::Fd; |
6ef1b649 | 17 | use proxmox_io::{ReadExt, WriteExt}; |
ca3c3ce9 | 18 | |
8735247f | 19 | use pbs_tools::fd::fd_change_cloexec; |
dce94d0e WB |
20 | |
21 | // Unfortunately FnBox is nightly-only and Box<FnOnce> is unusable, so just use Box<FnMut>... | |
8cf445ec | 22 | type BoxedStoreFunc = Box<dyn FnMut() -> Result<String, Error> + UnwindSafe + Send>; |
dce94d0e | 23 | |
e64f77b7 DM |
24 | // Helper trait to "store" something in the environment to be re-used after re-executing the |
25 | // service on a reload. | |
11148dce | 26 | trait Reloadable: Sized { |
dce94d0e | 27 | fn restore(var: &str) -> Result<Self, Error>; |
620dccf1 | 28 | fn get_store_func(&self) -> Result<BoxedStoreFunc, Error>; |
dce94d0e WB |
29 | } |
30 | ||
11148dce DM |
31 | // Manages things to be stored and reloaded upon reexec. |
32 | // Anything which should be restorable should be instantiated via this struct's `restore` method, | |
62ee2eb4 | 33 | #[derive(Default)] |
11148dce | 34 | struct Reloader { |
dce94d0e | 35 | pre_exec: Vec<PreExecEntry>, |
3ddb1488 | 36 | self_exe: PathBuf, |
dce94d0e WB |
37 | } |
38 | ||
39 | // Currently we only need environment variables for storage, but in theory we could also add | |
40 | // variants which need temporary files or pipes... | |
41 | struct PreExecEntry { | |
42 | name: &'static str, // Feel free to change to String if necessary... | |
43 | store_fn: BoxedStoreFunc, | |
44 | } | |
45 | ||
e4311382 | 46 | impl Reloader { |
dc2ef2b5 WB |
47 | pub fn new() -> Result<Self, Error> { |
48 | Ok(Self { | |
dce94d0e | 49 | pre_exec: Vec::new(), |
dc2ef2b5 | 50 | |
3ddb1488 DM |
51 | // Get the path to our executable as PathBuf |
52 | self_exe: std::fs::read_link("/proc/self/exe")?, | |
dc2ef2b5 | 53 | }) |
dce94d0e WB |
54 | } |
55 | ||
56 | /// Restore an object from an environment variable of the given name, or, if none exists, uses | |
57 | /// the function provided in the `or_create` parameter to instantiate the new "first" instance. | |
58 | /// | |
59 | /// Values created via this method will be remembered for later re-execution. | |
083ff3fd | 60 | pub async fn restore<T, F, U>(&mut self, name: &'static str, or_create: F) -> Result<T, Error> |
dce94d0e | 61 | where |
e4311382 | 62 | T: Reloadable, |
083ff3fd WB |
63 | F: FnOnce() -> U, |
64 | U: Future<Output = Result<T, Error>>, | |
dce94d0e WB |
65 | { |
66 | let res = match std::env::var(name) { | |
67 | Ok(varstr) => T::restore(&varstr)?, | |
083ff3fd | 68 | Err(std::env::VarError::NotPresent) => or_create().await?, |
dce94d0e WB |
69 | Err(_) => bail!("variable {} has invalid value", name), |
70 | }; | |
71 | ||
72 | self.pre_exec.push(PreExecEntry { | |
73 | name, | |
620dccf1 | 74 | store_fn: res.get_store_func()?, |
dce94d0e WB |
75 | }); |
76 | Ok(res) | |
77 | } | |
78 | ||
79 | fn pre_exec(self) -> Result<(), Error> { | |
620dccf1 | 80 | for mut item in self.pre_exec { |
dce94d0e WB |
81 | std::env::set_var(item.name, (item.store_fn)()?); |
82 | } | |
83 | Ok(()) | |
84 | } | |
85 | ||
86 | pub fn fork_restart(self) -> Result<(), Error> { | |
dce94d0e WB |
87 | // Get our parameters as Vec<CString> |
88 | let args = std::env::args_os(); | |
89 | let mut new_args = Vec::with_capacity(args.len()); | |
90 | for arg in args { | |
91 | new_args.push(CString::new(arg.as_bytes())?); | |
92 | } | |
93 | ||
5e5eed5c | 94 | // Synchronisation pipe: |
c08fac4d | 95 | let (pold, pnew) = super::socketpair()?; |
5e5eed5c | 96 | |
dce94d0e | 97 | // Start ourselves in the background: |
0c4c6a7b | 98 | match unsafe { fork() } { |
dce94d0e | 99 | Ok(ForkResult::Child) => { |
5e5eed5c | 100 | // Double fork so systemd can supervise us without nagging... |
0c4c6a7b | 101 | match unsafe { fork() } { |
5e5eed5c | 102 | Ok(ForkResult::Child) => { |
c08fac4d | 103 | std::mem::drop(pold); |
5e5eed5c WB |
104 | // At this point we call pre-exec helpers. We must be certain that if they fail for |
105 | // whatever reason we can still call `_exit()`, so use catch_unwind. | |
106 | match std::panic::catch_unwind(move || { | |
c08fac4d WB |
107 | let mut pnew = unsafe { |
108 | std::fs::File::from_raw_fd(pnew.into_raw_fd()) | |
5e5eed5c WB |
109 | }; |
110 | let pid = nix::unistd::Pid::this(); | |
c08fac4d | 111 | if let Err(e) = unsafe { pnew.write_host_value(pid.as_raw()) } { |
5e5eed5c WB |
112 | log::error!("failed to send new server PID to parent: {}", e); |
113 | unsafe { | |
114 | libc::_exit(-1); | |
115 | } | |
116 | } | |
c08fac4d WB |
117 | |
118 | let mut ok = [0u8]; | |
119 | if let Err(e) = pnew.read_exact(&mut ok) { | |
120 | log::error!("parent vanished before notifying systemd: {}", e); | |
121 | unsafe { | |
122 | libc::_exit(-1); | |
123 | } | |
124 | } | |
125 | assert_eq!(ok[0], 1, "reload handshake should have sent a 1 byte"); | |
126 | ||
127 | std::mem::drop(pnew); | |
3ddb1488 DM |
128 | |
129 | // Try to reopen STDOUT/STDERR journald streams to get correct PID in logs | |
130 | let ident = CString::new(self.self_exe.file_name().unwrap().as_bytes()).unwrap(); | |
131 | let ident = ident.as_bytes(); | |
132 | let fd = unsafe { sd_journal_stream_fd(ident.as_ptr(), libc::LOG_INFO, 1) }; | |
133 | if fd >= 0 && fd != 1 { | |
134 | let fd = proxmox::tools::fd::Fd(fd); // add drop handler | |
135 | nix::unistd::dup2(fd.as_raw_fd(), 1)?; | |
136 | } else { | |
137 | log::error!("failed to update STDOUT journal redirection ({})", fd); | |
138 | } | |
139 | let fd = unsafe { sd_journal_stream_fd(ident.as_ptr(), libc::LOG_ERR, 1) }; | |
140 | if fd >= 0 && fd != 2 { | |
141 | let fd = proxmox::tools::fd::Fd(fd); // add drop handler | |
142 | nix::unistd::dup2(fd.as_raw_fd(), 2)?; | |
143 | } else { | |
144 | log::error!("failed to update STDERR journal redirection ({})", fd); | |
145 | } | |
146 | ||
dc2ef2b5 | 147 | self.do_reexec(new_args) |
5e5eed5c WB |
148 | }) |
149 | { | |
38da8ca1 DM |
150 | Ok(Ok(())) => log::error!("do_reexec returned!"), |
151 | Ok(Err(err)) => log::error!("do_reexec failed: {}", err), | |
152 | Err(_) => log::error!("panic in re-exec"), | |
5e5eed5c WB |
153 | } |
154 | } | |
155 | Ok(ForkResult::Parent { child }) => { | |
c08fac4d | 156 | std::mem::drop((pold, pnew)); |
5e5eed5c WB |
157 | log::debug!("forked off a new server (second pid: {})", child); |
158 | } | |
159 | Err(e) => log::error!("fork() failed, restart delayed: {}", e), | |
dce94d0e WB |
160 | } |
161 | // No matter how we managed to get here, this is the time where we bail out quickly: | |
162 | unsafe { | |
163 | libc::_exit(-1) | |
164 | } | |
165 | } | |
166 | Ok(ForkResult::Parent { child }) => { | |
5e5eed5c | 167 | log::debug!("forked off a new server (first pid: {}), waiting for 2nd pid", child); |
c08fac4d WB |
168 | std::mem::drop(pnew); |
169 | let mut pold = unsafe { | |
170 | std::fs::File::from_raw_fd(pold.into_raw_fd()) | |
5e5eed5c | 171 | }; |
c08fac4d | 172 | let child = nix::unistd::Pid::from_raw(match unsafe { pold.read_le_value() } { |
5e5eed5c WB |
173 | Ok(v) => v, |
174 | Err(e) => { | |
175 | log::error!("failed to receive pid of double-forked child process: {}", e); | |
176 | // systemd will complain but won't kill the service... | |
177 | return Ok(()); | |
178 | } | |
179 | }); | |
180 | ||
d98c9a7a WB |
181 | if let Err(e) = systemd_notify(SystemdNotify::MainPid(child)) { |
182 | log::error!("failed to notify systemd about the new main pid: {}", e); | |
183 | } | |
9a1330c7 TL |
184 | // ensure systemd got the message about the new main PID before continuing, else it |
185 | // will get confused if the new main process sends its READY signal before that | |
058b4b97 | 186 | if let Err(e) = systemd_notify_barrier(u64::MAX) { |
0a6df209 DC |
187 | log::error!("failed to wait on systemd-processing: {}", e); |
188 | } | |
c08fac4d WB |
189 | |
190 | // notify child that it is now the new main process: | |
191 | if let Err(e) = pold.write_all(&[1u8]) { | |
192 | log::error!("child vanished during reload: {}", e); | |
193 | } | |
194 | ||
dce94d0e WB |
195 | Ok(()) |
196 | } | |
197 | Err(e) => { | |
5e5eed5c | 198 | log::error!("fork() failed, restart delayed: {}", e); |
dce94d0e WB |
199 | Ok(()) |
200 | } | |
201 | } | |
202 | } | |
203 | ||
dc2ef2b5 | 204 | fn do_reexec(self, args: Vec<CString>) -> Result<(), Error> { |
3ddb1488 | 205 | let exe = CString::new(self.self_exe.as_os_str().as_bytes())?; |
dce94d0e WB |
206 | self.pre_exec()?; |
207 | nix::unistd::setsid()?; | |
8bf4559b | 208 | let args: Vec<&std::ffi::CStr> = args.iter().map(|s| s.as_ref()).collect(); |
dce94d0e | 209 | nix::unistd::execvp(&exe, &args)?; |
dc2ef2b5 | 210 | panic!("exec misbehaved"); |
dce94d0e WB |
211 | } |
212 | } | |
4422ba2c | 213 | |
af70c181 | 214 | // For now all we need to do is store and reuse a tcp listening socket: |
e4311382 | 215 | impl Reloadable for tokio::net::TcpListener { |
af70c181 WB |
216 | // NOTE: The socket must not be closed when the store-function is called: |
217 | // FIXME: We could become "independent" of the TcpListener and its reference to the file | |
218 | // descriptor by `dup()`ing it (and check if the listener still exists via kcmp()?) | |
620dccf1 | 219 | fn get_store_func(&self) -> Result<BoxedStoreFunc, Error> { |
8bca935f | 220 | let mut fd_opt = Some(Fd( |
620dccf1 WB |
221 | nix::fcntl::fcntl(self.as_raw_fd(), nix::fcntl::FcntlArg::F_DUPFD_CLOEXEC(0))? |
222 | )); | |
223 | Ok(Box::new(move || { | |
224 | let fd = fd_opt.take().unwrap(); | |
225 | fd_change_cloexec(fd.as_raw_fd(), false)?; | |
226 | Ok(fd.into_raw_fd().to_string()) | |
227 | })) | |
af70c181 WB |
228 | } |
229 | ||
230 | fn restore(var: &str) -> Result<Self, Error> { | |
231 | let fd = var.parse::<u32>() | |
232 | .map_err(|e| format_err!("invalid file descriptor: {}", e))? | |
233 | as RawFd; | |
234 | fd_change_cloexec(fd, true)?; | |
235 | Ok(Self::from_std( | |
236 | unsafe { std::net::TcpListener::from_raw_fd(fd) }, | |
af70c181 WB |
237 | )?) |
238 | } | |
239 | } | |
a690ecac WB |
240 | |
241 | /// This creates a future representing a daemon which reloads itself when receiving a SIGHUP. | |
242 | /// If this is started regularly, a listening socket is created. In this case, the file descriptor | |
243 | /// number will be remembered in `PROXMOX_BACKUP_LISTEN_FD`. | |
244 | /// If the variable already exists, its contents will instead be used to restore the listening | |
245 | /// socket. The finished listening socket is then passed to the `create_service` function which | |
d2654200 DM |
246 | /// can be used to setup the TLS and the HTTP daemon. The returned future has to call |
247 | /// [systemd_notify] with [SystemdNotify::Ready] when the service is ready. | |
083ff3fd | 248 | pub async fn create_daemon<F, S>( |
a690ecac WB |
249 | address: std::net::SocketAddr, |
250 | create_service: F, | |
083ff3fd | 251 | ) -> Result<(), Error> |
a690ecac | 252 | where |
d2654200 DM |
253 | F: FnOnce(tokio::net::TcpListener) -> Result<S, Error>, |
254 | S: Future<Output = Result<(), Error>>, | |
a690ecac | 255 | { |
dc2ef2b5 | 256 | let mut reloader = Reloader::new()?; |
a690ecac WB |
257 | |
258 | let listener: tokio::net::TcpListener = reloader.restore( | |
259 | "PROXMOX_BACKUP_LISTEN_FD", | |
083ff3fd WB |
260 | move || async move { Ok(tokio::net::TcpListener::bind(&address).await?) }, |
261 | ).await?; | |
a690ecac | 262 | |
d2654200 DM |
263 | let service = create_service(listener)?; |
264 | ||
265 | let service = async move { | |
266 | if let Err(err) = service.await { | |
267 | log::error!("server error: {}", err); | |
268 | } | |
269 | }; | |
270 | ||
271 | let server_future = Box::pin(service); | |
8bca935f | 272 | let shutdown_future = crate::shutdown_future(); |
9e45e03a DC |
273 | |
274 | let finish_future = match future::select(server_future, shutdown_future).await { | |
275 | Either::Left((_, _)) => { | |
a0ffd4a4 DM |
276 | if !crate::shutdown_requested() { |
277 | crate::request_shutdown(); // make sure we are in shutdown mode | |
278 | } | |
9e45e03a DC |
279 | None |
280 | } | |
281 | Either::Right((_, server_future)) => Some(server_future), | |
282 | }; | |
a690ecac | 283 | |
a690ecac WB |
284 | let mut reloader = Some(reloader); |
285 | ||
8bca935f | 286 | if crate::is_reload_request() { |
083ff3fd WB |
287 | log::info!("daemon reload..."); |
288 | if let Err(e) = systemd_notify(SystemdNotify::Reloading) { | |
289 | log::error!("failed to notify systemd about the state change: {}", e); | |
290 | } | |
058b4b97 | 291 | if let Err(e) = systemd_notify_barrier(u64::MAX) { |
0a6df209 DC |
292 | log::error!("failed to wait on systemd-processing: {}", e); |
293 | } | |
294 | ||
083ff3fd WB |
295 | if let Err(e) = reloader.take().unwrap().fork_restart() { |
296 | log::error!("error during reload: {}", e); | |
62ee2eb4 | 297 | let _ = systemd_notify(SystemdNotify::Status("error during reload".to_string())); |
083ff3fd WB |
298 | } |
299 | } else { | |
300 | log::info!("daemon shutting down..."); | |
301 | } | |
9e45e03a DC |
302 | |
303 | if let Some(future) = finish_future { | |
304 | future.await; | |
305 | } | |
d7c6ad60 | 306 | |
38da8ca1 | 307 | log::info!("daemon shut down."); |
083ff3fd | 308 | Ok(()) |
a690ecac | 309 | } |
9c351a36 WB |
310 | |
// Raw bindings into libsystemd; each function returns a negative value on failure.
#[link(name = "systemd")]
extern "C" {
    /// See ``man sd_journal_stream_fd``.
    fn sd_journal_stream_fd(
        identifier: *const c_uchar,
        priority: c_int,
        level_prefix: c_int,
    ) -> c_int;

    /// See ``man sd_notify``.
    fn sd_notify(unset_environment: c_int, state: *const c_char) -> c_int;

    /// See ``man sd_notify_barrier``.
    fn sd_notify_barrier(unset_environment: c_int, timeout: u64) -> c_int;
}
317 | ||
53daae8e | 318 | /// Systemd sercice startup states (see: ``man sd_notify``) |
9c351a36 WB |
319 | pub enum SystemdNotify { |
320 | Ready, | |
321 | Reloading, | |
322 | Stopping, | |
323 | Status(String), | |
324 | MainPid(nix::unistd::Pid), | |
325 | } | |
326 | ||
53daae8e | 327 | /// Tells systemd the startup state of the service (see: ``man sd_notify``) |
9c351a36 WB |
328 | pub fn systemd_notify(state: SystemdNotify) -> Result<(), Error> { |
329 | let message = match state { | |
63cec162 TL |
330 | SystemdNotify::Ready => { |
331 | log::info!("service is ready"); | |
332 | CString::new("READY=1") | |
333 | }, | |
9c351a36 WB |
334 | SystemdNotify::Reloading => CString::new("RELOADING=1"), |
335 | SystemdNotify::Stopping => CString::new("STOPPING=1"), | |
336 | SystemdNotify::Status(msg) => CString::new(format!("STATUS={}", msg)), | |
337 | SystemdNotify::MainPid(pid) => CString::new(format!("MAINPID={}", pid)), | |
338 | }?; | |
339 | let rc = unsafe { sd_notify(0, message.as_ptr()) }; | |
340 | if rc < 0 { | |
63cec162 | 341 | bail!("systemd_notify failed: {}", std::io::Error::from_raw_os_error(-rc)); |
9c351a36 | 342 | } |
38da8ca1 | 343 | |
9c351a36 WB |
344 | Ok(()) |
345 | } | |
31142ef2 TL |
346 | |
347 | /// Waits until all previously sent messages with sd_notify are processed | |
348 | pub fn systemd_notify_barrier(timeout: u64) -> Result<(), Error> { | |
349 | let rc = unsafe { sd_notify_barrier(0, timeout) }; | |
350 | if rc < 0 { | |
351 | bail!("systemd_notify_barrier failed: {}", std::io::Error::from_raw_os_error(-rc)); | |
352 | } | |
353 | Ok(()) | |
354 | } |