]>
git.proxmox.com Git - pve-ha-manager.git/blob - src/watchdog-mux.c
b4bcc0c4108c2ba376acfba8a6663475f2a3e252
10 #include <sys/types.h>
12 #include <sys/socket.h>
14 #include <sys/epoll.h>
16 #include <sys/signalfd.h>
18 #include <linux/types.h>
19 #include <linux/watchdog.h>
21 #define WD_SOCK_PATH "/run/watchdog-mux.sock"
22 #define WD_ACTIVE_MARKER "/run/watchdog-mux.active"
24 #define LISTEN_BACKLOG 32
28 #define WATCHDOG_DEV "/dev/watchdog"
30 #define JOURNALCTL_BIN "/bin/journalctl"
33 int watchdog_timeout
= 10;
34 int client_watchdog_timeout
= 60;
35 int update_watchdog
= 1;
43 #define MAX_CLIENTS 100
45 static wd_client_t client_list
[MAX_CLIENTS
];
48 alloc_client(int fd
, time_t time
)
52 for (i
= 0; i
< MAX_CLIENTS
; i
++) {
53 if (client_list
[i
].fd
== 0) {
54 client_list
[i
].fd
= fd
;
55 client_list
[i
].time
= time
;
56 client_list
[i
].magic_close
= 0;
57 return &client_list
[i
];
65 free_client(wd_client_t
*wd_client
)
72 wd_client
->magic_close
= 0;
76 active_client_count(void)
80 for (i
= 0; i
< MAX_CLIENTS
; i
++) {
81 if (client_list
[i
].fd
!= 0 && client_list
[i
].time
!= 0) {
92 if (watchdog_fd
!= -1) {
93 if (write(watchdog_fd
, "V", 1) == -1) {
94 perror("write magic watchdog close");
96 if (close(watchdog_fd
) == -1) {
97 perror("write magic watchdog close");
105 sync_journal_unsafe(void)
108 pid_t child
= fork();
110 // do not care about fork error or collecting the childs exit status,
111 // we are resetting soon anyway and just want to sync out the journal
113 execl(JOURNALCTL_BIN
, JOURNALCTL_BIN
, "--sync", NULL
);
121 struct sockaddr_un my_addr
, peer_addr
;
122 socklen_t peer_addr_size
;
123 struct epoll_event ev
, events
[MAX_EVENTS
];
124 int listen_sock
, nfds
, epollfd
, sigfd
;
128 if (stat(WD_ACTIVE_MARKER
, &fs
) == 0) {
129 fprintf(stderr
, "watchdog active - unable to restart watchdog-mux\n");
133 /* if you want to debug, set options in /lib/modprobe.d/aliases.conf
134 * options softdog soft_noboot=1
136 if (stat(WATCHDOG_DEV
, &fs
) == -1) {
137 char *wd_module
= getenv("WATCHDOG_MODULE");
140 if ((asprintf(&cmd
, "modprobe -q %s", wd_module
) == -1)) {
141 perror("assemble modprobe command failed");
144 fprintf(stderr
, "Loading watchdog module '%s'\n", wd_module
);
148 system("modprobe -q softdog"); // load softdog by default
152 if ((watchdog_fd
= open(WATCHDOG_DEV
, O_WRONLY
)) == -1) {
153 perror("watchdog open");
157 if (ioctl(watchdog_fd
, WDIOC_SETTIMEOUT
, &watchdog_timeout
) == -1) {
158 perror("watchdog set timeout");
163 /* read and log watchdog identity */
164 struct watchdog_info wdinfo
;
165 if (ioctl(watchdog_fd
, WDIOC_GETSUPPORT
, &wdinfo
) == -1) {
166 perror("read watchdog info");
171 wdinfo
.identity
[sizeof(wdinfo
.identity
) - 1] = 0; // just to be sure
172 fprintf(stderr
, "Watchdog driver '%s', version %x\n", wdinfo
.identity
, wdinfo
.firmware_version
);
174 /* always unlink socket path then create socket */
175 unlink(WD_SOCK_PATH
);
177 listen_sock
= socket(AF_UNIX
, SOCK_STREAM
, 0);
178 if (listen_sock
== -1) {
179 perror("socket create");
182 memset(&my_addr
, 0, sizeof(struct sockaddr_un
));
183 my_addr
.sun_family
= AF_UNIX
;
184 strncpy(my_addr
.sun_path
, WD_SOCK_PATH
, sizeof(my_addr
.sun_path
) - 1);
186 if (bind(listen_sock
, (struct sockaddr
*) &my_addr
, sizeof(struct sockaddr_un
)) == -1) {
187 perror("socket bind");
191 if (listen(listen_sock
, LISTEN_BACKLOG
) == -1) {
192 perror("socket listen");
196 epollfd
= epoll_create(10);
198 perror("epoll_create");
203 ev
.data
.ptr
= alloc_client(listen_sock
, 0);
204 if (epoll_ctl(epollfd
, EPOLL_CTL_ADD
, listen_sock
, &ev
) == -1) {
205 perror("epoll_ctl add listen_sock");
211 sigaddset(&mask
, SIGINT
);
212 sigaddset(&mask
, SIGTERM
);
213 sigaddset(&mask
, SIGHUP
);
215 sigprocmask(SIG_BLOCK
, &mask
, NULL
);
217 if ((sigfd
= signalfd(-1, &mask
, SFD_NONBLOCK
)) < 0) {
218 perror("unable to open signalfd");
223 ev
.data
.ptr
= alloc_client(sigfd
, 0);
224 if (epoll_ctl(epollfd
, EPOLL_CTL_ADD
, sigfd
, &ev
) == -1) {
225 perror("epoll_ctl add sigfd");
230 nfds
= epoll_wait(epollfd
, events
, MAX_EVENTS
, 1000);
235 perror("epoll_pwait");
239 if (nfds
== 0) { // timeout
241 // check for timeouts
242 if (update_watchdog
) {
244 time_t ctime
= time(NULL
);
245 for (i
= 0; i
< MAX_CLIENTS
; i
++) {
247 client_list
[i
].fd
!= 0
248 && client_list
[i
].time
!= 0
249 && ((ctime
- client_list
[i
].time
) > client_watchdog_timeout
)
252 fprintf(stderr
, "client watchdog expired - disable watchdog updates\n");
257 if (update_watchdog
) {
258 if (ioctl(watchdog_fd
, WDIOC_KEEPALIVE
, 0) == -1) {
259 perror("watchdog update failed");
266 if (!update_watchdog
)
272 for (n
= 0; n
< nfds
; ++n
) {
273 wd_client_t
*wd_client
= events
[n
].data
.ptr
;
274 if (wd_client
->fd
== listen_sock
) {
275 int conn_sock
= accept(listen_sock
, (struct sockaddr
*) &peer_addr
, &peer_addr_size
);
276 if (conn_sock
== -1) {
280 if (fcntl(conn_sock
, F_SETFL
, O_NONBLOCK
) == -1) {
281 perror("setnonblocking");
285 wd_client_t
*new_client
= alloc_client(conn_sock
, time(NULL
));
286 if (new_client
== NULL
) {
287 fprintf(stderr
, "unable to alloc wd_client structure\n");
291 mkdir(WD_ACTIVE_MARKER
, 0600);
294 ev
.data
.ptr
= new_client
;
295 if (epoll_ctl(epollfd
, EPOLL_CTL_ADD
, conn_sock
, &ev
) == -1) {
296 perror("epoll_ctl: add conn_sock");
299 } else if (wd_client
->fd
== sigfd
) {
301 /* signal handling */
304 struct signalfd_siginfo si
;
306 if ((rv
= read(sigfd
, &si
, sizeof(si
))) && rv
>= 0) {
307 if (si
.ssi_signo
== SIGHUP
) {
308 perror("got SIGHUP - ignored");
311 fprintf(stderr
, "got terminate request\n");
317 int cfd
= wd_client
->fd
;
319 ssize_t bytes
= read(cfd
, buf
, sizeof(buf
));
323 } else if (bytes
> 0) {
325 for (i
= 0; i
< bytes
; i
++) {
327 wd_client
->magic_close
= 1;
329 wd_client
->magic_close
= 0;
332 wd_client
->time
= time(NULL
);
334 if (events
[n
].events
& EPOLLHUP
|| events
[n
].events
& EPOLLERR
) {
335 //printf("GOT %016x event\n", events[n].events);
336 if (epoll_ctl(epollfd
, EPOLL_CTL_DEL
, cfd
, NULL
) == -1) {
337 perror("epoll_ctl: del conn_sock");
340 if (close(cfd
) == -1) {
341 perror("close conn_sock");
345 if (!wd_client
->magic_close
) {
346 fprintf(stderr
, "client did not stop watchdog - disable watchdog updates\n");
347 sync_journal_unsafe();
350 free_client(wd_client
);
353 if (!active_client_count()) {
354 rmdir(WD_ACTIVE_MARKER
);
364 int active_count
= active_client_count();
365 if (active_count
> 0) {
366 fprintf(stderr
, "exit watchdog-mux with active connections\n");
367 sync_journal_unsafe();
369 fprintf(stderr
, "clean exit\n");
373 unlink(WD_SOCK_PATH
);
378 unlink(WD_SOCK_PATH
);