]> git.proxmox.com Git - pve-ha-manager.git/blame - src/watchdog-mux.c
allow to configure watchdog module in /etc/default/pve-ha-manager
[pve-ha-manager.git] / src / watchdog-mux.c
CommitLineData
6263c81d 1#define _GNU_SOURCE
da8f8bbc
DM
2#include <stdio.h>
3#include <stdlib.h>
4#include <unistd.h>
5#include <fcntl.h>
6#include <string.h>
1fe42db7 7#include <errno.h>
98099e4f 8#include <time.h>
da8f8bbc 9#include <sys/ioctl.h>
7336614a
DM
10#include <sys/types.h>
11#include <sys/stat.h>
da8f8bbc
DM
12#include <sys/socket.h>
13#include <sys/un.h>
14#include <sys/epoll.h>
98099e4f
DM
15#include <signal.h>
16#include <sys/signalfd.h>
da8f8bbc
DM
17
18#include <linux/types.h>
19#include <linux/watchdog.h>
20
7336614a 21#include <systemd/sd-daemon.h>
e99d3682 22
/* Unix socket the daemon binds itself when systemd passes in no socket. */
#define WD_SOCK_PATH "/run/watchdog-mux.sock"

/*
 * Marker directory: created when a client connects, removed once no active
 * client remains. If it exists at startup the daemon refuses to start.
 */
#define WD_ACTIVE_MARKER "/run/watchdog-mux.active"

#define LISTEN_BACKLOG 32 /* set same value in watchdog-mux.socket */

/* Upper bound on events fetched by a single epoll_wait() call. */
#define MAX_EVENTS 10

/* Watchdog device node opened by the daemon. */
#define WATCHDOG_DEV "/dev/watchdog"

/* Descriptor of the opened watchdog device, -1 while it is not open. */
int watchdog_fd = -1;

/* Timeout handed to the watchdog device via WDIOC_SETTIMEOUT. */
int watchdog_timeout = 10;

/* Maximum age (difference of time() values) of a client's last update. */
int client_watchdog_timeout = 60;

/* Non-zero while the daemon keeps sending keep-alives to the device. */
int update_watchdog = 1;
4915a0e9
DM
36
/*
 * One slot of the client table.
 *
 * A slot whose fd is 0 is considered unused. The listening socket and the
 * signalfd also occupy slots, registered with time == 0, so they are never
 * counted as active clients.
 */
typedef struct {
    int fd;          /* descriptor registered with epoll; 0 marks a free slot */
    time_t time;     /* time() of the last update; 0 for non-client entries */
    int magic_close; /* 1 if the last byte received from the client was 'V' */
} wd_client_t;

/* Capacity of the static client table. */
#define MAX_CLIENTS 100

/* Zero-initialised table, so every slot starts out free. */
static wd_client_t client_list[MAX_CLIENTS];
46
47static wd_client_t *
98099e4f 48alloc_client(int fd, time_t time)
4915a0e9
DM
49{
50 int i;
51
52 for (i = 0; i < MAX_CLIENTS; i++) {
53 if (client_list[i].fd == 0) {
4915a0e9 54 client_list[i].fd = fd;
98099e4f 55 client_list[i].time = time;
4178d9ea 56 client_list[i].magic_close = 0;
4915a0e9
DM
57 return &client_list[i];
58 }
59 }
60
61 return NULL;
62}
63
64static void
65free_client(wd_client_t *wd_client)
66{
67 if (!wd_client)
68 return;
69
98099e4f 70 wd_client->time = 0;
4915a0e9 71 wd_client->fd = 0;
4178d9ea 72 wd_client->magic_close = 0;
4915a0e9
DM
73}
74
98099e4f
DM
75static int
76active_client_count(void)
77{
78 int i, count = 0;
79
80 for (i = 0; i < MAX_CLIENTS; i++) {
81 if (client_list[i].fd != 0 && client_list[i].time != 0) {
82 count++;
83 }
84 }
85
86 return count;
87}
88
da8f8bbc
DM
89static void
90watchdog_close(void)
91{
92 if (watchdog_fd != -1) {
93 if (write(watchdog_fd, "V", 1) == -1) {
94 perror("write magic watchdog close");
95 }
96 if (close(watchdog_fd) == -1) {
97 perror("write magic watchdog close");
98 }
99 }
100
101 watchdog_fd = -1;
102}
103
104int
105main(void)
106{
da8f8bbc
DM
107 struct sockaddr_un my_addr, peer_addr;
108 socklen_t peer_addr_size;
109 struct epoll_event ev, events[MAX_EVENTS];
98099e4f 110 int socket_count, listen_sock, nfds, epollfd, sigfd;
06b589da 111 int unlink_socket = 0;
98099e4f 112
da8f8bbc 113 struct stat fs;
98099e4f
DM
114
115 if (stat(WD_ACTIVE_MARKER, &fs) == 0) {
116 fprintf(stderr, "watchdog active - unable to restart watchdog-mux\n");
117 exit(EXIT_FAILURE);
118 }
b7d5be18
DM
119
120 /* if you want to debug, set options in /lib/modprobe.d/aliases.conf
121 * options softdog soft_noboot=1
122 */
da8f8bbc 123 if (stat(WATCHDOG_DEV, &fs) == -1) {
6263c81d
DM
124 char *wd_module = getenv("WATCHDOG_MODULE");
125 if (wd_module) {
126 char *cmd = NULL;
127 if ((asprintf(&cmd, "modprobe -q %s", wd_module) == -1)) {
128 perror("assemble modprobe command failed");
129 exit(EXIT_FAILURE);
130 }
131 system(cmd);
132 } else {
133 system("modprobe -q softdog"); // load softdog by default
134 }
da8f8bbc
DM
135 }
136
137 if ((watchdog_fd = open(WATCHDOG_DEV, O_WRONLY)) == -1) {
138 perror("watchdog open");
139 exit(EXIT_FAILURE);
140 }
141
142 if (ioctl(watchdog_fd, WDIOC_SETTIMEOUT, &watchdog_timeout) == -1) {
143 perror("watchdog set timeout");
144 watchdog_close();
145 exit(EXIT_FAILURE);
146 }
147
148 /* read and log watchdog identity */
149 struct watchdog_info wdinfo;
150 if (ioctl(watchdog_fd, WDIOC_GETSUPPORT, &wdinfo) == -1) {
151 perror("read watchdog info");
152 watchdog_close();
153 exit(EXIT_FAILURE);
154 }
155
156 wdinfo.identity[sizeof(wdinfo.identity) - 1] = 0; // just to be sure
157 fprintf(stderr, "Watchdog driver '%s', version %x\n",
158 wdinfo.identity, wdinfo.firmware_version);
159
e99d3682 160 socket_count = sd_listen_fds(0);
06b589da 161
e99d3682 162 if (socket_count > 1) {
da8f8bbc 163
ba878e35 164 perror("too many file descriptors received.\n");
e99d3682
DM
165 goto err;
166
167 } else if (socket_count == 1) {
da8f8bbc 168
e99d3682 169 listen_sock = SD_LISTEN_FDS_START + 0;
06b589da 170
e99d3682 171 } else {
da8f8bbc 172
06b589da
DM
173 unlink_socket = 1;
174
175 unlink(WD_SOCK_PATH);
e99d3682
DM
176
177 listen_sock = socket(AF_UNIX, SOCK_STREAM, 0);
178 if (listen_sock == -1) {
179 perror("socket create");
180 exit(EXIT_FAILURE);
181 }
182
183 memset(&my_addr, 0, sizeof(struct sockaddr_un));
184 my_addr.sun_family = AF_UNIX;
06b589da 185 strncpy(my_addr.sun_path, WD_SOCK_PATH, sizeof(my_addr.sun_path) - 1);
e99d3682
DM
186
187 if (bind(listen_sock, (struct sockaddr *) &my_addr,
188 sizeof(struct sockaddr_un)) == -1) {
189 perror("socket bind");
190 exit(EXIT_FAILURE);
191 }
da8f8bbc 192
e99d3682
DM
193 if (listen(listen_sock, LISTEN_BACKLOG) == -1) {
194 perror("socket listen");
195 goto err;
196 }
da8f8bbc 197 }
e99d3682 198
da8f8bbc
DM
199 epollfd = epoll_create(10);
200 if (epollfd == -1) {
201 perror("epoll_create");
202 goto err;
203 }
204
205 ev.events = EPOLLIN;
98099e4f 206 ev.data.ptr = alloc_client(listen_sock, 0);
da8f8bbc 207 if (epoll_ctl(epollfd, EPOLL_CTL_ADD, listen_sock, &ev) == -1) {
98099e4f
DM
208 perror("epoll_ctl add listen_sock");
209 goto err;
210 }
211
212 sigset_t mask;
213 sigemptyset(&mask);
214 sigaddset(&mask, SIGINT);
215 sigaddset(&mask, SIGTERM);
216 sigaddset(&mask, SIGHUP);
217
218 sigprocmask(SIG_BLOCK, &mask, NULL);
219
220 if ((sigfd = signalfd(-1, &mask, SFD_NONBLOCK)) < 0) {
221 perror("unable to open signalfd");
da8f8bbc
DM
222 goto err;
223 }
224
98099e4f
DM
225 ev.events = EPOLLIN;
226 ev.data.ptr = alloc_client(sigfd, 0);
227 if (epoll_ctl(epollfd, EPOLL_CTL_ADD, sigfd, &ev) == -1) {
228 perror("epoll_ctl add sigfd");
229 goto err;
230 }
231
da8f8bbc 232 for (;;) {
1fe42db7 233 nfds = epoll_wait(epollfd, events, MAX_EVENTS, 1000);
da8f8bbc 234 if (nfds == -1) {
1fe42db7
DM
235 if (errno == EINTR)
236 continue;
237
da8f8bbc
DM
238 perror("epoll_pwait");
239 goto err;
240 }
241
1fe42db7
DM
242 if (nfds == 0) { // timeout
243
5ce9f244
DM
244 // check for timeouts
245 if (update_watchdog) {
246 int i;
247 time_t ctime = time(NULL);
248 for (i = 0; i < MAX_CLIENTS; i++) {
249 if (client_list[i].fd != 0 && client_list[i].time != 0 &&
250 ((ctime - client_list[i].time) > client_watchdog_timeout)) {
251 update_watchdog = 0;
252 fprintf(stderr, "client watchdog expired - disable watchdog updates\n");
253 }
254 }
255 }
256
257 if (update_watchdog) {
258 if (ioctl(watchdog_fd, WDIOC_KEEPALIVE, 0) == -1) {
259 perror("watchdog update failed");
260 }
1fe42db7
DM
261 }
262
263 continue;
264 }
265
115805fd
DM
266 if (!update_watchdog)
267 break;
268
98099e4f
DM
269 int terminate = 0;
270
da8f8bbc
DM
271 int n;
272 for (n = 0; n < nfds; ++n) {
4915a0e9
DM
273 wd_client_t *wd_client = events[n].data.ptr;
274 if (wd_client->fd == listen_sock) {
da8f8bbc
DM
275 int conn_sock = accept(listen_sock, (struct sockaddr *) &peer_addr, &peer_addr_size);
276 if (conn_sock == -1) {
277 perror("accept");
278 goto err; // fixme
279 }
280 if (fcntl(conn_sock, F_SETFL, O_NONBLOCK) == -1) {
281 perror("setnonblocking");
282 goto err; // fixme
283 }
284
98099e4f 285 wd_client_t *new_client = alloc_client(conn_sock, time(NULL));
4915a0e9
DM
286 if (new_client == NULL) {
287 fprintf(stderr, "unable to alloc wd_client structure\n");
288 goto err; // fixme;
289 }
98099e4f
DM
290
291 mkdir(WD_ACTIVE_MARKER, 0600);
292
da8f8bbc 293 ev.events = EPOLLIN;
4915a0e9 294 ev.data.ptr = new_client;
da8f8bbc
DM
295 if (epoll_ctl(epollfd, EPOLL_CTL_ADD, conn_sock, &ev) == -1) {
296 perror("epoll_ctl: add conn_sock");
297 goto err; // fixme
298 }
98099e4f
DM
299 } else if (wd_client->fd == sigfd) {
300
301 /* signal handling */
302
303 int rv = 0;
304 struct signalfd_siginfo si;
305
306 if ((rv = read(sigfd, &si, sizeof(si))) && rv >= 0) {
307 if (si.ssi_signo == SIGHUP) {
308 perror("got SIGHUP - ignored");
309 } else {
310 terminate = 1;
311 fprintf(stderr, "got terminate request\n");
312 }
313 }
314
da8f8bbc
DM
315 } else {
316 char buf[4096];
4915a0e9 317 int cfd = wd_client->fd;
98099e4f 318
da8f8bbc
DM
319 ssize_t bytes = read(cfd, buf, sizeof(buf));
320 if (bytes == -1) {
321 perror("read");
322 goto err; // fixme
323 } else if (bytes > 0) {
4178d9ea
DM
324 int i;
325 for (i = 0; i < bytes; i++) {
326 if (buf[i] == 'V') {
327 wd_client->magic_close = 1;
328 } else {
329 wd_client->magic_close = 0;
330 }
331 }
332 wd_client->time = time(NULL);
da8f8bbc
DM
333 } else {
334 if (events[n].events & EPOLLHUP || events[n].events & EPOLLERR) {
4178d9ea 335 //printf("GOT %016x event\n", events[n].events);
da8f8bbc
DM
336 if (epoll_ctl(epollfd, EPOLL_CTL_DEL, cfd, NULL) == -1) {
337 perror("epoll_ctl: del conn_sock");
338 goto err; // fixme
339 }
340 if (close(cfd) == -1) {
341 perror("close conn_sock");
342 goto err; // fixme
343 }
98099e4f 344
4178d9ea 345 if (!wd_client->magic_close) {
5ce9f244
DM
346 fprintf(stderr, "client did not stop watchdog - disable watchdog updates\n");
347 update_watchdog = 0;
4178d9ea
DM
348 } else {
349 free_client(wd_client);
350 }
351
98099e4f
DM
352 if (!active_client_count()) {
353 rmdir(WD_ACTIVE_MARKER);
354 }
da8f8bbc
DM
355 }
356 }
357 }
358 }
98099e4f
DM
359 if (terminate)
360 break;
da8f8bbc
DM
361 }
362
98099e4f
DM
363 int active_count = active_client_count();
364 if (active_count > 0) {
365 fprintf(stderr, "exit watchdog-mux with active connections\n");
366 } else {
367 fprintf(stderr, "clean exit\n");
368 watchdog_close();
369 }
370
06b589da
DM
371 if (unlink_socket)
372 unlink(WD_SOCK_PATH);
373
da8f8bbc
DM
374 exit(EXIT_SUCCESS);
375
376err:
06b589da
DM
377 if (unlink_socket)
378 unlink(WD_SOCK_PATH);
379
da8f8bbc
DM
380 exit(EXIT_FAILURE);
381}