]> git.proxmox.com Git - pve-ha-manager.git/blame - src/watchdog-mux.c
bump version to 1.0-13
[pve-ha-manager.git] / src / watchdog-mux.c
CommitLineData
da8f8bbc
DM
1#include <stdio.h>
2#include <stdlib.h>
3#include <unistd.h>
4#include <fcntl.h>
5#include <string.h>
1fe42db7 6#include <errno.h>
98099e4f 7#include <time.h>
da8f8bbc 8#include <sys/ioctl.h>
7336614a
DM
9#include <sys/types.h>
10#include <sys/stat.h>
da8f8bbc
DM
11#include <sys/socket.h>
12#include <sys/un.h>
13#include <sys/epoll.h>
98099e4f
DM
14#include <signal.h>
15#include <sys/signalfd.h>
da8f8bbc
DM
16
17#include <linux/types.h>
18#include <linux/watchdog.h>
19
7336614a 20#include <systemd/sd-daemon.h>
e99d3682 21
/* Path of the unix socket that main() creates (and unlinks on exit) when it
 * does not receive a listening socket from sd_listen_fds(). */
#define WD_SOCK_PATH     "/run/watchdog-mux.sock"

/* Marker that main() creates with mkdir() when a client connects and removes
 * with rmdir() once no active client is left; if it exists at startup,
 * main() refuses to run. */
#define WD_ACTIVE_MARKER "/run/watchdog-mux.active"

#define LISTEN_BACKLOG   32 /* set same value in watchdog-mux.socket */

/* Number of entries in the array handed to epoll_wait(). */
#define MAX_EVENTS       10

/* Watchdog device node opened by main(). */
#define WATCHDOG_DEV     "/dev/watchdog"
30
/* Descriptor of the opened WATCHDOG_DEV; -1 while no device is open. */
int watchdog_fd = -1;

/* Value passed to the WDIOC_SETTIMEOUT ioctl right after opening the device. */
int watchdog_timeout = 10;

/* Largest allowed difference between the current time() and a client's last
 * update before that client is treated as expired. */
int client_watchdog_timeout = 60;

/* Non-zero while main() keeps sending WDIOC_KEEPALIVE; cleared when a client
 * expires or disconnects without the magic close. */
int update_watchdog = 1;
4915a0e9
DM
35
/* One entry of the client table. Entries are also used for the listening
 * socket and the signalfd, which are registered with time == 0. */
typedef struct wd_client {
    int fd;          /* descriptor owned by this entry; 0 marks a free entry */
    time_t time;     /* time() of the last update; 0 for non-client entries */
    int magic_close; /* 1 if the last byte received was 'V', otherwise 0 */
} wd_client_t;

/* Capacity of the client table. */
#define MAX_CLIENTS 100

/* Statically zero-initialised table, so every entry starts out free. */
static wd_client_t client_list[MAX_CLIENTS];
45
46static wd_client_t *
98099e4f 47alloc_client(int fd, time_t time)
4915a0e9
DM
48{
49 int i;
50
51 for (i = 0; i < MAX_CLIENTS; i++) {
52 if (client_list[i].fd == 0) {
4915a0e9 53 client_list[i].fd = fd;
98099e4f 54 client_list[i].time = time;
4178d9ea 55 client_list[i].magic_close = 0;
4915a0e9
DM
56 return &client_list[i];
57 }
58 }
59
60 return NULL;
61}
62
63static void
64free_client(wd_client_t *wd_client)
65{
66 if (!wd_client)
67 return;
68
98099e4f 69 wd_client->time = 0;
4915a0e9 70 wd_client->fd = 0;
4178d9ea 71 wd_client->magic_close = 0;
4915a0e9
DM
72}
73
98099e4f
DM
74static int
75active_client_count(void)
76{
77 int i, count = 0;
78
79 for (i = 0; i < MAX_CLIENTS; i++) {
80 if (client_list[i].fd != 0 && client_list[i].time != 0) {
81 count++;
82 }
83 }
84
85 return count;
86}
87
da8f8bbc
DM
88static void
89watchdog_close(void)
90{
91 if (watchdog_fd != -1) {
92 if (write(watchdog_fd, "V", 1) == -1) {
93 perror("write magic watchdog close");
94 }
95 if (close(watchdog_fd) == -1) {
96 perror("write magic watchdog close");
97 }
98 }
99
100 watchdog_fd = -1;
101}
102
103int
104main(void)
105{
da8f8bbc
DM
106 struct sockaddr_un my_addr, peer_addr;
107 socklen_t peer_addr_size;
108 struct epoll_event ev, events[MAX_EVENTS];
98099e4f 109 int socket_count, listen_sock, nfds, epollfd, sigfd;
06b589da 110 int unlink_socket = 0;
98099e4f 111
da8f8bbc 112 struct stat fs;
98099e4f
DM
113
114 if (stat(WD_ACTIVE_MARKER, &fs) == 0) {
115 fprintf(stderr, "watchdog active - unable to restart watchdog-mux\n");
116 exit(EXIT_FAILURE);
117 }
b7d5be18
DM
118
119 /* if you want to debug, set options in /lib/modprobe.d/aliases.conf
120 * options softdog soft_noboot=1
121 */
da8f8bbc 122 if (stat(WATCHDOG_DEV, &fs) == -1) {
b7d5be18 123 system("modprobe -q softdog"); // load softdog by default
da8f8bbc
DM
124 }
125
126 if ((watchdog_fd = open(WATCHDOG_DEV, O_WRONLY)) == -1) {
127 perror("watchdog open");
128 exit(EXIT_FAILURE);
129 }
130
131 if (ioctl(watchdog_fd, WDIOC_SETTIMEOUT, &watchdog_timeout) == -1) {
132 perror("watchdog set timeout");
133 watchdog_close();
134 exit(EXIT_FAILURE);
135 }
136
137 /* read and log watchdog identity */
138 struct watchdog_info wdinfo;
139 if (ioctl(watchdog_fd, WDIOC_GETSUPPORT, &wdinfo) == -1) {
140 perror("read watchdog info");
141 watchdog_close();
142 exit(EXIT_FAILURE);
143 }
144
145 wdinfo.identity[sizeof(wdinfo.identity) - 1] = 0; // just to be sure
146 fprintf(stderr, "Watchdog driver '%s', version %x\n",
147 wdinfo.identity, wdinfo.firmware_version);
148
e99d3682 149 socket_count = sd_listen_fds(0);
06b589da 150
e99d3682 151 if (socket_count > 1) {
da8f8bbc 152
ba878e35 153 perror("too many file descriptors received.\n");
e99d3682
DM
154 goto err;
155
156 } else if (socket_count == 1) {
da8f8bbc 157
e99d3682 158 listen_sock = SD_LISTEN_FDS_START + 0;
06b589da 159
e99d3682 160 } else {
da8f8bbc 161
06b589da
DM
162 unlink_socket = 1;
163
164 unlink(WD_SOCK_PATH);
e99d3682
DM
165
166 listen_sock = socket(AF_UNIX, SOCK_STREAM, 0);
167 if (listen_sock == -1) {
168 perror("socket create");
169 exit(EXIT_FAILURE);
170 }
171
172 memset(&my_addr, 0, sizeof(struct sockaddr_un));
173 my_addr.sun_family = AF_UNIX;
06b589da 174 strncpy(my_addr.sun_path, WD_SOCK_PATH, sizeof(my_addr.sun_path) - 1);
e99d3682
DM
175
176 if (bind(listen_sock, (struct sockaddr *) &my_addr,
177 sizeof(struct sockaddr_un)) == -1) {
178 perror("socket bind");
179 exit(EXIT_FAILURE);
180 }
da8f8bbc 181
e99d3682
DM
182 if (listen(listen_sock, LISTEN_BACKLOG) == -1) {
183 perror("socket listen");
184 goto err;
185 }
da8f8bbc 186 }
e99d3682 187
da8f8bbc
DM
188 epollfd = epoll_create(10);
189 if (epollfd == -1) {
190 perror("epoll_create");
191 goto err;
192 }
193
194 ev.events = EPOLLIN;
98099e4f 195 ev.data.ptr = alloc_client(listen_sock, 0);
da8f8bbc 196 if (epoll_ctl(epollfd, EPOLL_CTL_ADD, listen_sock, &ev) == -1) {
98099e4f
DM
197 perror("epoll_ctl add listen_sock");
198 goto err;
199 }
200
201 sigset_t mask;
202 sigemptyset(&mask);
203 sigaddset(&mask, SIGINT);
204 sigaddset(&mask, SIGTERM);
205 sigaddset(&mask, SIGHUP);
206
207 sigprocmask(SIG_BLOCK, &mask, NULL);
208
209 if ((sigfd = signalfd(-1, &mask, SFD_NONBLOCK)) < 0) {
210 perror("unable to open signalfd");
da8f8bbc
DM
211 goto err;
212 }
213
98099e4f
DM
214 ev.events = EPOLLIN;
215 ev.data.ptr = alloc_client(sigfd, 0);
216 if (epoll_ctl(epollfd, EPOLL_CTL_ADD, sigfd, &ev) == -1) {
217 perror("epoll_ctl add sigfd");
218 goto err;
219 }
220
da8f8bbc 221 for (;;) {
1fe42db7 222 nfds = epoll_wait(epollfd, events, MAX_EVENTS, 1000);
da8f8bbc 223 if (nfds == -1) {
1fe42db7
DM
224 if (errno == EINTR)
225 continue;
226
da8f8bbc
DM
227 perror("epoll_pwait");
228 goto err;
229 }
230
1fe42db7
DM
231 if (nfds == 0) { // timeout
232
5ce9f244
DM
233 // check for timeouts
234 if (update_watchdog) {
235 int i;
236 time_t ctime = time(NULL);
237 for (i = 0; i < MAX_CLIENTS; i++) {
238 if (client_list[i].fd != 0 && client_list[i].time != 0 &&
239 ((ctime - client_list[i].time) > client_watchdog_timeout)) {
240 update_watchdog = 0;
241 fprintf(stderr, "client watchdog expired - disable watchdog updates\n");
242 }
243 }
244 }
245
246 if (update_watchdog) {
247 if (ioctl(watchdog_fd, WDIOC_KEEPALIVE, 0) == -1) {
248 perror("watchdog update failed");
249 }
1fe42db7
DM
250 }
251
252 continue;
253 }
254
115805fd
DM
255 if (!update_watchdog)
256 break;
257
98099e4f
DM
258 int terminate = 0;
259
da8f8bbc
DM
260 int n;
261 for (n = 0; n < nfds; ++n) {
4915a0e9
DM
262 wd_client_t *wd_client = events[n].data.ptr;
263 if (wd_client->fd == listen_sock) {
da8f8bbc
DM
264 int conn_sock = accept(listen_sock, (struct sockaddr *) &peer_addr, &peer_addr_size);
265 if (conn_sock == -1) {
266 perror("accept");
267 goto err; // fixme
268 }
269 if (fcntl(conn_sock, F_SETFL, O_NONBLOCK) == -1) {
270 perror("setnonblocking");
271 goto err; // fixme
272 }
273
98099e4f 274 wd_client_t *new_client = alloc_client(conn_sock, time(NULL));
4915a0e9
DM
275 if (new_client == NULL) {
276 fprintf(stderr, "unable to alloc wd_client structure\n");
277 goto err; // fixme;
278 }
98099e4f
DM
279
280 mkdir(WD_ACTIVE_MARKER, 0600);
281
da8f8bbc 282 ev.events = EPOLLIN;
4915a0e9 283 ev.data.ptr = new_client;
da8f8bbc
DM
284 if (epoll_ctl(epollfd, EPOLL_CTL_ADD, conn_sock, &ev) == -1) {
285 perror("epoll_ctl: add conn_sock");
286 goto err; // fixme
287 }
98099e4f
DM
288 } else if (wd_client->fd == sigfd) {
289
290 /* signal handling */
291
292 int rv = 0;
293 struct signalfd_siginfo si;
294
295 if ((rv = read(sigfd, &si, sizeof(si))) && rv >= 0) {
296 if (si.ssi_signo == SIGHUP) {
297 perror("got SIGHUP - ignored");
298 } else {
299 terminate = 1;
300 fprintf(stderr, "got terminate request\n");
301 }
302 }
303
da8f8bbc
DM
304 } else {
305 char buf[4096];
4915a0e9 306 int cfd = wd_client->fd;
98099e4f 307
da8f8bbc
DM
308 ssize_t bytes = read(cfd, buf, sizeof(buf));
309 if (bytes == -1) {
310 perror("read");
311 goto err; // fixme
312 } else if (bytes > 0) {
4178d9ea
DM
313 int i;
314 for (i = 0; i < bytes; i++) {
315 if (buf[i] == 'V') {
316 wd_client->magic_close = 1;
317 } else {
318 wd_client->magic_close = 0;
319 }
320 }
321 wd_client->time = time(NULL);
da8f8bbc
DM
322 } else {
323 if (events[n].events & EPOLLHUP || events[n].events & EPOLLERR) {
4178d9ea 324 //printf("GOT %016x event\n", events[n].events);
da8f8bbc
DM
325 if (epoll_ctl(epollfd, EPOLL_CTL_DEL, cfd, NULL) == -1) {
326 perror("epoll_ctl: del conn_sock");
327 goto err; // fixme
328 }
329 if (close(cfd) == -1) {
330 perror("close conn_sock");
331 goto err; // fixme
332 }
98099e4f 333
4178d9ea 334 if (!wd_client->magic_close) {
5ce9f244
DM
335 fprintf(stderr, "client did not stop watchdog - disable watchdog updates\n");
336 update_watchdog = 0;
4178d9ea
DM
337 } else {
338 free_client(wd_client);
339 }
340
98099e4f
DM
341 if (!active_client_count()) {
342 rmdir(WD_ACTIVE_MARKER);
343 }
da8f8bbc
DM
344 }
345 }
346 }
347 }
98099e4f
DM
348 if (terminate)
349 break;
da8f8bbc
DM
350 }
351
98099e4f
DM
352 int active_count = active_client_count();
353 if (active_count > 0) {
354 fprintf(stderr, "exit watchdog-mux with active connections\n");
355 } else {
356 fprintf(stderr, "clean exit\n");
357 watchdog_close();
358 }
359
06b589da
DM
360 if (unlink_socket)
361 unlink(WD_SOCK_PATH);
362
da8f8bbc
DM
363 exit(EXIT_SUCCESS);
364
365err:
06b589da
DM
366 if (unlink_socket)
367 unlink(WD_SOCK_PATH);
368
da8f8bbc
DM
369 exit(EXIT_FAILURE);
370}