]> git.proxmox.com Git - pve-ha-manager.git/blame - src/watchdog-mux.c
remove watchdog-mux.socket
[pve-ha-manager.git] / src / watchdog-mux.c
CommitLineData
6263c81d 1#define _GNU_SOURCE
da8f8bbc
DM
2#include <stdio.h>
3#include <stdlib.h>
4#include <unistd.h>
5#include <fcntl.h>
6#include <string.h>
1fe42db7 7#include <errno.h>
98099e4f 8#include <time.h>
da8f8bbc 9#include <sys/ioctl.h>
7336614a
DM
10#include <sys/types.h>
11#include <sys/stat.h>
da8f8bbc
DM
12#include <sys/socket.h>
13#include <sys/un.h>
14#include <sys/epoll.h>
98099e4f
DM
15#include <signal.h>
16#include <sys/signalfd.h>
da8f8bbc
DM
17
18#include <linux/types.h>
19#include <linux/watchdog.h>
20
/* Filesystem path of the AF_UNIX stream socket that main() binds and
 * listens on for client connections. */
#define WD_SOCK_PATH "/run/watchdog-mux.sock"
/* Marker directory: created whenever a client connection is accepted and
 * removed once no active client remains. main() refuses to start while it
 * exists. */
#define WD_ACTIVE_MARKER "/run/watchdog-mux.active"

/* Backlog passed to listen() on the client socket.
 * NOTE(review): this used to say "set same value in watchdog-mux.socket";
 * that unit file appears to have been removed - confirm and drop the
 * cross-reference if so. */
#define LISTEN_BACKLOG 32

/* Size of the event array handed to epoll_wait(), i.e. the maximum number
 * of events processed per wake-up. */
#define MAX_EVENTS 10

/* Watchdog character device opened by main(). */
#define WATCHDOG_DEV "/dev/watchdog"

/* File descriptor of the opened watchdog device, -1 while it is not open. */
int watchdog_fd = -1;
/* Value handed to the WDIOC_SETTIMEOUT ioctl at startup
 * (presumably seconds, as in the Linux watchdog API - TODO confirm). */
int watchdog_timeout = 10;
/* Maximum age, in seconds (compared against time() differences), of a
 * client's last message before the client is considered expired. */
int client_watchdog_timeout = 60;
/* Non-zero while keepalives are sent to the device. Cleared when a client
 * expires or disconnects without the magic close byte; once cleared the
 * main loop stops updating the watchdog and terminates. */
int update_watchdog = 1;
4915a0e9
DM
34
/*
 * One slot of the static client table.
 *
 * A slot is considered free while fd == 0. The listening socket and the
 * signalfd are stored in the same table with time == 0, which is what keeps
 * them out of active_client_count().
 */
typedef struct wd_client {
    int fd;           /* descriptor registered with epoll; 0 marks a free slot */
    time_t time;      /* time() of the last message received; 0 for internal fds */
    int magic_close;  /* 1 if the last byte received from the client was 'V' */
} wd_client_t;

/* Capacity of the client table (includes the two internal descriptors). */
#define MAX_CLIENTS 100

/* Zero-initialised, so every slot starts out free. */
static wd_client_t client_list[MAX_CLIENTS];
44
45static wd_client_t *
98099e4f 46alloc_client(int fd, time_t time)
4915a0e9
DM
47{
48 int i;
49
50 for (i = 0; i < MAX_CLIENTS; i++) {
51 if (client_list[i].fd == 0) {
4915a0e9 52 client_list[i].fd = fd;
98099e4f 53 client_list[i].time = time;
4178d9ea 54 client_list[i].magic_close = 0;
4915a0e9
DM
55 return &client_list[i];
56 }
57 }
58
59 return NULL;
60}
61
62static void
63free_client(wd_client_t *wd_client)
64{
65 if (!wd_client)
66 return;
67
98099e4f 68 wd_client->time = 0;
4915a0e9 69 wd_client->fd = 0;
4178d9ea 70 wd_client->magic_close = 0;
4915a0e9
DM
71}
72
98099e4f
DM
73static int
74active_client_count(void)
75{
76 int i, count = 0;
77
78 for (i = 0; i < MAX_CLIENTS; i++) {
79 if (client_list[i].fd != 0 && client_list[i].time != 0) {
80 count++;
81 }
82 }
83
84 return count;
85}
86
da8f8bbc
DM
87static void
88watchdog_close(void)
89{
90 if (watchdog_fd != -1) {
91 if (write(watchdog_fd, "V", 1) == -1) {
92 perror("write magic watchdog close");
93 }
94 if (close(watchdog_fd) == -1) {
95 perror("write magic watchdog close");
96 }
97 }
98
99 watchdog_fd = -1;
100}
101
102int
103main(void)
104{
da8f8bbc
DM
105 struct sockaddr_un my_addr, peer_addr;
106 socklen_t peer_addr_size;
107 struct epoll_event ev, events[MAX_EVENTS];
f8a3fc80 108 int listen_sock, nfds, epollfd, sigfd;
06b589da 109 int unlink_socket = 0;
98099e4f 110
da8f8bbc 111 struct stat fs;
98099e4f
DM
112
113 if (stat(WD_ACTIVE_MARKER, &fs) == 0) {
114 fprintf(stderr, "watchdog active - unable to restart watchdog-mux\n");
115 exit(EXIT_FAILURE);
116 }
b7d5be18
DM
117
118 /* if you want to debug, set options in /lib/modprobe.d/aliases.conf
119 * options softdog soft_noboot=1
120 */
da8f8bbc 121 if (stat(WATCHDOG_DEV, &fs) == -1) {
6263c81d
DM
122 char *wd_module = getenv("WATCHDOG_MODULE");
123 if (wd_module) {
124 char *cmd = NULL;
125 if ((asprintf(&cmd, "modprobe -q %s", wd_module) == -1)) {
126 perror("assemble modprobe command failed");
127 exit(EXIT_FAILURE);
128 }
129 system(cmd);
130 } else {
131 system("modprobe -q softdog"); // load softdog by default
132 }
da8f8bbc
DM
133 }
134
135 if ((watchdog_fd = open(WATCHDOG_DEV, O_WRONLY)) == -1) {
136 perror("watchdog open");
137 exit(EXIT_FAILURE);
138 }
139
140 if (ioctl(watchdog_fd, WDIOC_SETTIMEOUT, &watchdog_timeout) == -1) {
141 perror("watchdog set timeout");
142 watchdog_close();
143 exit(EXIT_FAILURE);
144 }
145
146 /* read and log watchdog identity */
147 struct watchdog_info wdinfo;
148 if (ioctl(watchdog_fd, WDIOC_GETSUPPORT, &wdinfo) == -1) {
149 perror("read watchdog info");
150 watchdog_close();
151 exit(EXIT_FAILURE);
152 }
153
154 wdinfo.identity[sizeof(wdinfo.identity) - 1] = 0; // just to be sure
155 fprintf(stderr, "Watchdog driver '%s', version %x\n",
156 wdinfo.identity, wdinfo.firmware_version);
157
f8a3fc80
TL
158 /* always unlink socket path then create socket */
159 unlink(WD_SOCK_PATH);
160 unlink_socket = 1;
da8f8bbc 161
f8a3fc80
TL
162 listen_sock = socket(AF_UNIX, SOCK_STREAM, 0);
163 if (listen_sock == -1) {
164 perror("socket create");
165 exit(EXIT_FAILURE);
166 }
167 memset(&my_addr, 0, sizeof(struct sockaddr_un));
168 my_addr.sun_family = AF_UNIX;
169 strncpy(my_addr.sun_path, WD_SOCK_PATH, sizeof(my_addr.sun_path) - 1);
170
171 if (bind(listen_sock, (struct sockaddr *) &my_addr,
172 sizeof(struct sockaddr_un)) == -1) {
173 perror("socket bind");
174 exit(EXIT_FAILURE);
175 }
e99d3682 176
f8a3fc80
TL
177 if (listen(listen_sock, LISTEN_BACKLOG) == -1) {
178 perror("socket listen");
179 goto err;
da8f8bbc 180 }
f8a3fc80 181
da8f8bbc
DM
182 epollfd = epoll_create(10);
183 if (epollfd == -1) {
184 perror("epoll_create");
185 goto err;
186 }
187
188 ev.events = EPOLLIN;
98099e4f 189 ev.data.ptr = alloc_client(listen_sock, 0);
da8f8bbc 190 if (epoll_ctl(epollfd, EPOLL_CTL_ADD, listen_sock, &ev) == -1) {
98099e4f
DM
191 perror("epoll_ctl add listen_sock");
192 goto err;
193 }
194
195 sigset_t mask;
196 sigemptyset(&mask);
197 sigaddset(&mask, SIGINT);
198 sigaddset(&mask, SIGTERM);
199 sigaddset(&mask, SIGHUP);
200
201 sigprocmask(SIG_BLOCK, &mask, NULL);
202
203 if ((sigfd = signalfd(-1, &mask, SFD_NONBLOCK)) < 0) {
204 perror("unable to open signalfd");
da8f8bbc
DM
205 goto err;
206 }
207
98099e4f
DM
208 ev.events = EPOLLIN;
209 ev.data.ptr = alloc_client(sigfd, 0);
210 if (epoll_ctl(epollfd, EPOLL_CTL_ADD, sigfd, &ev) == -1) {
211 perror("epoll_ctl add sigfd");
212 goto err;
213 }
214
da8f8bbc 215 for (;;) {
1fe42db7 216 nfds = epoll_wait(epollfd, events, MAX_EVENTS, 1000);
da8f8bbc 217 if (nfds == -1) {
1fe42db7
DM
218 if (errno == EINTR)
219 continue;
220
da8f8bbc
DM
221 perror("epoll_pwait");
222 goto err;
223 }
224
1fe42db7
DM
225 if (nfds == 0) { // timeout
226
5ce9f244
DM
227 // check for timeouts
228 if (update_watchdog) {
229 int i;
230 time_t ctime = time(NULL);
231 for (i = 0; i < MAX_CLIENTS; i++) {
232 if (client_list[i].fd != 0 && client_list[i].time != 0 &&
233 ((ctime - client_list[i].time) > client_watchdog_timeout)) {
234 update_watchdog = 0;
235 fprintf(stderr, "client watchdog expired - disable watchdog updates\n");
236 }
237 }
238 }
239
240 if (update_watchdog) {
241 if (ioctl(watchdog_fd, WDIOC_KEEPALIVE, 0) == -1) {
242 perror("watchdog update failed");
243 }
1fe42db7
DM
244 }
245
246 continue;
247 }
248
115805fd
DM
249 if (!update_watchdog)
250 break;
251
98099e4f
DM
252 int terminate = 0;
253
da8f8bbc
DM
254 int n;
255 for (n = 0; n < nfds; ++n) {
4915a0e9
DM
256 wd_client_t *wd_client = events[n].data.ptr;
257 if (wd_client->fd == listen_sock) {
da8f8bbc
DM
258 int conn_sock = accept(listen_sock, (struct sockaddr *) &peer_addr, &peer_addr_size);
259 if (conn_sock == -1) {
260 perror("accept");
261 goto err; // fixme
262 }
263 if (fcntl(conn_sock, F_SETFL, O_NONBLOCK) == -1) {
264 perror("setnonblocking");
265 goto err; // fixme
266 }
267
98099e4f 268 wd_client_t *new_client = alloc_client(conn_sock, time(NULL));
4915a0e9
DM
269 if (new_client == NULL) {
270 fprintf(stderr, "unable to alloc wd_client structure\n");
271 goto err; // fixme;
272 }
98099e4f
DM
273
274 mkdir(WD_ACTIVE_MARKER, 0600);
275
da8f8bbc 276 ev.events = EPOLLIN;
4915a0e9 277 ev.data.ptr = new_client;
da8f8bbc
DM
278 if (epoll_ctl(epollfd, EPOLL_CTL_ADD, conn_sock, &ev) == -1) {
279 perror("epoll_ctl: add conn_sock");
280 goto err; // fixme
281 }
98099e4f
DM
282 } else if (wd_client->fd == sigfd) {
283
284 /* signal handling */
285
286 int rv = 0;
287 struct signalfd_siginfo si;
288
289 if ((rv = read(sigfd, &si, sizeof(si))) && rv >= 0) {
290 if (si.ssi_signo == SIGHUP) {
291 perror("got SIGHUP - ignored");
292 } else {
293 terminate = 1;
294 fprintf(stderr, "got terminate request\n");
295 }
296 }
297
da8f8bbc
DM
298 } else {
299 char buf[4096];
4915a0e9 300 int cfd = wd_client->fd;
98099e4f 301
da8f8bbc
DM
302 ssize_t bytes = read(cfd, buf, sizeof(buf));
303 if (bytes == -1) {
304 perror("read");
305 goto err; // fixme
306 } else if (bytes > 0) {
4178d9ea
DM
307 int i;
308 for (i = 0; i < bytes; i++) {
309 if (buf[i] == 'V') {
310 wd_client->magic_close = 1;
311 } else {
312 wd_client->magic_close = 0;
313 }
314 }
315 wd_client->time = time(NULL);
da8f8bbc
DM
316 } else {
317 if (events[n].events & EPOLLHUP || events[n].events & EPOLLERR) {
4178d9ea 318 //printf("GOT %016x event\n", events[n].events);
da8f8bbc
DM
319 if (epoll_ctl(epollfd, EPOLL_CTL_DEL, cfd, NULL) == -1) {
320 perror("epoll_ctl: del conn_sock");
321 goto err; // fixme
322 }
323 if (close(cfd) == -1) {
324 perror("close conn_sock");
325 goto err; // fixme
326 }
98099e4f 327
4178d9ea 328 if (!wd_client->magic_close) {
5ce9f244
DM
329 fprintf(stderr, "client did not stop watchdog - disable watchdog updates\n");
330 update_watchdog = 0;
4178d9ea
DM
331 } else {
332 free_client(wd_client);
333 }
334
98099e4f
DM
335 if (!active_client_count()) {
336 rmdir(WD_ACTIVE_MARKER);
337 }
da8f8bbc
DM
338 }
339 }
340 }
341 }
98099e4f
DM
342 if (terminate)
343 break;
da8f8bbc
DM
344 }
345
98099e4f
DM
346 int active_count = active_client_count();
347 if (active_count > 0) {
348 fprintf(stderr, "exit watchdog-mux with active connections\n");
349 } else {
350 fprintf(stderr, "clean exit\n");
351 watchdog_close();
352 }
353
06b589da
DM
354 if (unlink_socket)
355 unlink(WD_SOCK_PATH);
356
da8f8bbc
DM
357 exit(EXIT_SUCCESS);
358
359err:
06b589da
DM
360 if (unlink_socket)
361 unlink(WD_SOCK_PATH);
362
da8f8bbc
DM
363 exit(EXIT_FAILURE);
364}