git.proxmox.com Git - pve-ha-manager.git/blob - src/watchdog-mux.c
implement magic_close for clients
[pve-ha-manager.git] / src / watchdog-mux.c
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <unistd.h>
4 #include <fcntl.h>
5 #include <string.h>
6 #include <errno.h>
7 #include <time.h>
8 #include <sys/ioctl.h>
9 #include <sys/types.h>
10 #include <sys/stat.h>
11 #include <sys/socket.h>
12 #include <sys/un.h>
13 #include <sys/epoll.h>
14 #include <signal.h>
15 #include <sys/signalfd.h>
16
17 #include <linux/types.h>
18 #include <linux/watchdog.h>
19
20 #include <systemd/sd-daemon.h>
21
/* Filesystem locations used by the daemon. */
#define WATCHDOG_DEV "/dev/watchdog"                /* watchdog device opened by main() */
#define MY_SOCK_PATH "/run/watchdog-mux.sock"       /* unix socket clients connect to */
#define WD_ACTIVE_MARKER "/run/watchdog-mux.active" /* directory present while clients are active */

/* Sizing of the listening socket backlog and of one epoll_wait() batch. */
#define LISTEN_BACKLOG 50
#define MAX_EVENTS 10

/* Open descriptor of WATCHDOG_DEV, or -1 while the device is not open. */
int watchdog_fd = -1;

/* Timeout value handed to the driver via WDIOC_SETTIMEOUT. */
int watchdog_timeout = 20;

32
33
/*
 * One entry of the descriptor table. Entries are used both for real
 * watchdog clients and for the daemon's own listen/signal descriptors.
 */
typedef struct wd_client {
    int fd;          /* descriptor owned by this entry; 0 marks the entry as unused */
    time_t time;     /* time of the last client update; 0 for entries that are not counted as active */
    int magic_close; /* non-zero when the last byte received from the client was 'V' */
} wd_client_t;

/* Fixed capacity of the descriptor table. */
#define MAX_CLIENTS 100

/* Statically allocated table; zero-initialized, so every entry starts out unused. */
static wd_client_t client_list[MAX_CLIENTS];
43
44 static wd_client_t *
45 alloc_client(int fd, time_t time)
46 {
47 int i;
48
49 for (i = 0; i < MAX_CLIENTS; i++) {
50 if (client_list[i].fd == 0) {
51 client_list[i].fd = fd;
52 client_list[i].time = time;
53 client_list[i].magic_close = 0;
54 return &client_list[i];
55 }
56 }
57
58 return NULL;
59 }
60
61 static void
62 free_client(wd_client_t *wd_client)
63 {
64 if (!wd_client)
65 return;
66
67 wd_client->time = 0;
68 wd_client->fd = 0;
69 wd_client->magic_close = 0;
70 }
71
72 static int
73 active_client_count(void)
74 {
75 int i, count = 0;
76
77 for (i = 0; i < MAX_CLIENTS; i++) {
78 if (client_list[i].fd != 0 && client_list[i].time != 0) {
79 count++;
80 }
81 }
82
83 return count;
84 }
85
86 static void
87 watchdog_close(void)
88 {
89 if (watchdog_fd != -1) {
90 if (write(watchdog_fd, "V", 1) == -1) {
91 perror("write magic watchdog close");
92 }
93 if (close(watchdog_fd) == -1) {
94 perror("write magic watchdog close");
95 }
96 }
97
98 watchdog_fd = -1;
99 }
100
101 int
102 main(void)
103 {
104 struct sockaddr_un my_addr, peer_addr;
105 socklen_t peer_addr_size;
106 struct epoll_event ev, events[MAX_EVENTS];
107 int socket_count, listen_sock, nfds, epollfd, sigfd;
108
109
110 struct stat fs;
111
112 if (stat(WD_ACTIVE_MARKER, &fs) == 0) {
113 fprintf(stderr, "watchdog active - unable to restart watchdog-mux\n");
114 exit(EXIT_FAILURE);
115 }
116
117 if (stat(WATCHDOG_DEV, &fs) == -1) {
118 system("modprobe -q softdog soft_noboot=1"); // fixme
119 }
120
121 if ((watchdog_fd = open(WATCHDOG_DEV, O_WRONLY)) == -1) {
122 perror("watchdog open");
123 exit(EXIT_FAILURE);
124 }
125
126 if (ioctl(watchdog_fd, WDIOC_SETTIMEOUT, &watchdog_timeout) == -1) {
127 perror("watchdog set timeout");
128 watchdog_close();
129 exit(EXIT_FAILURE);
130 }
131
132 /* read and log watchdog identity */
133 struct watchdog_info wdinfo;
134 if (ioctl(watchdog_fd, WDIOC_GETSUPPORT, &wdinfo) == -1) {
135 perror("read watchdog info");
136 watchdog_close();
137 exit(EXIT_FAILURE);
138 }
139
140 wdinfo.identity[sizeof(wdinfo.identity) - 1] = 0; // just to be sure
141 fprintf(stderr, "Watchdog driver '%s', version %x\n",
142 wdinfo.identity, wdinfo.firmware_version);
143
144 socket_count = sd_listen_fds(0);
145
146 if (socket_count > 1) {
147
148 perror("too many file descriptors received.\n");
149 goto err;
150
151 } else if (socket_count == 1) {
152
153 listen_sock = SD_LISTEN_FDS_START + 0;
154
155 } else {
156
157 unlink(MY_SOCK_PATH);
158
159 listen_sock = socket(AF_UNIX, SOCK_STREAM, 0);
160 if (listen_sock == -1) {
161 perror("socket create");
162 exit(EXIT_FAILURE);
163 }
164
165 memset(&my_addr, 0, sizeof(struct sockaddr_un));
166 my_addr.sun_family = AF_UNIX;
167 strncpy(my_addr.sun_path, MY_SOCK_PATH, sizeof(my_addr.sun_path) - 1);
168
169 if (bind(listen_sock, (struct sockaddr *) &my_addr,
170 sizeof(struct sockaddr_un)) == -1) {
171 perror("socket bind");
172 exit(EXIT_FAILURE);
173 }
174
175 if (listen(listen_sock, LISTEN_BACKLOG) == -1) {
176 perror("socket listen");
177 goto err;
178 }
179 }
180
181 epollfd = epoll_create(10);
182 if (epollfd == -1) {
183 perror("epoll_create");
184 goto err;
185 }
186
187 ev.events = EPOLLIN;
188 ev.data.ptr = alloc_client(listen_sock, 0);
189 if (epoll_ctl(epollfd, EPOLL_CTL_ADD, listen_sock, &ev) == -1) {
190 perror("epoll_ctl add listen_sock");
191 goto err;
192 }
193
194 sigset_t mask;
195 sigemptyset(&mask);
196 sigaddset(&mask, SIGINT);
197 sigaddset(&mask, SIGTERM);
198 sigaddset(&mask, SIGHUP);
199
200 sigprocmask(SIG_BLOCK, &mask, NULL);
201
202 if ((sigfd = signalfd(-1, &mask, SFD_NONBLOCK)) < 0) {
203 perror("unable to open signalfd");
204 goto err;
205 }
206
207 ev.events = EPOLLIN;
208 ev.data.ptr = alloc_client(sigfd, 0);
209 if (epoll_ctl(epollfd, EPOLL_CTL_ADD, sigfd, &ev) == -1) {
210 perror("epoll_ctl add sigfd");
211 goto err;
212 }
213
214 for (;;) {
215 nfds = epoll_wait(epollfd, events, MAX_EVENTS, 1000);
216 if (nfds == -1) {
217 if (errno == EINTR)
218 continue;
219
220 perror("epoll_pwait");
221 goto err;
222 }
223
224 if (nfds == 0) { // timeout
225
226 if (ioctl(watchdog_fd, WDIOC_KEEPALIVE, 0) == -1) {
227 perror("watchdog update failed");
228 }
229
230 continue;
231 }
232
233 int terminate = 0;
234
235 int n;
236 for (n = 0; n < nfds; ++n) {
237 wd_client_t *wd_client = events[n].data.ptr;
238 if (wd_client->fd == listen_sock) {
239 int conn_sock = accept(listen_sock, (struct sockaddr *) &peer_addr, &peer_addr_size);
240 if (conn_sock == -1) {
241 perror("accept");
242 goto err; // fixme
243 }
244 if (fcntl(conn_sock, F_SETFL, O_NONBLOCK) == -1) {
245 perror("setnonblocking");
246 goto err; // fixme
247 }
248
249 wd_client_t *new_client = alloc_client(conn_sock, time(NULL));
250 if (new_client == NULL) {
251 fprintf(stderr, "unable to alloc wd_client structure\n");
252 goto err; // fixme;
253 }
254
255 mkdir(WD_ACTIVE_MARKER, 0600);
256
257 ev.events = EPOLLIN;
258 ev.data.ptr = new_client;
259 if (epoll_ctl(epollfd, EPOLL_CTL_ADD, conn_sock, &ev) == -1) {
260 perror("epoll_ctl: add conn_sock");
261 goto err; // fixme
262 }
263 } else if (wd_client->fd == sigfd) {
264
265 /* signal handling */
266
267 int rv = 0;
268 struct signalfd_siginfo si;
269
270 if ((rv = read(sigfd, &si, sizeof(si))) && rv >= 0) {
271 if (si.ssi_signo == SIGHUP) {
272 perror("got SIGHUP - ignored");
273 } else {
274 terminate = 1;
275 fprintf(stderr, "got terminate request\n");
276 }
277 }
278
279 } else {
280 char buf[4096];
281 int cfd = wd_client->fd;
282
283 ssize_t bytes = read(cfd, buf, sizeof(buf));
284 if (bytes == -1) {
285 perror("read");
286 goto err; // fixme
287 } else if (bytes > 0) {
288 int i;
289 for (i = 0; i < bytes; i++) {
290 if (buf[i] == 'V') {
291 wd_client->magic_close = 1;
292 } else {
293 wd_client->magic_close = 0;
294 }
295 }
296 wd_client->time = time(NULL);
297 } else {
298 if (events[n].events & EPOLLHUP || events[n].events & EPOLLERR) {
299 //printf("GOT %016x event\n", events[n].events);
300 if (epoll_ctl(epollfd, EPOLL_CTL_DEL, cfd, NULL) == -1) {
301 perror("epoll_ctl: del conn_sock");
302 goto err; // fixme
303 }
304 if (close(cfd) == -1) {
305 perror("close conn_sock");
306 goto err; // fixme
307 }
308
309 if (!wd_client->magic_close) {
310 fprintf(stderr, "client did not stop watchdog\n");
311 } else {
312 free_client(wd_client);
313 }
314
315 if (!active_client_count()) {
316 rmdir(WD_ACTIVE_MARKER);
317 }
318 }
319 }
320 }
321 }
322 if (terminate)
323 break;
324 }
325
326 int active_count = active_client_count();
327 if (active_count > 0) {
328 fprintf(stderr, "exit watchdog-mux with active connections\n");
329 } else {
330 fprintf(stderr, "clean exit\n");
331 watchdog_close();
332 }
333
334 unlink(MY_SOCK_PATH);
335 exit(EXIT_SUCCESS);
336
337 err:
338 unlink(MY_SOCK_PATH);
339 exit(EXIT_FAILURE);
340 }