]> git.proxmox.com Git - mirror_frr.git/blob - watchfrr/watchfrr.c
Merge remote-tracking branch 'origin/master' into pim_lib_work2
[mirror_frr.git] / watchfrr / watchfrr.c
1 /*
2 Monitor status of frr daemons and restart if necessary.
3
4 Copyright (C) 2004 Andrew J. Schorr
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 */
20
21 #include <zebra.h>
22 #include <thread.h>
23 #include <log.h>
24 #include <network.h>
25 #include <sigevent.h>
26 #include <lib/version.h>
27 #include "command.h"
28 #include "memory_vty.h"
29
30 #include <getopt.h>
31 #include <sys/un.h>
32 #include <sys/wait.h>
33 #include <memory.h>
34 #include <systemd.h>
35
36 #include "watchfrr.h"
37
#ifndef MIN
#define MIN(X, Y) (((X) <= (Y)) ? (X) : (Y))
#endif

/*
 * Timer randomisation helpers.
 * JITTER(X) is a pseudo-random offset between -(X)/2 and (X) - (X)/2;
 * FUZZY(X) is X perturbed by the jitter of one twentieth of X.
 * Both macros expand their argument more than once.
 */
#define JITTER(X) ((random() % ((X) + 1)) - ((X) / 2))
#define FUZZY(X) ((X) + JITTER((X) / 20))

/* Built-in defaults; the time values are in seconds (see usage()). */
#define DEFAULT_PERIOD 5
#define DEFAULT_TIMEOUT 10
#define DEFAULT_RESTART_TIMEOUT 20
#define DEFAULT_LOGLEVEL LOG_INFO
#define DEFAULT_MIN_RESTART 60
#define DEFAULT_MAX_RESTART 600

/* Pid file location: the build-time path if configured, else under STATEDIR. */
#ifdef PATH_WATCHFRR_PID
#define DEFAULT_PIDFILE PATH_WATCHFRR_PID
#else
#define DEFAULT_PIDFILE STATEDIR "/watchfrr.pid"
#endif

/* Directory holding the daemons' "<name>.vty" sockets. */
#ifdef DAEMON_VTY_DIR
#define VTYDIR DAEMON_VTY_DIR
#else
#define VTYDIR STATEDIR
#endif

/* Token sent with the echo command and expected back in the reply. */
#define PING_TOKEN "PING"
64
/*
 * Thread master used for every timer and I/O callback in this file.
 * It has external linkage because code inside libzebra refers to it.
 */
struct thread_master *master;
67
/*
 * Operating mode selected on the command line.  The numeric values are
 * the mode numbers printed by usage() and index mode_str[].
 */
typedef enum {
	MODE_MONITOR = 0,		/* watch and report only */
	MODE_GLOBAL_RESTART = 1,	/* one command restarts everything */
	MODE_SEPARATE_RESTART = 2,	/* restart just the failing daemon */
	MODE_PHASED_ZEBRA_RESTART = 3,	/* phased restart when zebra fails */
	MODE_PHASED_ALL_RESTART = 4	/* phased restart on any failure */
} watch_mode_t;
75
/* Human-readable mode names, indexed by watch_mode_t. */
static const char *mode_str[] = {
	"monitor",				/* MODE_MONITOR */
	"global restart",			/* MODE_GLOBAL_RESTART */
	"individual daemon restart",		/* MODE_SEPARATE_RESTART */
	"phased zebra restart",			/* MODE_PHASED_ZEBRA_RESTART */
	"phased global restart for any failure",	/* MODE_PHASED_ALL_RESTART */
};
83
/*
 * Steps of the phased restart state machine driven by phase_check().
 * The value is used as an index into phase_str[].
 */
typedef enum {
	PHASE_NONE = 0,			/* no phased restart in progress */
	PHASE_STOPS_PENDING = 1,	/* waiting for child jobs to finish */
	PHASE_WAITING_DOWN = 2,		/* waiting for daemons to go down */
	PHASE_ZEBRA_RESTART_PENDING = 3, /* special daemon restart job running */
	PHASE_WAITING_ZEBRA_UP = 4	/* waiting for special daemon to be up */
} restart_phase_t;
91
/*
 * Phase names for log messages, indexed by restart_phase_t.
 * NOTE(review): the table has one more entry than the enum has values,
 * so the last string is not reachable through a restart_phase_t.
 */
static const char *phase_str[] = {
	"None",						/* PHASE_NONE */
	"Stop jobs running",				/* PHASE_STOPS_PENDING */
	"Waiting for other daemons to come down",	/* PHASE_WAITING_DOWN */
	"Zebra restart job running",	/* PHASE_ZEBRA_RESTART_PENDING */
	"Waiting for zebra to come up",		/* PHASE_WAITING_ZEBRA_UP */
	"Start jobs running",
};
100
/* Seconds a single phase may last before phase_hanging() aborts it. */
#define PHASE_TIMEOUT (3 * gs.restart_timeout)
102
/* Bookkeeping for one background (re)start/stop job. */
struct restart_info {
	const char *name;	/* substituted into the command, used in logs */
	const char *what;	/* job type label passed to run_job() */
	pid_t pid;		/* child pid, 0 when no job is running */
	struct timeval time;	/* when the job was last started or reaped */
	long interval;		/* minimum seconds between two jobs */
	struct thread *t_kill;	/* timer that fires restart_kill() */
	int kills;		/* signals already sent to the running job */
};
112
113 static struct global_state {
114 watch_mode_t mode;
115 restart_phase_t phase;
116 struct thread *t_phase_hanging;
117 const char *vtydir;
118 long period;
119 long timeout;
120 long restart_timeout;
121 long min_restart_interval;
122 long max_restart_interval;
123 int do_ping;
124 struct daemon *daemons;
125 const char *restart_command;
126 const char *start_command;
127 const char *stop_command;
128 struct restart_info restart;
129 int unresponsive_restart;
130 int loglevel;
131 struct daemon *special; /* points to zebra when doing phased restart */
132 int numdaemons;
133 int numpids;
134 int numdown; /* # of daemons that are not UP or UNRESPONSIVE */
135 } gs = {
136 .mode = MODE_MONITOR,.phase = PHASE_NONE,.vtydir = VTYDIR,.period =
137 1000 * DEFAULT_PERIOD,.timeout =
138 DEFAULT_TIMEOUT,.restart_timeout =
139 DEFAULT_RESTART_TIMEOUT,.loglevel =
140 DEFAULT_LOGLEVEL,.min_restart_interval =
141 DEFAULT_MIN_RESTART,.max_restart_interval =
142 DEFAULT_MAX_RESTART,.do_ping = 1,};
143
/* Connection state of a monitored daemon; indexes state_str[]. */
typedef enum {
	DAEMON_INIT = 0,	/* no connection attempt has completed yet */
	DAEMON_DOWN = 1,
	DAEMON_CONNECTING = 2,	/* non-blocking connect in progress */
	DAEMON_UP = 3,
	DAEMON_UNRESPONSIVE = 4	/* connected, but an echo timed out */
} daemon_state_t;
151
/* 1 when the daemon is connected (responsive or not), otherwise 0. */
#define IS_UP(DMN)                                                             \
	((DAEMON_UP == (DMN)->state) || (DAEMON_UNRESPONSIVE == (DMN)->state))
154
/* State names for log messages, indexed by daemon_state_t. */
static const char *state_str[] = {
	"Init",		/* DAEMON_INIT */
	"Down",		/* DAEMON_DOWN */
	"Connecting",	/* DAEMON_CONNECTING */
	"Up",		/* DAEMON_UP */
	"Unresponsive",	/* DAEMON_UNRESPONSIVE */
};
162
163 struct daemon {
164 const char *name;
165 daemon_state_t state;
166 int fd;
167 struct timeval echo_sent;
168 u_int connect_tries;
169 struct thread *t_wakeup;
170 struct thread *t_read;
171 struct thread *t_write;
172 struct daemon *next;
173 struct restart_info restart;
174 };
175
/*
 * Long option table for getopt_long(); every entry maps onto the short
 * option given in its last field.
 */
static const struct option longopts[] = {
	{"daemon",               no_argument,       NULL, 'd'},
	{"statedir",             required_argument, NULL, 'S'},
	{"no-echo",              no_argument,       NULL, 'e'},
	{"loglevel",             required_argument, NULL, 'l'},
	{"interval",             required_argument, NULL, 'i'},
	{"timeout",              required_argument, NULL, 't'},
	{"restart-timeout",      required_argument, NULL, 'T'},
	{"restart",              required_argument, NULL, 'r'},
	{"start-command",        required_argument, NULL, 's'},
	{"kill-command",         required_argument, NULL, 'k'},
	{"restart-all",          required_argument, NULL, 'R'},
	{"all-restart",          no_argument,       NULL, 'a'},
	{"always-all-restart",   no_argument,       NULL, 'A'},
	{"unresponsive-restart", no_argument,       NULL, 'z'},
	{"min-restart-interval", required_argument, NULL, 'm'},
	{"max-restart-interval", required_argument, NULL, 'M'},
	{"pid-file",             required_argument, NULL, 'p'},
	{"blank-string",         required_argument, NULL, 'b'},
	{"help",                 no_argument,       NULL, 'h'},
	{"version",              no_argument,       NULL, 'v'},
	{NULL, 0, NULL, 0}	/* terminator */
};
199
/* Forward declarations for functions used before their definition. */
static void phase_check(void);
static void try_restart(struct daemon *dmn);
static int try_connect(struct daemon *dmn);
static int wakeup_send_echo(struct thread *t_wakeup);
204
205 static int usage(const char *progname, int status)
206 {
207 if (status != 0)
208 fprintf(stderr, "Try `%s --help' for more information.\n",
209 progname);
210 else {
211 printf("Usage : %s [OPTION...] <daemon name> ...\n\n\
212 Watchdog program to monitor status of frr daemons and try to restart\n\
213 them if they are down or unresponsive. It determines whether a daemon is\n\
214 up based on whether it can connect to the daemon's vty unix stream socket.\n\
215 It then repeatedly sends echo commands over that socket to determine whether\n\
216 the daemon is responsive. If the daemon crashes, we will receive an EOF\n\
217 on the socket connection and know immediately that the daemon is down.\n\n\
218 The daemons to be monitored should be listed on the command line.\n\n\
219 This program can run in one of 5 modes:\n\n\
220 0. Mode: %s.\n\
221 Just monitor and report on status changes. Example:\n\
222 %s -d zebra ospfd bgpd\n\n\
223 1. Mode: %s.\n\
224 Whenever any daemon hangs or crashes, use the given command to restart\n\
225 them all. Example:\n\
226 %s -dz \\\n\
227 -R '/sbin/service zebra restart; /sbin/service ospfd restart' \\\n\
228 zebra ospfd\n\n\
229 2. Mode: %s.\n\
230 When any single daemon hangs or crashes, restart only the daemon that's\n\
231 in trouble using the supplied restart command. Example:\n\
232 %s -dz -r '/sbin/service %%s restart' zebra ospfd bgpd\n\n\
233 3. Mode: %s.\n\
234 The same as the previous mode, except that there is special treatment when\n\
235 the zebra daemon is in trouble. In that case, a phased restart approach\n\
236 is used: 1. stop all other daemons; 2. restart zebra; 3. start the other\n\
237 daemons. Example:\n\
238 %s -adz -r '/sbin/service %%s restart' \\\n\
239 -s '/sbin/service %%s start' \\\n\
240 -k '/sbin/service %%s stop' zebra ospfd bgpd\n\n\
241 4. Mode: %s.\n\
242 This is the same as the previous mode, except that the phased restart\n\
243 procedure is used whenever any of the daemons hangs or crashes. Example:\n\
244 %s -Adz -r '/sbin/service %%s restart' \\\n\
245 -s '/sbin/service %%s start' \\\n\
246 -k '/sbin/service %%s stop' zebra ospfd bgpd\n\n\
247 As of this writing, it is believed that mode 2 [%s]\n\
248 is not safe, and mode 3 [%s] may not be safe with some of the\n\
249 routing daemons.\n\n\
250 In order to avoid attempting to restart the daemons in a fast loop,\n\
251 the -m and -M options allow you to control the minimum delay between\n\
252 restart commands. The minimum restart delay is recalculated each time\n\
253 a restart is attempted: if the time since the last restart attempt exceeds\n\
254 twice the -M value, then the restart delay is set to the -m value.\n\
255 Otherwise, the interval is doubled (but capped at the -M value).\n\n", progname, mode_str[0], progname, mode_str[1], progname, mode_str[2], progname, mode_str[3], progname, mode_str[4], progname, mode_str[2], mode_str[3]);
256
257 printf("Options:\n\
258 -d, --daemon Run in daemon mode. In this mode, error messages are sent\n\
259 to syslog instead of stdout.\n\
260 -S, --statedir Set the vty socket directory (default is %s)\n\
261 -e, --no-echo Do not ping the daemons to test responsiveness (this\n\
262 option is necessary if the daemons do not support the\n\
263 echo command)\n\
264 -l, --loglevel Set the logging level (default is %d).\n\
265 The value should range from %d (LOG_EMERG) to %d (LOG_DEBUG),\n\
266 but it can be set higher than %d if extra-verbose debugging\n\
267 messages are desired.\n\
268 -m, --min-restart-interval\n\
269 Set the minimum seconds to wait between invocations of daemon\n\
270 restart commands (default is %d).\n\
271 -M, --max-restart-interval\n\
272 Set the maximum seconds to wait between invocations of daemon\n\
273 restart commands (default is %d).\n\
274 -i, --interval Set the status polling interval in seconds (default is %d)\n\
275 -t, --timeout Set the unresponsiveness timeout in seconds (default is %d)\n\
276 -T, --restart-timeout\n\
277 Set the restart (kill) timeout in seconds (default is %d).\n\
278 If any background jobs are still running after this much\n\
279 time has elapsed, they will be killed.\n\
280 -r, --restart Supply a Bourne shell command to use to restart a single\n\
281 daemon. The command string should include '%%s' where the\n\
282 name of the daemon should be substituted.\n\
283 Note that -r and -R are incompatible.\n\
284 -s, --start-command\n\
285 Supply a Bourne shell to command to use to start a single\n\
286 daemon. The command string should include '%%s' where the\n\
287 name of the daemon should be substituted.\n\
288 -k, --kill-command\n\
289 Supply a Bourne shell to command to use to stop a single\n\
290 daemon. The command string should include '%%s' where the\n\
291 name of the daemon should be substituted.\n\
292 -R, --restart-all\n\
293 When one or more daemons is down, try to restart everything\n\
294 using the Bourne shell command supplied as the argument.\n\
295 Note that -r and -R are incompatible.\n\
296 -z, --unresponsive-restart\n\
297 When a daemon is unresponsive, treat it as being down for\n\
298 restart purposes.\n\
299 -a, --all-restart\n\
300 When zebra hangs or crashes, restart all daemons using\n\
301 this phased approach: 1. stop all other daemons; 2. restart\n\
302 zebra; 3. start other daemons. Requires -r, -s, and -k.\n\
303 -A, --always-all-restart\n\
304 When any daemon (not just zebra) hangs or crashes, use the\n\
305 same phased restart mechanism described above for -a.\n\
306 Requires -r, -s, and -k.\n\
307 -p, --pid-file Set process identifier file name\n\
308 (default is %s).\n\
309 -b, --blank-string\n\
310 When the supplied argument string is found in any of the\n\
311 various shell command arguments (-r, -s, -k, or -R), replace\n\
312 it with a space. This is an ugly hack to circumvent problems\n\
313 passing command-line arguments with embedded spaces.\n\
314 -v, --version Print program version\n\
315 -h, --help Display this help and exit\n", VTYDIR, DEFAULT_LOGLEVEL, LOG_EMERG, LOG_DEBUG, LOG_DEBUG, DEFAULT_MIN_RESTART, DEFAULT_MAX_RESTART, DEFAULT_PERIOD, DEFAULT_TIMEOUT, DEFAULT_RESTART_TIMEOUT, DEFAULT_PIDFILE);
316 }
317
318 return status;
319 }
320
/*
 * Fork and run shell_cmd through "/bin/sh -c" without waiting for it.
 *
 * Returns the child's pid in the parent, or -1 when fork() fails.  The
 * child puts itself into its own process group so that the whole job
 * can later be signalled through the negated pid; it never returns.
 */
static pid_t run_background(char *shell_cmd)
{
	pid_t child = fork();

	if (child == -1) {
		zlog_err("fork failed, cannot run command [%s]: %s",
			 shell_cmd, safe_strerror(errno));
		return -1;
	}

	if (child != 0) {
		/* Parent process: we will reap the child later. */
		zlog_err("Forked background command [pid %d]: %s", (int)child,
			 shell_cmd);
		return child;
	}

	/* Child process from here on. */
	if (setpgid(0, 0) < 0)
		zlog_warn("warning: setpgid(0,0) failed: %s",
			  safe_strerror(errno));

	/* execv() wants writable argv strings, hence the local arrays. */
	char arg_sh[] = "sh";
	char arg_c[] = "-c";
	char *const argv[4] = { arg_sh, arg_c, shell_cmd, NULL };

	execv("/bin/sh", argv);
	/* Only reached when the exec itself failed. */
	zlog_err("execv(/bin/sh -c '%s') failed: %s",
		 shell_cmd, safe_strerror(errno));
	_exit(127);
}
352
/*
 * Store in *result the wall-clock time that has passed since
 * *start_time and return result.  tv_usec is brought back into the
 * non-negative range by borrowing from tv_sec.
 */
static struct timeval *time_elapsed(struct timeval *result,
				    const struct timeval *start_time)
{
	gettimeofday(result, NULL);

	result->tv_usec -= start_time->tv_usec;
	result->tv_sec -= start_time->tv_sec;

	/* Borrow whole seconds until the microsecond part is >= 0. */
	for (; result->tv_usec < 0; result->tv_sec--)
		result->tv_usec += 1000000L;

	return result;
}
365
366 static int restart_kill(struct thread *t_kill)
367 {
368 struct restart_info *restart = THREAD_ARG(t_kill);
369 struct timeval delay;
370
371 time_elapsed(&delay, &restart->time);
372 zlog_warn("Warning: %s %s child process %d still running after "
373 "%ld seconds, sending signal %d",
374 restart->what, restart->name, (int)restart->pid,
375 (long)delay.tv_sec, (restart->kills ? SIGKILL : SIGTERM));
376 kill(-restart->pid, (restart->kills ? SIGKILL : SIGTERM));
377 restart->kills++;
378 restart->t_kill = thread_add_timer(master, restart_kill, restart,
379 gs.restart_timeout);
380 return 0;
381 }
382
383 static struct restart_info *find_child(pid_t child)
384 {
385 if (gs.mode == MODE_GLOBAL_RESTART) {
386 if (gs.restart.pid == child)
387 return &gs.restart;
388 } else {
389 struct daemon *dmn;
390 for (dmn = gs.daemons; dmn; dmn = dmn->next) {
391 if (dmn->restart.pid == child)
392 return &dmn->restart;
393 }
394 }
395 return NULL;
396 }
397
398 static void sigchild(void)
399 {
400 pid_t child;
401 int status;
402 const char *name;
403 const char *what;
404 struct restart_info *restart;
405
406 switch (child = waitpid(-1, &status, WNOHANG)) {
407 case -1:
408 zlog_err("waitpid failed: %s", safe_strerror(errno));
409 return;
410 case 0:
411 zlog_warn("SIGCHLD received, but waitpid did not reap a child");
412 return;
413 }
414
415 if (child == integrated_write_pid) {
416 integrated_write_sigchld(status);
417 return;
418 }
419
420 if ((restart = find_child(child)) != NULL) {
421 name = restart->name;
422 what = restart->what;
423 restart->pid = 0;
424 gs.numpids--;
425 thread_cancel(restart->t_kill);
426 restart->t_kill = NULL;
427 /* Update restart time to reflect the time the command completed. */
428 gettimeofday(&restart->time, NULL);
429 } else {
430 zlog_err
431 ("waitpid returned status for an unknown child process %d",
432 (int)child);
433 name = "(unknown)";
434 what = "background";
435 }
436 if (WIFSTOPPED(status))
437 zlog_warn("warning: %s %s process %d is stopped",
438 what, name, (int)child);
439 else if (WIFSIGNALED(status))
440 zlog_warn("%s %s process %d terminated due to signal %d",
441 what, name, (int)child, WTERMSIG(status));
442 else if (WIFEXITED(status)) {
443 if (WEXITSTATUS(status) != 0)
444 zlog_warn
445 ("%s %s process %d exited with non-zero status %d",
446 what, name, (int)child, WEXITSTATUS(status));
447 else
448 zlog_debug("%s %s process %d exited normally", what,
449 name, (int)child);
450 } else
451 zlog_err("cannot interpret %s %s process %d wait status 0x%x",
452 what, name, (int)child, status);
453 phase_check();
454 }
455
456 static int
457 run_job(struct restart_info *restart, const char *cmdtype, const char *command,
458 int force, int update_interval)
459 {
460 struct timeval delay;
461
462 if (gs.loglevel > LOG_DEBUG + 1)
463 zlog_debug("attempting to %s %s", cmdtype, restart->name);
464
465 if (restart->pid) {
466 if (gs.loglevel > LOG_DEBUG + 1)
467 zlog_debug
468 ("cannot %s %s, previous pid %d still running",
469 cmdtype, restart->name, (int)restart->pid);
470 return -1;
471 }
472
473 /* Note: time_elapsed test must come before the force test, since we need
474 to make sure that delay is initialized for use below in updating the
475 restart interval. */
476 if ((time_elapsed(&delay, &restart->time)->tv_sec < restart->interval)
477 && !force) {
478 if (gs.loglevel > LOG_DEBUG + 1)
479 zlog_debug("postponing %s %s: "
480 "elapsed time %ld < retry interval %ld",
481 cmdtype, restart->name, (long)delay.tv_sec,
482 restart->interval);
483 return -1;
484 }
485
486 gettimeofday(&restart->time, NULL);
487 restart->kills = 0;
488 {
489 char cmd[strlen(command) + strlen(restart->name) + 1];
490 snprintf(cmd, sizeof(cmd), command, restart->name);
491 if ((restart->pid = run_background(cmd)) > 0) {
492 restart->t_kill =
493 thread_add_timer(master, restart_kill, restart,
494 gs.restart_timeout);
495 restart->what = cmdtype;
496 gs.numpids++;
497 } else
498 restart->pid = 0;
499 }
500
501 /* Calculate the new restart interval. */
502 if (update_interval) {
503 if (delay.tv_sec > 2 * gs.max_restart_interval)
504 restart->interval = gs.min_restart_interval;
505 else if ((restart->interval *= 2) > gs.max_restart_interval)
506 restart->interval = gs.max_restart_interval;
507 if (gs.loglevel > LOG_DEBUG + 1)
508 zlog_debug("restart %s interval is now %ld",
509 restart->name, restart->interval);
510 }
511 return restart->pid;
512 }
513
/* Arm the read callback for the daemon's socket. */
#define SET_READ_HANDLER(DMN)                                                  \
	(DMN)->t_read =                                                        \
		thread_add_read(master, handle_read, (DMN), (DMN)->fd)

/*
 * The three macros below arm the daemon's wakeup timer after a
 * randomised poll period (gs.period is in milliseconds); they differ
 * only in the callback that will run.
 */
#define SET_WAKEUP_DOWN(DMN)                                                   \
	(DMN)->t_wakeup = thread_add_timer_msec(master, wakeup_down, (DMN),    \
						FUZZY(gs.period))

#define SET_WAKEUP_UNRESPONSIVE(DMN)                                           \
	(DMN)->t_wakeup = thread_add_timer_msec(master, wakeup_unresponsive,   \
						(DMN), FUZZY(gs.period))

#define SET_WAKEUP_ECHO(DMN)                                                   \
	(DMN)->t_wakeup = thread_add_timer_msec(master, wakeup_send_echo,      \
						(DMN), FUZZY(gs.period))
528
529 static int wakeup_down(struct thread *t_wakeup)
530 {
531 struct daemon *dmn = THREAD_ARG(t_wakeup);
532
533 dmn->t_wakeup = NULL;
534 if (try_connect(dmn) < 0)
535 SET_WAKEUP_DOWN(dmn);
536 if ((dmn->connect_tries > 1) && (dmn->state != DAEMON_UP))
537 try_restart(dmn);
538 return 0;
539 }
540
541 static int wakeup_init(struct thread *t_wakeup)
542 {
543 struct daemon *dmn = THREAD_ARG(t_wakeup);
544
545 dmn->t_wakeup = NULL;
546 if (try_connect(dmn) < 0) {
547 SET_WAKEUP_DOWN(dmn);
548 zlog_err("%s state -> down : initial connection attempt failed",
549 dmn->name);
550 dmn->state = DAEMON_DOWN;
551 }
552 return 0;
553 }
554
555 static void daemon_down(struct daemon *dmn, const char *why)
556 {
557 if (IS_UP(dmn) || (dmn->state == DAEMON_INIT))
558 zlog_err("%s state -> down : %s", dmn->name, why);
559 else if (gs.loglevel > LOG_DEBUG)
560 zlog_debug("%s still down : %s", dmn->name, why);
561 if (IS_UP(dmn))
562 gs.numdown++;
563 dmn->state = DAEMON_DOWN;
564 if (dmn->fd >= 0) {
565 close(dmn->fd);
566 dmn->fd = -1;
567 }
568 THREAD_OFF(dmn->t_read);
569 THREAD_OFF(dmn->t_write);
570 THREAD_OFF(dmn->t_wakeup);
571 if (try_connect(dmn) < 0)
572 SET_WAKEUP_DOWN(dmn);
573 phase_check();
574 }
575
576 static int handle_read(struct thread *t_read)
577 {
578 struct daemon *dmn = THREAD_ARG(t_read);
579 static const char resp[sizeof(PING_TOKEN) + 4] = PING_TOKEN "\n";
580 char buf[sizeof(resp) + 100];
581 ssize_t rc;
582 struct timeval delay;
583
584 dmn->t_read = NULL;
585 if ((rc = read(dmn->fd, buf, sizeof(buf))) < 0) {
586 char why[100];
587
588 if (ERRNO_IO_RETRY(errno)) {
589 /* Pretend it never happened. */
590 SET_READ_HANDLER(dmn);
591 return 0;
592 }
593 snprintf(why, sizeof(why), "unexpected read error: %s",
594 safe_strerror(errno));
595 daemon_down(dmn, why);
596 return 0;
597 }
598 if (rc == 0) {
599 daemon_down(dmn, "read returned EOF");
600 return 0;
601 }
602 if (!dmn->echo_sent.tv_sec) {
603 char why[sizeof(buf) + 100];
604 snprintf(why, sizeof(why),
605 "unexpected read returns %d bytes: %.*s", (int)rc,
606 (int)rc, buf);
607 daemon_down(dmn, why);
608 return 0;
609 }
610
611 /* We are expecting an echo response: is there any chance that the
612 response would not be returned entirely in the first read? That
613 seems inconceivable... */
614 if ((rc != sizeof(resp)) || memcmp(buf, resp, sizeof(resp))) {
615 char why[100 + sizeof(buf)];
616 snprintf(why, sizeof(why),
617 "read returned bad echo response of %d bytes "
618 "(expecting %u): %.*s", (int)rc, (u_int) sizeof(resp),
619 (int)rc, buf);
620 daemon_down(dmn, why);
621 return 0;
622 }
623
624 time_elapsed(&delay, &dmn->echo_sent);
625 dmn->echo_sent.tv_sec = 0;
626 if (dmn->state == DAEMON_UNRESPONSIVE) {
627 if (delay.tv_sec < gs.timeout) {
628 dmn->state = DAEMON_UP;
629 zlog_warn
630 ("%s state -> up : echo response received after %ld.%06ld "
631 "seconds", dmn->name, (long)delay.tv_sec,
632 (long)delay.tv_usec);
633 } else
634 zlog_warn
635 ("%s: slow echo response finally received after %ld.%06ld "
636 "seconds", dmn->name, (long)delay.tv_sec,
637 (long)delay.tv_usec);
638 } else if (gs.loglevel > LOG_DEBUG + 1)
639 zlog_debug("%s: echo response received after %ld.%06ld seconds",
640 dmn->name, (long)delay.tv_sec, (long)delay.tv_usec);
641
642 SET_READ_HANDLER(dmn);
643 if (dmn->t_wakeup)
644 thread_cancel(dmn->t_wakeup);
645 SET_WAKEUP_ECHO(dmn);
646
647 return 0;
648 }
649
650 /*
651 * Wait till we notice that all daemons are ready before
652 * we send we are ready to systemd
653 */
654 static void daemon_send_ready(void)
655 {
656 static int sent = 0;
657 if (!sent && gs.numdown == 0) {
658 #if defined (HAVE_CUMULUS)
659 FILE *fp;
660
661 fp = fopen(DAEMON_VTY_DIR "/watchfrr.started", "w");
662 fclose(fp);
663 #endif
664 zlog_notice
665 ("Watchfrr: Notifying Systemd we are up and running");
666 systemd_send_started(master, 0);
667 sent = 1;
668 }
669 }
670
671 static void daemon_up(struct daemon *dmn, const char *why)
672 {
673 dmn->state = DAEMON_UP;
674 gs.numdown--;
675 dmn->connect_tries = 0;
676 zlog_notice("%s state -> up : %s", dmn->name, why);
677 daemon_send_ready();
678 if (gs.do_ping)
679 SET_WAKEUP_ECHO(dmn);
680 phase_check();
681 }
682
683 static int check_connect(struct thread *t_write)
684 {
685 struct daemon *dmn = THREAD_ARG(t_write);
686 int sockerr;
687 socklen_t reslen = sizeof(sockerr);
688
689 dmn->t_write = NULL;
690 if (getsockopt(dmn->fd, SOL_SOCKET, SO_ERROR, (char *)&sockerr, &reslen)
691 < 0) {
692 zlog_warn("%s: check_connect: getsockopt failed: %s", dmn->name,
693 safe_strerror(errno));
694 daemon_down(dmn,
695 "getsockopt failed checking connection success");
696 return 0;
697 }
698 if ((reslen == sizeof(sockerr)) && sockerr) {
699 char why[100];
700 snprintf(why, sizeof(why),
701 "getsockopt reports that connection attempt failed: %s",
702 safe_strerror(sockerr));
703 daemon_down(dmn, why);
704 return 0;
705 }
706
707 daemon_up(dmn, "delayed connect succeeded");
708 return 0;
709 }
710
711 static int wakeup_connect_hanging(struct thread *t_wakeup)
712 {
713 struct daemon *dmn = THREAD_ARG(t_wakeup);
714 char why[100];
715
716 dmn->t_wakeup = NULL;
717 snprintf(why, sizeof(why),
718 "connection attempt timed out after %ld seconds", gs.timeout);
719 daemon_down(dmn, why);
720 return 0;
721 }
722
723 /* Making connection to protocol daemon. */
724 static int try_connect(struct daemon *dmn)
725 {
726 int sock;
727 struct sockaddr_un addr;
728 socklen_t len;
729
730 if (gs.loglevel > LOG_DEBUG + 1)
731 zlog_debug("%s: attempting to connect", dmn->name);
732 dmn->connect_tries++;
733
734 memset(&addr, 0, sizeof(struct sockaddr_un));
735 addr.sun_family = AF_UNIX;
736 snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/%s.vty",
737 gs.vtydir, dmn->name);
738 #ifdef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN
739 len = addr.sun_len = SUN_LEN(&addr);
740 #else
741 len = sizeof(addr.sun_family) + strlen(addr.sun_path);
742 #endif /* HAVE_STRUCT_SOCKADDR_UN_SUN_LEN */
743
744 /* Quick check to see if we might succeed before we go to the trouble
745 of creating a socket. */
746 if (access(addr.sun_path, W_OK) < 0) {
747 if (errno != ENOENT)
748 zlog_err("%s: access to socket %s denied: %s",
749 dmn->name, addr.sun_path,
750 safe_strerror(errno));
751 return -1;
752 }
753
754 if ((sock = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
755 zlog_err("%s(%s): cannot make socket: %s",
756 __func__, addr.sun_path, safe_strerror(errno));
757 return -1;
758 }
759
760 if (set_nonblocking(sock) < 0 || set_cloexec(sock) < 0) {
761 zlog_err("%s(%s): set_nonblocking/cloexec(%d) failed",
762 __func__, addr.sun_path, sock);
763 close(sock);
764 return -1;
765 }
766
767 if (connect(sock, (struct sockaddr *)&addr, len) < 0) {
768 if ((errno != EINPROGRESS) && (errno != EWOULDBLOCK)) {
769 if (gs.loglevel > LOG_DEBUG)
770 zlog_debug("%s(%s): connect failed: %s",
771 __func__, addr.sun_path,
772 safe_strerror(errno));
773 close(sock);
774 return -1;
775 }
776 if (gs.loglevel > LOG_DEBUG)
777 zlog_debug("%s: connection in progress", dmn->name);
778 dmn->state = DAEMON_CONNECTING;
779 dmn->fd = sock;
780 dmn->t_write =
781 thread_add_write(master, check_connect, dmn, dmn->fd);
782 dmn->t_wakeup =
783 thread_add_timer(master, wakeup_connect_hanging, dmn,
784 gs.timeout);
785 SET_READ_HANDLER(dmn);
786 return 0;
787 }
788
789 dmn->fd = sock;
790 SET_READ_HANDLER(dmn);
791 daemon_up(dmn, "connect succeeded");
792 return 1;
793 }
794
795 static int phase_hanging(struct thread *t_hanging)
796 {
797 gs.t_phase_hanging = NULL;
798 zlog_err("Phase [%s] hanging for %ld seconds, aborting phased restart",
799 phase_str[gs.phase], PHASE_TIMEOUT);
800 gs.phase = PHASE_NONE;
801 return 0;
802 }
803
804 static void set_phase(restart_phase_t new_phase)
805 {
806 gs.phase = new_phase;
807 if (gs.t_phase_hanging)
808 thread_cancel(gs.t_phase_hanging);
809 gs.t_phase_hanging = thread_add_timer(master, phase_hanging, NULL,
810 PHASE_TIMEOUT);
811 }
812
813 static void phase_check(void)
814 {
815 switch (gs.phase) {
816 case PHASE_NONE:
817 break;
818 case PHASE_STOPS_PENDING:
819 if (gs.numpids)
820 break;
821 zlog_info
822 ("Phased restart: all routing daemon stop jobs have completed.");
823 set_phase(PHASE_WAITING_DOWN);
824
825 /*FALLTHRU*/
826 case PHASE_WAITING_DOWN:
827 if (gs.numdown + IS_UP(gs.special) < gs.numdaemons)
828 break;
829 zlog_info("Phased restart: all routing daemons now down.");
830 run_job(&gs.special->restart, "restart", gs.restart_command, 1,
831 1);
832 set_phase(PHASE_ZEBRA_RESTART_PENDING);
833
834 /*FALLTHRU*/
835 case PHASE_ZEBRA_RESTART_PENDING:
836 if (gs.special->restart.pid)
837 break;
838 zlog_info("Phased restart: %s restart job completed.",
839 gs.special->name);
840 set_phase(PHASE_WAITING_ZEBRA_UP);
841
842 /*FALLTHRU*/
843 case PHASE_WAITING_ZEBRA_UP:
844 if (!IS_UP(gs.special))
845 break;
846 zlog_info("Phased restart: %s is now up.", gs.special->name);
847 {
848 struct daemon *dmn;
849 for (dmn = gs.daemons; dmn; dmn = dmn->next) {
850 if (dmn != gs.special)
851 run_job(&dmn->restart, "start",
852 gs.start_command, 1, 0);
853 }
854 }
855 gs.phase = PHASE_NONE;
856 THREAD_OFF(gs.t_phase_hanging);
857 zlog_notice("Phased global restart has completed.");
858 break;
859 }
860 }
861
862 static void try_restart(struct daemon *dmn)
863 {
864 switch (gs.mode) {
865 case MODE_MONITOR:
866 return;
867 case MODE_GLOBAL_RESTART:
868 run_job(&gs.restart, "restart", gs.restart_command, 0, 1);
869 break;
870 case MODE_SEPARATE_RESTART:
871 run_job(&dmn->restart, "restart", gs.restart_command, 0, 1);
872 break;
873 case MODE_PHASED_ZEBRA_RESTART:
874 if (dmn != gs.special) {
875 if ((gs.special->state == DAEMON_UP)
876 && (gs.phase == PHASE_NONE))
877 run_job(&dmn->restart, "restart",
878 gs.restart_command, 0, 1);
879 else
880 zlog_debug
881 ("%s: postponing restart attempt because master %s daemon "
882 "not up [%s], or phased restart in progress",
883 dmn->name, gs.special->name,
884 state_str[gs.special->state]);
885 break;
886 }
887
888 /*FALLTHRU*/
889 case MODE_PHASED_ALL_RESTART:
890 if ((gs.phase != PHASE_NONE) || gs.numpids) {
891 if (gs.loglevel > LOG_DEBUG + 1)
892 zlog_debug
893 ("postponing phased global restart: restart already in "
894 "progress [%s], or outstanding child processes [%d]",
895 phase_str[gs.phase], gs.numpids);
896 break;
897 }
898 /* Is it too soon for a restart? */
899 {
900 struct timeval delay;
901 if (time_elapsed(&delay, &gs.special->restart.time)->
902 tv_sec < gs.special->restart.interval) {
903 if (gs.loglevel > LOG_DEBUG + 1)
904 zlog_debug
905 ("postponing phased global restart: "
906 "elapsed time %ld < retry interval %ld",
907 (long)delay.tv_sec,
908 gs.special->restart.interval);
909 break;
910 }
911 }
912 run_job(&gs.restart, "restart", gs.restart_command, 0, 1);
913 break;
914 default:
915 zlog_err("error: unknown restart mode %d", gs.mode);
916 break;
917 }
918 }
919
920 static int wakeup_unresponsive(struct thread *t_wakeup)
921 {
922 struct daemon *dmn = THREAD_ARG(t_wakeup);
923
924 dmn->t_wakeup = NULL;
925 if (dmn->state != DAEMON_UNRESPONSIVE)
926 zlog_err("%s: no longer unresponsive (now %s), "
927 "wakeup should have been cancelled!",
928 dmn->name, state_str[dmn->state]);
929 else {
930 SET_WAKEUP_UNRESPONSIVE(dmn);
931 try_restart(dmn);
932 }
933 return 0;
934 }
935
936 static int wakeup_no_answer(struct thread *t_wakeup)
937 {
938 struct daemon *dmn = THREAD_ARG(t_wakeup);
939
940 dmn->t_wakeup = NULL;
941 dmn->state = DAEMON_UNRESPONSIVE;
942 zlog_err("%s state -> unresponsive : no response yet to ping "
943 "sent %ld seconds ago", dmn->name, gs.timeout);
944 if (gs.unresponsive_restart) {
945 SET_WAKEUP_UNRESPONSIVE(dmn);
946 try_restart(dmn);
947 }
948 return 0;
949 }
950
951 static int wakeup_send_echo(struct thread *t_wakeup)
952 {
953 static const char echocmd[] = "echo " PING_TOKEN;
954 ssize_t rc;
955 struct daemon *dmn = THREAD_ARG(t_wakeup);
956
957 dmn->t_wakeup = NULL;
958 if (((rc = write(dmn->fd, echocmd, sizeof(echocmd))) < 0) ||
959 ((size_t) rc != sizeof(echocmd))) {
960 char why[100 + sizeof(echocmd)];
961 snprintf(why, sizeof(why),
962 "write '%s' returned %d instead of %u", echocmd,
963 (int)rc, (u_int) sizeof(echocmd));
964 daemon_down(dmn, why);
965 } else {
966 gettimeofday(&dmn->echo_sent, NULL);
967 dmn->t_wakeup =
968 thread_add_timer(master, wakeup_no_answer, dmn, gs.timeout);
969 }
970 return 0;
971 }
972
/*
 * Termination handler (installed by main() for SIGINT and SIGTERM):
 * log the event, tell systemd we are stopping, then exit with status 0.
 */
static void sigint(void)
{
	zlog_notice("Terminating on signal");

	systemd_send_stopping();

	exit(0);
}
979
/*
 * Check that cmd is usable as a command template: the first '%' must
 * be followed by 's' and no further '%' may follow.
 * Returns 1 when valid, 0 otherwise.
 */
static int valid_command(const char *cmd)
{
	const char *pct = strchr(cmd, '%');

	if (pct == NULL)
		return 0;
	if (pct[1] != 's')
		return 0;

	return strchr(pct + 1, '%') == NULL;
}
987
/*
 * This is an ugly hack to circumvent problems with passing command-line
 * arguments that contain spaces.  The fix is to use a configuration file.
 *
 * Returns a newly allocated copy of cmd in which every occurrence of
 * blankstr has been replaced by a single space.  The process exits if
 * the copy cannot be allocated.
 *
 * An empty blankstr leaves the copy unchanged.  Scanning resumes right
 * after each inserted space, so a blankstr that itself contains a
 * space cannot be matched against the replacement over and over.
 */
static char *translate_blanks(const char *cmd, const char *blankstr)
{
	char *res;
	char *p;
	size_t bslen = strlen(blankstr);

	if (!(res = strdup(cmd))) {
		perror("strdup");
		exit(1);
	}

	/* An empty token would match at every position, forever. */
	if (bslen == 0)
		return res;

	p = res;
	while ((p = strstr(p, blankstr)) != NULL) {
		*p = ' ';
		/* Close the gap left by a token longer than one byte;
		   the move includes the terminating NUL. */
		if (bslen != 1)
			memmove(p + 1, p + bslen, strlen(p + bslen) + 1);
		p++;
	}
	return res;
}
1007
1008 struct zebra_privs_t watchfrr_privs = {
1009 #ifdef VTY_GROUP
1010 .vty_group = VTY_GROUP,
1011 #endif
1012 };
1013
1014 int main(int argc, char **argv)
1015 {
1016 const char *progname;
1017 int opt;
1018 int daemon_mode = 0;
1019 const char *pidfile = DEFAULT_PIDFILE;
1020 const char *special = "zebra";
1021 const char *blankstr = NULL;
1022 static struct quagga_signal_t my_signals[] = {
1023 {
1024 .signal = SIGINT,
1025 .handler = sigint,
1026 },
1027 {
1028 .signal = SIGTERM,
1029 .handler = sigint,
1030 },
1031 {
1032 .signal = SIGCHLD,
1033 .handler = sigchild,
1034 },
1035 };
1036
1037 if ((progname = strrchr(argv[0], '/')) != NULL)
1038 progname++;
1039 else
1040 progname = argv[0];
1041
1042 gs.restart.name = "all";
1043 while ((opt =
1044 getopt_long(argc, argv, "aAb:dek:l:m:M:i:p:r:R:S:s:t:T:zvh",
1045 longopts, 0)) != EOF) {
1046 switch (opt) {
1047 case 0:
1048 break;
1049 case 'a':
1050 if ((gs.mode != MODE_MONITOR)
1051 && (gs.mode != MODE_SEPARATE_RESTART)) {
1052 fputs("Ambiguous operating mode selected.\n",
1053 stderr);
1054 return usage(progname, 1);
1055 }
1056 gs.mode = MODE_PHASED_ZEBRA_RESTART;
1057 break;
1058 case 'A':
1059 if ((gs.mode != MODE_MONITOR)
1060 && (gs.mode != MODE_SEPARATE_RESTART)) {
1061 fputs("Ambiguous operating mode selected.\n",
1062 stderr);
1063 return usage(progname, 1);
1064 }
1065 gs.mode = MODE_PHASED_ALL_RESTART;
1066 break;
1067 case 'b':
1068 blankstr = optarg;
1069 break;
1070 case 'd':
1071 daemon_mode = 1;
1072 break;
1073 case 'e':
1074 gs.do_ping = 0;
1075 break;
1076 case 'k':
1077 if (!valid_command(optarg)) {
1078 fprintf(stderr,
1079 "Invalid kill command, must contain '%%s': %s\n",
1080 optarg);
1081 return usage(progname, 1);
1082 }
1083 gs.stop_command = optarg;
1084 break;
1085 case 'l':
1086 {
1087 char garbage[3];
1088 if ((sscanf
1089 (optarg, "%d%1s", &gs.loglevel,
1090 garbage) != 1)
1091 || (gs.loglevel < LOG_EMERG)) {
1092 fprintf(stderr,
1093 "Invalid loglevel argument: %s\n",
1094 optarg);
1095 return usage(progname, 1);
1096 }
1097 }
1098 break;
1099 case 'm':
1100 {
1101 char garbage[3];
1102 if ((sscanf(optarg, "%ld%1s",
1103 &gs.min_restart_interval,
1104 garbage) != 1)
1105 || (gs.min_restart_interval < 0)) {
1106 fprintf(stderr,
1107 "Invalid min_restart_interval argument: %s\n",
1108 optarg);
1109 return usage(progname, 1);
1110 }
1111 }
1112 break;
1113 case 'M':
1114 {
1115 char garbage[3];
1116 if ((sscanf(optarg, "%ld%1s",
1117 &gs.max_restart_interval,
1118 garbage) != 1)
1119 || (gs.max_restart_interval < 0)) {
1120 fprintf(stderr,
1121 "Invalid max_restart_interval argument: %s\n",
1122 optarg);
1123 return usage(progname, 1);
1124 }
1125 }
1126 break;
1127 case 'i':
1128 {
1129 char garbage[3];
1130 int period;
1131 if ((sscanf(optarg, "%d%1s", &period, garbage)
1132 != 1) || (gs.period < 1)) {
1133 fprintf(stderr,
1134 "Invalid interval argument: %s\n",
1135 optarg);
1136 return usage(progname, 1);
1137 }
1138 gs.period = 1000 * period;
1139 }
1140 break;
1141 case 'p':
1142 pidfile = optarg;
1143 break;
1144 case 'r':
1145 if ((gs.mode == MODE_GLOBAL_RESTART) ||
1146 (gs.mode == MODE_SEPARATE_RESTART)) {
1147 fputs("Ambiguous operating mode selected.\n",
1148 stderr);
1149 return usage(progname, 1);
1150 }
1151 if (!valid_command(optarg)) {
1152 fprintf(stderr,
1153 "Invalid restart command, must contain '%%s': %s\n",
1154 optarg);
1155 return usage(progname, 1);
1156 }
1157 gs.restart_command = optarg;
1158 if (gs.mode == MODE_MONITOR)
1159 gs.mode = MODE_SEPARATE_RESTART;
1160 break;
1161 case 'R':
1162 if (gs.mode != MODE_MONITOR) {
1163 fputs("Ambiguous operating mode selected.\n",
1164 stderr);
1165 return usage(progname, 1);
1166 }
1167 if (strchr(optarg, '%')) {
1168 fprintf(stderr,
1169 "Invalid restart-all arg, must not contain '%%s': %s\n",
1170 optarg);
1171 return usage(progname, 1);
1172 }
1173 gs.restart_command = optarg;
1174 gs.mode = MODE_GLOBAL_RESTART;
1175 break;
1176 case 's':
1177 if (!valid_command(optarg)) {
1178 fprintf(stderr,
1179 "Invalid start command, must contain '%%s': %s\n",
1180 optarg);
1181 return usage(progname, 1);
1182 }
1183 gs.start_command = optarg;
1184 break;
1185 case 'S':
1186 gs.vtydir = optarg;
1187 break;
1188 case 't':
1189 {
1190 char garbage[3];
1191 if ((sscanf
1192 (optarg, "%ld%1s", &gs.timeout,
1193 garbage) != 1) || (gs.timeout < 1)) {
1194 fprintf(stderr,
1195 "Invalid timeout argument: %s\n",
1196 optarg);
1197 return usage(progname, 1);
1198 }
1199 }
1200 break;
1201 case 'T':
1202 {
1203 char garbage[3];
1204 if ((sscanf
1205 (optarg, "%ld%1s", &gs.restart_timeout,
1206 garbage) != 1)
1207 || (gs.restart_timeout < 1)) {
1208 fprintf(stderr,
1209 "Invalid restart timeout argument: %s\n",
1210 optarg);
1211 return usage(progname, 1);
1212 }
1213 }
1214 break;
1215 case 'z':
1216 gs.unresponsive_restart = 1;
1217 break;
1218 case 'v':
1219 printf("%s version %s\n", progname, FRR_VERSION);
1220 puts("Copyright 2004 Andrew J. Schorr");
1221 return 0;
1222 case 'h':
1223 return usage(progname, 0);
1224 default:
1225 fputs("Invalid option.\n", stderr);
1226 return usage(progname, 1);
1227 }
1228 }
1229
1230 if (gs.unresponsive_restart && (gs.mode == MODE_MONITOR)) {
1231 fputs("Option -z requires a -r or -R restart option.\n",
1232 stderr);
1233 return usage(progname, 1);
1234 }
1235 switch (gs.mode) {
1236 case MODE_MONITOR:
1237 if (gs.restart_command || gs.start_command || gs.stop_command) {
1238 fprintf(stderr,
1239 "No kill/(re)start commands needed for %s mode.\n",
1240 mode_str[gs.mode]);
1241 return usage(progname, 1);
1242 }
1243 break;
1244 case MODE_GLOBAL_RESTART:
1245 case MODE_SEPARATE_RESTART:
1246 if (!gs.restart_command || gs.start_command || gs.stop_command) {
1247 fprintf(stderr,
1248 "No start/kill commands needed in [%s] mode.\n",
1249 mode_str[gs.mode]);
1250 return usage(progname, 1);
1251 }
1252 break;
1253 case MODE_PHASED_ZEBRA_RESTART:
1254 case MODE_PHASED_ALL_RESTART:
1255 if (!gs.restart_command || !gs.start_command
1256 || !gs.stop_command) {
1257 fprintf(stderr,
1258 "Need start, kill, and restart commands in [%s] mode.\n",
1259 mode_str[gs.mode]);
1260 return usage(progname, 1);
1261 }
1262 break;
1263 }
1264
1265 if (blankstr) {
1266 if (gs.restart_command)
1267 gs.restart_command =
1268 translate_blanks(gs.restart_command, blankstr);
1269 if (gs.start_command)
1270 gs.start_command =
1271 translate_blanks(gs.start_command, blankstr);
1272 if (gs.stop_command)
1273 gs.stop_command =
1274 translate_blanks(gs.stop_command, blankstr);
1275 }
1276
1277 gs.restart.interval = gs.min_restart_interval;
1278
1279 zprivs_init(&watchfrr_privs);
1280
1281 master = thread_master_create();
1282 cmd_init(-1);
1283 memory_init();
1284 vty_init(master);
1285 watchfrr_vty_init();
1286 vty_serv_sock(NULL, 0, WATCHFRR_VTYSH_PATH);
1287
1288 signal_init(master, array_size(my_signals), my_signals);
1289 srandom(time(NULL));
1290
1291 {
1292 int i;
1293 struct daemon *tail = NULL;
1294
1295 for (i = optind; i < argc; i++) {
1296 struct daemon *dmn;
1297
1298 if (!(dmn = (struct daemon *)calloc(1, sizeof(*dmn)))) {
1299 fprintf(stderr, "calloc(1,%u) failed: %s\n",
1300 (u_int) sizeof(*dmn),
1301 safe_strerror(errno));
1302 return 1;
1303 }
1304 dmn->name = dmn->restart.name = argv[i];
1305 dmn->state = DAEMON_INIT;
1306 gs.numdaemons++;
1307 gs.numdown++;
1308 dmn->fd = -1;
1309 dmn->t_wakeup =
1310 thread_add_timer_msec(master, wakeup_init, dmn,
1311 100 + (random() % 900));
1312 dmn->restart.interval = gs.min_restart_interval;
1313 if (tail)
1314 tail->next = dmn;
1315 else
1316 gs.daemons = dmn;
1317 tail = dmn;
1318
1319 if (((gs.mode == MODE_PHASED_ZEBRA_RESTART) ||
1320 (gs.mode == MODE_PHASED_ALL_RESTART)) &&
1321 !strcmp(dmn->name, special))
1322 gs.special = dmn;
1323 }
1324 }
1325 if (!gs.daemons) {
1326 fputs("Must specify one or more daemons to monitor.\n", stderr);
1327 return usage(progname, 1);
1328 }
1329 if (((gs.mode == MODE_PHASED_ZEBRA_RESTART) ||
1330 (gs.mode == MODE_PHASED_ALL_RESTART)) && !gs.special) {
1331 fprintf(stderr,
1332 "In mode [%s], but cannot find master daemon %s\n",
1333 mode_str[gs.mode], special);
1334 return usage(progname, 1);
1335 }
1336
1337 zlog_default = openzlog(progname, ZLOG_WATCHFRR, 0,
1338 LOG_CONS | LOG_NDELAY | LOG_PID, LOG_DAEMON);
1339 zlog_set_level(NULL, ZLOG_DEST_MONITOR, ZLOG_DISABLED);
1340 if (daemon_mode) {
1341 zlog_set_level(NULL, ZLOG_DEST_SYSLOG,
1342 MIN(gs.loglevel, LOG_DEBUG));
1343 if (daemon(0, 0) < 0) {
1344 fprintf(stderr, "Watchfrr daemon failed: %s",
1345 strerror(errno));
1346 exit(1);
1347 }
1348 } else
1349 zlog_set_level(NULL, ZLOG_DEST_STDOUT,
1350 MIN(gs.loglevel, LOG_DEBUG));
1351
1352 /* Make sure we're not already running. */
1353 pid_output(pidfile);
1354
1355 /* Announce which daemons are being monitored. */
1356 {
1357 struct daemon *dmn;
1358 size_t len = 0;
1359
1360 for (dmn = gs.daemons; dmn; dmn = dmn->next)
1361 len += strlen(dmn->name) + 1;
1362
1363 {
1364 char buf[len + 1];
1365 char *p = buf;
1366
1367 for (dmn = gs.daemons; dmn; dmn = dmn->next) {
1368 if (p != buf)
1369 *p++ = ' ';
1370 strcpy(p, dmn->name);
1371 p += strlen(p);
1372 }
1373 zlog_notice("%s %s watching [%s], mode [%s]",
1374 progname, FRR_VERSION, buf,
1375 mode_str[gs.mode]);
1376 }
1377 }
1378
1379 {
1380 struct thread thread;
1381
1382 while (thread_fetch(master, &thread))
1383 thread_call(&thread);
1384 }
1385
1386 systemd_send_stopping();
1387 /* Not reached. */
1388 return 0;
1389 }