]> git.proxmox.com Git - mirror_frr.git/blob - watchfrr/watchfrr.c
Merge pull request #176 from opensourcerouting/mtype-underflow-backtrace
[mirror_frr.git] / watchfrr / watchfrr.c
1 /*
2 Monitor status of frr daemons and restart if necessary.
3
4 Copyright (C) 2004 Andrew J. Schorr
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 */
20
21 #include <zebra.h>
22 #include <thread.h>
23 #include <log.h>
24 #include <network.h>
25 #include <sigevent.h>
26 #include <lib/version.h>
27 #include "command.h"
28 #include "memory_vty.h"
29
30 #include <getopt.h>
31 #include <sys/un.h>
32 #include <sys/wait.h>
33 #include <memory.h>
34 #include <systemd.h>
35
36 #include "watchfrr.h"
37
/* Fallback MIN() in case no header has provided one already.
   Note that the selected argument is evaluated twice. */
#ifndef MIN
#define MIN(X,Y) (((X) <= (Y)) ? (X) : (Y))
#endif

/* Macros to help randomize timers.
   JITTER(X) yields a pseudo-random offset between -(X)/2 and (X)-(X)/2.
   FUZZY(X) perturbs X by JITTER((X)/20), i.e. by roughly +/- 2.5%. */
#define JITTER(X) ((random() % ((X)+1))-((X)/2))
#define FUZZY(X) ((X)+JITTER((X)/20))

/* Built-in defaults.  The time values are expressed in seconds (they are
   the numbers printed by usage()); DEFAULT_LOGLEVEL is a syslog level. */
#define DEFAULT_PERIOD 5
#define DEFAULT_TIMEOUT 10
#define DEFAULT_RESTART_TIMEOUT 20
#define DEFAULT_LOGLEVEL LOG_INFO
#define DEFAULT_MIN_RESTART 60
#define DEFAULT_MAX_RESTART 600
/* Pid file location: a build-time override wins over STATEDIR. */
#ifdef PATH_WATCHFRR_PID
#define DEFAULT_PIDFILE PATH_WATCHFRR_PID
#else
#define DEFAULT_PIDFILE STATEDIR "/watchfrr.pid"
#endif
/* Directory in which the daemons' "<name>.vty" sockets are looked up. */
#ifdef DAEMON_VTY_DIR
#define VTYDIR DAEMON_VTY_DIR
#else
#define VTYDIR STATEDIR
#endif

/* Token sent with the "echo" command and expected back in the reply. */
#define PING_TOKEN "PING"
64
/* Needs to be global, referenced somewhere inside libfrr.
   Every timer/read/write event in this file is registered on it. */
struct thread_master *master;
67
/* Operating mode; decides what try_restart() does for a failed daemon. */
typedef enum
{
  MODE_MONITOR = 0,             /* only report state changes */
  MODE_GLOBAL_RESTART,          /* one command restarts everything */
  MODE_SEPARATE_RESTART,        /* restart just the failed daemon */
  MODE_PHASED_ZEBRA_RESTART,    /* special handling when zebra fails */
  MODE_PHASED_ALL_RESTART       /* special handling for any failure */
} watch_mode_t;

/* Human-readable names, indexed by watch_mode_t (used by usage()). */
static const char *mode_str[] =
{
  "monitor",
  "global restart",
  "individual daemon restart",
  "phased zebra restart",
  "phased global restart for any failure",
};
85
/* Steps of a phased restart, advanced in order by phase_check(). */
typedef enum
{
  PHASE_NONE = 0,                 /* no phased restart in progress */
  PHASE_STOPS_PENDING,            /* waiting for the stop jobs to finish */
  PHASE_WAITING_DOWN,             /* waiting for the other daemons to be down */
  PHASE_ZEBRA_RESTART_PENDING,    /* restart job of the special daemon running */
  PHASE_WAITING_ZEBRA_UP          /* waiting for the special daemon to be up */
} restart_phase_t;

/* Human-readable names, indexed by restart_phase_t.
   NOTE(review): there is a sixth string ("Start jobs running") with no
   matching enumerator in this file -- presumably a leftover; confirm. */
static const char *phase_str[] =
{
  "None",
  "Stop jobs running",
  "Waiting for other daemons to come down",
  "Zebra restart job running",
  "Waiting for zebra to come up",
  "Start jobs running",
};

/* Seconds a single phase may last before phase_hanging() aborts the
   phased restart. */
#define PHASE_TIMEOUT (3*gs.restart_timeout)
106
/* Bookkeeping for one background (start/stop/restart) job. */
struct restart_info
{
  const char *name;        /* substituted into the command; also used in logs */
  const char *what;        /* job type ("restart", "start", ...) for logs */
  pid_t pid;               /* running child, or 0 when no job is outstanding */
  struct timeval time;     /* when the job was started or was last reaped */
  long interval;           /* minimum seconds between jobs (see run_job) */
  struct thread *t_kill;   /* timer that signals a child running too long */
  int kills;               /* signals sent so far: first SIGTERM, then SIGKILL */
};
117
/* Process-wide configuration and run-time state. */
static struct global_state
{
  watch_mode_t mode;
  restart_phase_t phase;            /* current step of a phased restart */
  struct thread *t_phase_hanging;   /* timer guarding the current phase */
  const char *vtydir;               /* directory holding the <name>.vty sockets */
  long period;                      /* wakeup period, in milliseconds */
  long timeout;                     /* echo/connect timeout, in seconds */
  long restart_timeout;             /* seconds before a job is signalled */
  long min_restart_interval;        /* seconds */
  long max_restart_interval;        /* seconds */
  int do_ping;                      /* non-zero: send echo requests to daemons */
  struct daemon *daemons;           /* singly linked list of watched daemons */
  const char *restart_command;
  const char *start_command;
  const char *stop_command;
  struct restart_info restart;      /* job state for the "restart all" command */
  int unresponsive_restart;         /* non-zero: restart unresponsive daemons */
  int loglevel;
  struct daemon *special; /* points to zebra when doing phased restart */
  int numdaemons;                   /* presumably the list length; set outside this view */
  int numpids;                      /* background jobs currently outstanding */
  int numdown; /* # of daemons that are not UP or UNRESPONSIVE */
} gs = {
  .mode = MODE_MONITOR,
  .phase = PHASE_NONE,
  .vtydir = VTYDIR,
  .period = 1000*DEFAULT_PERIOD,    /* seconds -> milliseconds */
  .timeout = DEFAULT_TIMEOUT,
  .restart_timeout = DEFAULT_RESTART_TIMEOUT,
  .loglevel = DEFAULT_LOGLEVEL,
  .min_restart_interval = DEFAULT_MIN_RESTART,
  .max_restart_interval = DEFAULT_MAX_RESTART,
  .do_ping = 1,
};
153
/* Connection state of one monitored daemon. */
typedef enum
{
  DAEMON_INIT,            /* no connection attempt has completed yet */
  DAEMON_DOWN,            /* not connected */
  DAEMON_CONNECTING,      /* non-blocking connect still in progress */
  DAEMON_UP,              /* connected */
  DAEMON_UNRESPONSIVE     /* connected, but an echo went unanswered in time */
} daemon_state_t;

/* True when the daemon holds a connection, whether or not it answers. */
#define IS_UP(DMN) \
  (((DMN)->state == DAEMON_UP) || ((DMN)->state == DAEMON_UNRESPONSIVE))

/* Human-readable names, indexed by daemon_state_t. */
static const char *state_str[] =
{
  "Init",
  "Down",
  "Connecting",
  "Up",
  "Unresponsive",
};
174
/* One monitored daemon; instances are chained through 'next'. */
struct daemon {
  const char *name;             /* also the basename of the vty socket */
  daemon_state_t state;
  int fd;                       /* vty socket; set to -1 after it is closed */
  struct timeval echo_sent;     /* time of the pending echo; tv_sec == 0: none */
  u_int connect_tries;          /* attempts since the daemon was last up */
  struct thread *t_wakeup;      /* pending timer, if any */
  struct thread *t_read;        /* pending read event, if any */
  struct thread *t_write;       /* pending write event (connect completion) */
  struct daemon *next;
  struct restart_info restart;  /* per-daemon background job state */
};
187
/* Long command-line options for getopt_long(); each one maps onto the
   short option character given in its last field. */
static const struct option longopts[] =
{
  { "daemon", no_argument, NULL, 'd'},
  { "statedir", required_argument, NULL, 'S'},
  { "no-echo", no_argument, NULL, 'e'},
  { "loglevel", required_argument, NULL, 'l'},
  { "interval", required_argument, NULL, 'i'},
  { "timeout", required_argument, NULL, 't'},
  { "restart-timeout", required_argument, NULL, 'T'},
  { "restart", required_argument, NULL, 'r'},
  { "start-command", required_argument, NULL, 's'},
  { "kill-command", required_argument, NULL, 'k'},
  { "restart-all", required_argument, NULL, 'R'},
  { "all-restart", no_argument, NULL, 'a'},
  { "always-all-restart", no_argument, NULL, 'A'},
  { "unresponsive-restart", no_argument, NULL, 'z'},
  { "min-restart-interval", required_argument, NULL, 'm'},
  { "max-restart-interval", required_argument, NULL, 'M'},
  { "pid-file", required_argument, NULL, 'p'},
  { "blank-string", required_argument, NULL, 'b'},
  { "help", no_argument, NULL, 'h'},
  { "version", no_argument, NULL, 'v'},
  { NULL, 0, NULL, 0 }          /* terminator */
};
212
/* Forward declarations for functions that are used before they are
   defined further down in this file. */
static int try_connect(struct daemon *dmn);
static int wakeup_send_echo(struct thread *t_wakeup);
static void try_restart(struct daemon *dmn);
static void phase_check(void);
217
218 static int
219 usage(const char *progname, int status)
220 {
221 if (status != 0)
222 fprintf(stderr, "Try `%s --help' for more information.\n", progname);
223 else
224 {
225 printf("Usage : %s [OPTION...] <daemon name> ...\n\n\
226 Watchdog program to monitor status of frr daemons and try to restart\n\
227 them if they are down or unresponsive. It determines whether a daemon is\n\
228 up based on whether it can connect to the daemon's vty unix stream socket.\n\
229 It then repeatedly sends echo commands over that socket to determine whether\n\
230 the daemon is responsive. If the daemon crashes, we will receive an EOF\n\
231 on the socket connection and know immediately that the daemon is down.\n\n\
232 The daemons to be monitored should be listed on the command line.\n\n\
233 This program can run in one of 5 modes:\n\n\
234 0. Mode: %s.\n\
235 Just monitor and report on status changes. Example:\n\
236 %s -d zebra ospfd bgpd\n\n\
237 1. Mode: %s.\n\
238 Whenever any daemon hangs or crashes, use the given command to restart\n\
239 them all. Example:\n\
240 %s -dz \\\n\
241 -R '/sbin/service zebra restart; /sbin/service ospfd restart' \\\n\
242 zebra ospfd\n\n\
243 2. Mode: %s.\n\
244 When any single daemon hangs or crashes, restart only the daemon that's\n\
245 in trouble using the supplied restart command. Example:\n\
246 %s -dz -r '/sbin/service %%s restart' zebra ospfd bgpd\n\n\
247 3. Mode: %s.\n\
248 The same as the previous mode, except that there is special treatment when\n\
249 the zebra daemon is in trouble. In that case, a phased restart approach\n\
250 is used: 1. stop all other daemons; 2. restart zebra; 3. start the other\n\
251 daemons. Example:\n\
252 %s -adz -r '/sbin/service %%s restart' \\\n\
253 -s '/sbin/service %%s start' \\\n\
254 -k '/sbin/service %%s stop' zebra ospfd bgpd\n\n\
255 4. Mode: %s.\n\
256 This is the same as the previous mode, except that the phased restart\n\
257 procedure is used whenever any of the daemons hangs or crashes. Example:\n\
258 %s -Adz -r '/sbin/service %%s restart' \\\n\
259 -s '/sbin/service %%s start' \\\n\
260 -k '/sbin/service %%s stop' zebra ospfd bgpd\n\n\
261 As of this writing, it is believed that mode 2 [%s]\n\
262 is not safe, and mode 3 [%s] may not be safe with some of the\n\
263 routing daemons.\n\n\
264 In order to avoid attempting to restart the daemons in a fast loop,\n\
265 the -m and -M options allow you to control the minimum delay between\n\
266 restart commands. The minimum restart delay is recalculated each time\n\
267 a restart is attempted: if the time since the last restart attempt exceeds\n\
268 twice the -M value, then the restart delay is set to the -m value.\n\
269 Otherwise, the interval is doubled (but capped at the -M value).\n\n",
270 progname,mode_str[0],progname,mode_str[1],progname,mode_str[2],
271 progname,mode_str[3],progname,mode_str[4],progname,mode_str[2],
272 mode_str[3]);
273
274 printf("Options:\n\
275 -d, --daemon Run in daemon mode. In this mode, error messages are sent\n\
276 to syslog instead of stdout.\n\
277 -S, --statedir Set the vty socket directory (default is %s)\n\
278 -e, --no-echo Do not ping the daemons to test responsiveness (this\n\
279 option is necessary if the daemons do not support the\n\
280 echo command)\n\
281 -l, --loglevel Set the logging level (default is %d).\n\
282 The value should range from %d (LOG_EMERG) to %d (LOG_DEBUG),\n\
283 but it can be set higher than %d if extra-verbose debugging\n\
284 messages are desired.\n\
285 -m, --min-restart-interval\n\
286 Set the minimum seconds to wait between invocations of daemon\n\
287 restart commands (default is %d).\n\
288 -M, --max-restart-interval\n\
289 Set the maximum seconds to wait between invocations of daemon\n\
290 restart commands (default is %d).\n\
291 -i, --interval Set the status polling interval in seconds (default is %d)\n\
292 -t, --timeout Set the unresponsiveness timeout in seconds (default is %d)\n\
293 -T, --restart-timeout\n\
294 Set the restart (kill) timeout in seconds (default is %d).\n\
295 If any background jobs are still running after this much\n\
296 time has elapsed, they will be killed.\n\
297 -r, --restart Supply a Bourne shell command to use to restart a single\n\
298 daemon. The command string should include '%%s' where the\n\
299 name of the daemon should be substituted.\n\
300 Note that -r and -R are incompatible.\n\
301 -s, --start-command\n\
302 Supply a Bourne shell to command to use to start a single\n\
303 daemon. The command string should include '%%s' where the\n\
304 name of the daemon should be substituted.\n\
305 -k, --kill-command\n\
306 Supply a Bourne shell to command to use to stop a single\n\
307 daemon. The command string should include '%%s' where the\n\
308 name of the daemon should be substituted.\n\
309 -R, --restart-all\n\
310 When one or more daemons is down, try to restart everything\n\
311 using the Bourne shell command supplied as the argument.\n\
312 Note that -r and -R are incompatible.\n\
313 -z, --unresponsive-restart\n\
314 When a daemon is unresponsive, treat it as being down for\n\
315 restart purposes.\n\
316 -a, --all-restart\n\
317 When zebra hangs or crashes, restart all daemons using\n\
318 this phased approach: 1. stop all other daemons; 2. restart\n\
319 zebra; 3. start other daemons. Requires -r, -s, and -k.\n\
320 -A, --always-all-restart\n\
321 When any daemon (not just zebra) hangs or crashes, use the\n\
322 same phased restart mechanism described above for -a.\n\
323 Requires -r, -s, and -k.\n\
324 -p, --pid-file Set process identifier file name\n\
325 (default is %s).\n\
326 -b, --blank-string\n\
327 When the supplied argument string is found in any of the\n\
328 various shell command arguments (-r, -s, -k, or -R), replace\n\
329 it with a space. This is an ugly hack to circumvent problems\n\
330 passing command-line arguments with embedded spaces.\n\
331 -v, --version Print program version\n\
332 -h, --help Display this help and exit\n",
333 VTYDIR,DEFAULT_LOGLEVEL,LOG_EMERG,LOG_DEBUG,LOG_DEBUG,
334 DEFAULT_MIN_RESTART,DEFAULT_MAX_RESTART,
335 DEFAULT_PERIOD,DEFAULT_TIMEOUT,DEFAULT_RESTART_TIMEOUT,
336 DEFAULT_PIDFILE);
337 }
338
339 return status;
340 }
341
/*
 * Run 'shell_cmd' through "/bin/sh -c" in a forked child.
 *
 * Returns the child's pid, or -1 when fork() fails.  The child is not
 * waited for here; the parent reaps it later.
 */
static pid_t
run_background(char *shell_cmd)
{
  pid_t child = fork();

  if (child == -1)
    {
      zlog_err("fork failed, cannot run command [%s]: %s",
               shell_cmd,safe_strerror(errno));
      return -1;
    }

  if (child != 0)
    {
      /* Parent process: we will reap the child later. */
      zlog_err("Forked background command [pid %d]: %s",(int)child,shell_cmd);
      return child;
    }

  /* Child process from here on; it never returns to the caller. */

  /* Use separate process group so child processes can be killed easily. */
  if (setpgid(0,0) < 0)
    zlog_warn("warning: setpgid(0,0) failed: %s",safe_strerror(errno));

  {
    char sh_name[] = "sh";
    char sh_flag[] = "-c";
    char * const sh_argv[4] = { sh_name, sh_flag, shell_cmd, NULL};

    execv("/bin/sh", sh_argv);
    /* Only reached when the exec itself failed. */
    zlog_err("execv(/bin/sh -c '%s') failed: %s",
             shell_cmd,safe_strerror(errno));
    _exit(127);
  }
}
373
/*
 * Store in '*result' the wall-clock time elapsed since '*start_time'
 * (current gettimeofday() value minus start_time) and return 'result'.
 * A negative microsecond difference is carried into the seconds field.
 */
static struct timeval *
time_elapsed(struct timeval *result, const struct timeval *start_time)
{
  gettimeofday(result,NULL);
  result->tv_sec = result->tv_sec - start_time->tv_sec;
  result->tv_usec = result->tv_usec - start_time->tv_usec;

  /* Borrow whole seconds until the microsecond part is non-negative. */
  for (; result->tv_usec < 0; result->tv_sec--)
    result->tv_usec += 1000000L;

  return result;
}
387
388 static int
389 restart_kill(struct thread *t_kill)
390 {
391 struct restart_info *restart = THREAD_ARG(t_kill);
392 struct timeval delay;
393
394 time_elapsed(&delay,&restart->time);
395 zlog_warn("Warning: %s %s child process %d still running after "
396 "%ld seconds, sending signal %d",
397 restart->what,restart->name,(int)restart->pid, (long)delay.tv_sec,
398 (restart->kills ? SIGKILL : SIGTERM));
399 kill(-restart->pid,(restart->kills ? SIGKILL : SIGTERM));
400 restart->kills++;
401 restart->t_kill = thread_add_timer(master,restart_kill,restart,
402 gs.restart_timeout);
403 return 0;
404 }
405
406 static struct restart_info *
407 find_child(pid_t child)
408 {
409 if (gs.mode == MODE_GLOBAL_RESTART)
410 {
411 if (gs.restart.pid == child)
412 return &gs.restart;
413 }
414 else
415 {
416 struct daemon *dmn;
417 for (dmn = gs.daemons; dmn; dmn = dmn->next)
418 {
419 if (dmn->restart.pid == child)
420 return &dmn->restart;
421 }
422 }
423 return NULL;
424 }
425
426 static void
427 sigchild(void)
428 {
429 pid_t child;
430 int status;
431 const char *name;
432 const char *what;
433 struct restart_info *restart;
434
435 switch (child = waitpid(-1,&status,WNOHANG))
436 {
437 case -1:
438 zlog_err("waitpid failed: %s",safe_strerror(errno));
439 return;
440 case 0:
441 zlog_warn("SIGCHLD received, but waitpid did not reap a child");
442 return;
443 }
444
445 if (child == integrated_write_pid)
446 {
447 integrated_write_sigchld(status);
448 return;
449 }
450
451 if ((restart = find_child(child)) != NULL)
452 {
453 name = restart->name;
454 what = restart->what;
455 restart->pid = 0;
456 gs.numpids--;
457 thread_cancel(restart->t_kill);
458 restart->t_kill = NULL;
459 /* Update restart time to reflect the time the command completed. */
460 gettimeofday(&restart->time,NULL);
461 }
462 else
463 {
464 zlog_err("waitpid returned status for an unknown child process %d",
465 (int)child);
466 name = "(unknown)";
467 what = "background";
468 }
469 if (WIFSTOPPED(status))
470 zlog_warn("warning: %s %s process %d is stopped",
471 what,name,(int)child);
472 else if (WIFSIGNALED(status))
473 zlog_warn("%s %s process %d terminated due to signal %d",
474 what,name,(int)child,WTERMSIG(status));
475 else if (WIFEXITED(status))
476 {
477 if (WEXITSTATUS(status) != 0)
478 zlog_warn("%s %s process %d exited with non-zero status %d",
479 what,name,(int)child,WEXITSTATUS(status));
480 else
481 zlog_debug("%s %s process %d exited normally",what,name,(int)child);
482 }
483 else
484 zlog_err("cannot interpret %s %s process %d wait status 0x%x",
485 what,name,(int)child,status);
486 phase_check();
487 }
488
489 static int
490 run_job(struct restart_info *restart, const char *cmdtype, const char *command,
491 int force, int update_interval)
492 {
493 struct timeval delay;
494
495 if (gs.loglevel > LOG_DEBUG+1)
496 zlog_debug("attempting to %s %s",cmdtype,restart->name);
497
498 if (restart->pid)
499 {
500 if (gs.loglevel > LOG_DEBUG+1)
501 zlog_debug("cannot %s %s, previous pid %d still running",
502 cmdtype,restart->name,(int)restart->pid);
503 return -1;
504 }
505
506 /* Note: time_elapsed test must come before the force test, since we need
507 to make sure that delay is initialized for use below in updating the
508 restart interval. */
509 if ((time_elapsed(&delay,&restart->time)->tv_sec < restart->interval) &&
510 !force)
511 {
512 if (gs.loglevel > LOG_DEBUG+1)
513 zlog_debug("postponing %s %s: "
514 "elapsed time %ld < retry interval %ld",
515 cmdtype,restart->name,(long)delay.tv_sec,restart->interval);
516 return -1;
517 }
518
519 gettimeofday(&restart->time,NULL);
520 restart->kills = 0;
521 {
522 char cmd[strlen(command)+strlen(restart->name)+1];
523 snprintf(cmd,sizeof(cmd),command,restart->name);
524 if ((restart->pid = run_background(cmd)) > 0)
525 {
526 restart->t_kill = thread_add_timer(master,restart_kill,restart,
527 gs.restart_timeout);
528 restart->what = cmdtype;
529 gs.numpids++;
530 }
531 else
532 restart->pid = 0;
533 }
534
535 /* Calculate the new restart interval. */
536 if (update_interval)
537 {
538 if (delay.tv_sec > 2*gs.max_restart_interval)
539 restart->interval = gs.min_restart_interval;
540 else if ((restart->interval *= 2) > gs.max_restart_interval)
541 restart->interval = gs.max_restart_interval;
542 if (gs.loglevel > LOG_DEBUG+1)
543 zlog_debug("restart %s interval is now %ld",
544 restart->name,restart->interval);
545 }
546 return restart->pid;
547 }
548
/* Register handle_read() for the daemon's socket and remember the event. */
#define SET_READ_HANDLER(DMN) \
  (DMN)->t_read = thread_add_read(master,handle_read,(DMN),(DMN)->fd)

/* The three macros below arm the daemon's single wakeup timer with a
   randomized period (FUZZY(gs.period), in milliseconds).  They differ
   only in the callback that is scheduled. */

/* Daemon is down: try to connect again later. */
#define SET_WAKEUP_DOWN(DMN) \
  (DMN)->t_wakeup = thread_add_timer_msec(master,wakeup_down,(DMN), \
                                          FUZZY(gs.period))

/* Daemon is unresponsive: re-check (and possibly restart) later. */
#define SET_WAKEUP_UNRESPONSIVE(DMN) \
  (DMN)->t_wakeup = thread_add_timer_msec(master,wakeup_unresponsive,(DMN), \
                                          FUZZY(gs.period))

/* Daemon is up: send the next echo request later. */
#define SET_WAKEUP_ECHO(DMN) \
  (DMN)->t_wakeup = thread_add_timer_msec(master,wakeup_send_echo,(DMN), \
                                          FUZZY(gs.period))
563
564 static int
565 wakeup_down(struct thread *t_wakeup)
566 {
567 struct daemon *dmn = THREAD_ARG(t_wakeup);
568
569 dmn->t_wakeup = NULL;
570 if (try_connect(dmn) < 0)
571 SET_WAKEUP_DOWN(dmn);
572 if ((dmn->connect_tries > 1) && (dmn->state != DAEMON_UP))
573 try_restart(dmn);
574 return 0;
575 }
576
577 static int
578 wakeup_init(struct thread *t_wakeup)
579 {
580 struct daemon *dmn = THREAD_ARG(t_wakeup);
581
582 dmn->t_wakeup = NULL;
583 if (try_connect(dmn) < 0)
584 {
585 SET_WAKEUP_DOWN(dmn);
586 zlog_err("%s state -> down : initial connection attempt failed",
587 dmn->name);
588 dmn->state = DAEMON_DOWN;
589 }
590 return 0;
591 }
592
593 static void
594 daemon_down(struct daemon *dmn, const char *why)
595 {
596 if (IS_UP(dmn) || (dmn->state == DAEMON_INIT))
597 zlog_err("%s state -> down : %s",dmn->name,why);
598 else if (gs.loglevel > LOG_DEBUG)
599 zlog_debug("%s still down : %s",dmn->name,why);
600 if (IS_UP(dmn))
601 gs.numdown++;
602 dmn->state = DAEMON_DOWN;
603 if (dmn->fd >= 0)
604 {
605 close(dmn->fd);
606 dmn->fd = -1;
607 }
608 THREAD_OFF(dmn->t_read);
609 THREAD_OFF(dmn->t_write);
610 THREAD_OFF(dmn->t_wakeup);
611 if (try_connect(dmn) < 0)
612 SET_WAKEUP_DOWN(dmn);
613 phase_check();
614 }
615
616 static int
617 handle_read(struct thread *t_read)
618 {
619 struct daemon *dmn = THREAD_ARG(t_read);
620 static const char resp[sizeof(PING_TOKEN)+4] = PING_TOKEN "\n";
621 char buf[sizeof(resp)+100];
622 ssize_t rc;
623 struct timeval delay;
624
625 dmn->t_read = NULL;
626 if ((rc = read(dmn->fd,buf,sizeof(buf))) < 0)
627 {
628 char why[100];
629
630 if (ERRNO_IO_RETRY(errno))
631 {
632 /* Pretend it never happened. */
633 SET_READ_HANDLER(dmn);
634 return 0;
635 }
636 snprintf(why,sizeof(why),"unexpected read error: %s",
637 safe_strerror(errno));
638 daemon_down(dmn,why);
639 return 0;
640 }
641 if (rc == 0)
642 {
643 daemon_down(dmn,"read returned EOF");
644 return 0;
645 }
646 if (!dmn->echo_sent.tv_sec)
647 {
648 char why[sizeof(buf)+100];
649 snprintf(why,sizeof(why),"unexpected read returns %d bytes: %.*s",
650 (int)rc,(int)rc,buf);
651 daemon_down(dmn,why);
652 return 0;
653 }
654
655 /* We are expecting an echo response: is there any chance that the
656 response would not be returned entirely in the first read? That
657 seems inconceivable... */
658 if ((rc != sizeof(resp)) || memcmp(buf,resp,sizeof(resp)))
659 {
660 char why[100+sizeof(buf)];
661 snprintf(why,sizeof(why),"read returned bad echo response of %d bytes "
662 "(expecting %u): %.*s",
663 (int)rc,(u_int)sizeof(resp),(int)rc,buf);
664 daemon_down(dmn,why);
665 return 0;
666 }
667
668 time_elapsed(&delay,&dmn->echo_sent);
669 dmn->echo_sent.tv_sec = 0;
670 if (dmn->state == DAEMON_UNRESPONSIVE)
671 {
672 if (delay.tv_sec < gs.timeout)
673 {
674 dmn->state = DAEMON_UP;
675 zlog_warn("%s state -> up : echo response received after %ld.%06ld "
676 "seconds", dmn->name,
677 (long)delay.tv_sec, (long)delay.tv_usec);
678 }
679 else
680 zlog_warn("%s: slow echo response finally received after %ld.%06ld "
681 "seconds", dmn->name,
682 (long)delay.tv_sec, (long)delay.tv_usec);
683 }
684 else if (gs.loglevel > LOG_DEBUG+1)
685 zlog_debug("%s: echo response received after %ld.%06ld seconds",
686 dmn->name, (long)delay.tv_sec, (long)delay.tv_usec);
687
688 SET_READ_HANDLER(dmn);
689 if (dmn->t_wakeup)
690 thread_cancel(dmn->t_wakeup);
691 SET_WAKEUP_ECHO(dmn);
692
693 return 0;
694 }
695
696 /*
697 * Wait till we notice that all daemons are ready before
698 * we send we are ready to systemd
699 */
700 static void
701 daemon_send_ready (void)
702 {
703 static int sent = 0;
704 if (!sent && gs.numdown == 0)
705 {
706 #if defined (HAVE_CUMULUS)
707 FILE *fp;
708
709 fp = fopen(DAEMON_VTY_DIR "/watchfrr.started", "w");
710 fclose(fp);
711 #endif
712 zlog_notice ("Watchfrr: Notifying Systemd we are up and running");
713 systemd_send_started(master, 0);
714 sent = 1;
715 }
716 }
717
718 static void
719 daemon_up(struct daemon *dmn, const char *why)
720 {
721 dmn->state = DAEMON_UP;
722 gs.numdown--;
723 dmn->connect_tries = 0;
724 zlog_notice("%s state -> up : %s",dmn->name,why);
725 daemon_send_ready();
726 if (gs.do_ping)
727 SET_WAKEUP_ECHO(dmn);
728 phase_check();
729 }
730
731 static int
732 check_connect(struct thread *t_write)
733 {
734 struct daemon *dmn = THREAD_ARG(t_write);
735 int sockerr;
736 socklen_t reslen = sizeof(sockerr);
737
738 dmn->t_write = NULL;
739 if (getsockopt(dmn->fd,SOL_SOCKET,SO_ERROR,(char *)&sockerr,&reslen) < 0)
740 {
741 zlog_warn("%s: check_connect: getsockopt failed: %s",
742 dmn->name,safe_strerror(errno));
743 daemon_down(dmn,"getsockopt failed checking connection success");
744 return 0;
745 }
746 if ((reslen == sizeof(sockerr)) && sockerr)
747 {
748 char why[100];
749 snprintf(why,sizeof(why),
750 "getsockopt reports that connection attempt failed: %s",
751 safe_strerror(sockerr));
752 daemon_down(dmn,why);
753 return 0;
754 }
755
756 daemon_up(dmn,"delayed connect succeeded");
757 return 0;
758 }
759
760 static int
761 wakeup_connect_hanging(struct thread *t_wakeup)
762 {
763 struct daemon *dmn = THREAD_ARG(t_wakeup);
764 char why[100];
765
766 dmn->t_wakeup = NULL;
767 snprintf(why,sizeof(why),"connection attempt timed out after %ld seconds",
768 gs.timeout);
769 daemon_down(dmn,why);
770 return 0;
771 }
772
773 /* Making connection to protocol daemon. */
774 static int
775 try_connect(struct daemon *dmn)
776 {
777 int sock;
778 struct sockaddr_un addr;
779 socklen_t len;
780
781 if (gs.loglevel > LOG_DEBUG+1)
782 zlog_debug("%s: attempting to connect",dmn->name);
783 dmn->connect_tries++;
784
785 memset (&addr, 0, sizeof (struct sockaddr_un));
786 addr.sun_family = AF_UNIX;
787 snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/%s.vty",
788 gs.vtydir,dmn->name);
789 #ifdef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN
790 len = addr.sun_len = SUN_LEN(&addr);
791 #else
792 len = sizeof (addr.sun_family) + strlen (addr.sun_path);
793 #endif /* HAVE_STRUCT_SOCKADDR_UN_SUN_LEN */
794
795 /* Quick check to see if we might succeed before we go to the trouble
796 of creating a socket. */
797 if (access(addr.sun_path, W_OK) < 0)
798 {
799 if (errno != ENOENT)
800 zlog_err("%s: access to socket %s denied: %s",
801 dmn->name,addr.sun_path,safe_strerror(errno));
802 return -1;
803 }
804
805 if ((sock = socket (AF_UNIX, SOCK_STREAM, 0)) < 0)
806 {
807 zlog_err("%s(%s): cannot make socket: %s",
808 __func__,addr.sun_path, safe_strerror(errno));
809 return -1;
810 }
811
812 if (set_nonblocking(sock) < 0 || set_cloexec(sock) < 0)
813 {
814 zlog_err("%s(%s): set_nonblocking/cloexec(%d) failed",
815 __func__, addr.sun_path, sock);
816 close(sock);
817 return -1;
818 }
819
820 if (connect (sock, (struct sockaddr *) &addr, len) < 0)
821 {
822 if ((errno != EINPROGRESS) && (errno != EWOULDBLOCK))
823 {
824 if (gs.loglevel > LOG_DEBUG)
825 zlog_debug("%s(%s): connect failed: %s",
826 __func__,addr.sun_path, safe_strerror(errno));
827 close (sock);
828 return -1;
829 }
830 if (gs.loglevel > LOG_DEBUG)
831 zlog_debug("%s: connection in progress",dmn->name);
832 dmn->state = DAEMON_CONNECTING;
833 dmn->fd = sock;
834 dmn->t_write = thread_add_write(master,check_connect,dmn,dmn->fd);
835 dmn->t_wakeup = thread_add_timer(master,wakeup_connect_hanging,dmn,
836 gs.timeout);
837 SET_READ_HANDLER(dmn);
838 return 0;
839 }
840
841 dmn->fd = sock;
842 SET_READ_HANDLER(dmn);
843 daemon_up(dmn,"connect succeeded");
844 return 1;
845 }
846
847 static int
848 phase_hanging(struct thread *t_hanging)
849 {
850 gs.t_phase_hanging = NULL;
851 zlog_err("Phase [%s] hanging for %ld seconds, aborting phased restart",
852 phase_str[gs.phase],PHASE_TIMEOUT);
853 gs.phase = PHASE_NONE;
854 return 0;
855 }
856
857 static void
858 set_phase(restart_phase_t new_phase)
859 {
860 gs.phase = new_phase;
861 if (gs.t_phase_hanging)
862 thread_cancel(gs.t_phase_hanging);
863 gs.t_phase_hanging = thread_add_timer(master,phase_hanging,NULL,
864 PHASE_TIMEOUT);
865 }
866
867 static void
868 phase_check(void)
869 {
870 switch (gs.phase)
871 {
872 case PHASE_NONE:
873 break;
874 case PHASE_STOPS_PENDING:
875 if (gs.numpids)
876 break;
877 zlog_info("Phased restart: all routing daemon stop jobs have completed.");
878 set_phase(PHASE_WAITING_DOWN);
879 /*FALLTHRU*/
880 case PHASE_WAITING_DOWN:
881 if (gs.numdown+IS_UP(gs.special) < gs.numdaemons)
882 break;
883 zlog_info("Phased restart: all routing daemons now down.");
884 run_job(&gs.special->restart,"restart",gs.restart_command,1,1);
885 set_phase(PHASE_ZEBRA_RESTART_PENDING);
886 /*FALLTHRU*/
887 case PHASE_ZEBRA_RESTART_PENDING:
888 if (gs.special->restart.pid)
889 break;
890 zlog_info("Phased restart: %s restart job completed.",gs.special->name);
891 set_phase(PHASE_WAITING_ZEBRA_UP);
892 /*FALLTHRU*/
893 case PHASE_WAITING_ZEBRA_UP:
894 if (!IS_UP(gs.special))
895 break;
896 zlog_info("Phased restart: %s is now up.",gs.special->name);
897 {
898 struct daemon *dmn;
899 for (dmn = gs.daemons; dmn; dmn = dmn->next)
900 {
901 if (dmn != gs.special)
902 run_job(&dmn->restart,"start",gs.start_command,1,0);
903 }
904 }
905 gs.phase = PHASE_NONE;
906 THREAD_OFF(gs.t_phase_hanging);
907 zlog_notice("Phased global restart has completed.");
908 break;
909 }
910 }
911
912 static void
913 try_restart(struct daemon *dmn)
914 {
915 switch (gs.mode)
916 {
917 case MODE_MONITOR:
918 return;
919 case MODE_GLOBAL_RESTART:
920 run_job(&gs.restart,"restart",gs.restart_command,0,1);
921 break;
922 case MODE_SEPARATE_RESTART:
923 run_job(&dmn->restart,"restart",gs.restart_command,0,1);
924 break;
925 case MODE_PHASED_ZEBRA_RESTART:
926 if (dmn != gs.special)
927 {
928 if ((gs.special->state == DAEMON_UP) && (gs.phase == PHASE_NONE))
929 run_job(&dmn->restart,"restart",gs.restart_command,0,1);
930 else
931 zlog_debug("%s: postponing restart attempt because master %s daemon "
932 "not up [%s], or phased restart in progress",
933 dmn->name,gs.special->name,state_str[gs.special->state]);
934 break;
935 }
936 /*FALLTHRU*/
937 case MODE_PHASED_ALL_RESTART:
938 if ((gs.phase != PHASE_NONE) || gs.numpids)
939 {
940 if (gs.loglevel > LOG_DEBUG+1)
941 zlog_debug("postponing phased global restart: restart already in "
942 "progress [%s], or outstanding child processes [%d]",
943 phase_str[gs.phase],gs.numpids);
944 break;
945 }
946 /* Is it too soon for a restart? */
947 {
948 struct timeval delay;
949 if (time_elapsed(&delay,&gs.special->restart.time)->tv_sec <
950 gs.special->restart.interval)
951 {
952 if (gs.loglevel > LOG_DEBUG+1)
953 zlog_debug("postponing phased global restart: "
954 "elapsed time %ld < retry interval %ld",
955 (long)delay.tv_sec,gs.special->restart.interval);
956 break;
957 }
958 }
959 run_job(&gs.restart,"restart",gs.restart_command,0,1);
960 break;
961 default:
962 zlog_err("error: unknown restart mode %d",gs.mode);
963 break;
964 }
965 }
966
967 static int
968 wakeup_unresponsive(struct thread *t_wakeup)
969 {
970 struct daemon *dmn = THREAD_ARG(t_wakeup);
971
972 dmn->t_wakeup = NULL;
973 if (dmn->state != DAEMON_UNRESPONSIVE)
974 zlog_err("%s: no longer unresponsive (now %s), "
975 "wakeup should have been cancelled!",
976 dmn->name,state_str[dmn->state]);
977 else
978 {
979 SET_WAKEUP_UNRESPONSIVE(dmn);
980 try_restart(dmn);
981 }
982 return 0;
983 }
984
985 static int
986 wakeup_no_answer(struct thread *t_wakeup)
987 {
988 struct daemon *dmn = THREAD_ARG(t_wakeup);
989
990 dmn->t_wakeup = NULL;
991 dmn->state = DAEMON_UNRESPONSIVE;
992 zlog_err("%s state -> unresponsive : no response yet to ping "
993 "sent %ld seconds ago",dmn->name,gs.timeout);
994 if (gs.unresponsive_restart)
995 {
996 SET_WAKEUP_UNRESPONSIVE(dmn);
997 try_restart(dmn);
998 }
999 return 0;
1000 }
1001
1002 static int
1003 wakeup_send_echo(struct thread *t_wakeup)
1004 {
1005 static const char echocmd[] = "echo " PING_TOKEN;
1006 ssize_t rc;
1007 struct daemon *dmn = THREAD_ARG(t_wakeup);
1008
1009 dmn->t_wakeup = NULL;
1010 if (((rc = write(dmn->fd,echocmd,sizeof(echocmd))) < 0) ||
1011 ((size_t)rc != sizeof(echocmd)))
1012 {
1013 char why[100+sizeof(echocmd)];
1014 snprintf(why,sizeof(why),"write '%s' returned %d instead of %u",
1015 echocmd,(int)rc,(u_int)sizeof(echocmd));
1016 daemon_down(dmn,why);
1017 }
1018 else
1019 {
1020 gettimeofday(&dmn->echo_sent,NULL);
1021 dmn->t_wakeup = thread_add_timer(master,wakeup_no_answer,dmn,gs.timeout);
1022 }
1023 return 0;
1024 }
1025
/*
 * Handler for termination signals: log, tell systemd that we are
 * stopping, and exit successfully.
 */
static void
sigint(void)
{
  zlog_notice("Terminating on signal");
  systemd_send_stopping ();
  exit(EXIT_SUCCESS);
}
1033
/*
 * Check a per-daemon command template.
 *
 * Returns 1 when 'cmd' contains exactly one '%' character and that
 * character is immediately followed by 's'; returns 0 otherwise.
 */
static int
valid_command(const char *cmd)
{
  const char *percent = strchr(cmd,'%');

  if (percent == NULL)
    return 0;
  if (percent[1] != 's')
    return 0;
  /* No further '%' may follow the "%s". */
  return strchr(percent+1,'%') == NULL;
}
1041
/* This is an ugly hack to circumvent problems with passing command-line
   arguments that contain spaces.  The fix is to use a configuration file.

   Returns a newly allocated copy of 'cmd' in which every occurrence of
   'blankstr' has been replaced by a single space.  The process exits if
   the copy cannot be allocated.

   The search resumes just after each inserted space instead of at the
   start of the string, so a 'blankstr' that itself contains a space
   cannot match its own replacement for ever.  An empty 'blankstr'
   matches everywhere and would grow the string without bound, so it is
   treated as "nothing to replace". */
static char *
translate_blanks(const char *cmd, const char *blankstr)
{
  char *res;
  char *p;
  size_t bslen = strlen(blankstr);

  if (!(res = strdup(cmd)))
    {
      perror("strdup");
      exit(1);
    }

  if (bslen == 0)
    return res;

  p = res;
  while ((p = strstr(p,blankstr)) != NULL)
    {
      *p = ' ';
      /* Close the gap left by the rest of the marker (the moved range
         includes the terminating NUL). */
      if (bslen != 1)
        memmove(p+1,p+bslen,strlen(p+bslen)+1);
      p++;
    }
  return res;
}
1064
/* Privilege settings for this program.  Only the vty group is set, and
   only when VTY_GROUP is defined at build time; every other field keeps
   its zero default. */
struct zebra_privs_t watchfrr_privs =
{
#ifdef VTY_GROUP
  .vty_group = VTY_GROUP,
#endif
};
1071
1072 int
1073 main(int argc, char **argv)
1074 {
1075 const char *progname;
1076 int opt;
1077 int daemon_mode = 0;
1078 const char *pidfile = DEFAULT_PIDFILE;
1079 const char *special = "zebra";
1080 const char *blankstr = NULL;
1081 static struct quagga_signal_t my_signals[] =
1082 {
1083 {
1084 .signal = SIGINT,
1085 .handler = sigint,
1086 },
1087 {
1088 .signal = SIGTERM,
1089 .handler = sigint,
1090 },
1091 {
1092 .signal = SIGCHLD,
1093 .handler = sigchild,
1094 },
1095 };
1096
1097 if ((progname = strrchr (argv[0], '/')) != NULL)
1098 progname++;
1099 else
1100 progname = argv[0];
1101
1102 gs.restart.name = "all";
1103 while ((opt = getopt_long(argc, argv, "aAb:dek:l:m:M:i:p:r:R:S:s:t:T:zvh",
1104 longopts, 0)) != EOF)
1105 {
1106 switch (opt)
1107 {
1108 case 0:
1109 break;
1110 case 'a':
1111 if ((gs.mode != MODE_MONITOR) && (gs.mode != MODE_SEPARATE_RESTART))
1112 {
1113 fputs("Ambiguous operating mode selected.\n",stderr);
1114 return usage(progname,1);
1115 }
1116 gs.mode = MODE_PHASED_ZEBRA_RESTART;
1117 break;
1118 case 'A':
1119 if ((gs.mode != MODE_MONITOR) && (gs.mode != MODE_SEPARATE_RESTART))
1120 {
1121 fputs("Ambiguous operating mode selected.\n",stderr);
1122 return usage(progname,1);
1123 }
1124 gs.mode = MODE_PHASED_ALL_RESTART;
1125 break;
1126 case 'b':
1127 blankstr = optarg;
1128 break;
1129 case 'd':
1130 daemon_mode = 1;
1131 break;
1132 case 'e':
1133 gs.do_ping = 0;
1134 break;
1135 case 'k':
1136 if (!valid_command(optarg))
1137 {
1138 fprintf(stderr,"Invalid kill command, must contain '%%s': %s\n",
1139 optarg);
1140 return usage(progname,1);
1141 }
1142 gs.stop_command = optarg;
1143 break;
1144 case 'l':
1145 {
1146 char garbage[3];
1147 if ((sscanf(optarg,"%d%1s",&gs.loglevel,garbage) != 1) ||
1148 (gs.loglevel < LOG_EMERG))
1149 {
1150 fprintf(stderr,"Invalid loglevel argument: %s\n",optarg);
1151 return usage(progname,1);
1152 }
1153 }
1154 break;
1155 case 'm':
1156 {
1157 char garbage[3];
1158 if ((sscanf(optarg,"%ld%1s",
1159 &gs.min_restart_interval,garbage) != 1) ||
1160 (gs.min_restart_interval < 0))
1161 {
1162 fprintf(stderr,"Invalid min_restart_interval argument: %s\n",
1163 optarg);
1164 return usage(progname,1);
1165 }
1166 }
1167 break;
1168 case 'M':
1169 {
1170 char garbage[3];
1171 if ((sscanf(optarg,"%ld%1s",
1172 &gs.max_restart_interval,garbage) != 1) ||
1173 (gs.max_restart_interval < 0))
1174 {
1175 fprintf(stderr,"Invalid max_restart_interval argument: %s\n",
1176 optarg);
1177 return usage(progname,1);
1178 }
1179 }
1180 break;
1181 case 'i':
1182 {
1183 char garbage[3];
1184 int period;
1185 if ((sscanf(optarg,"%d%1s",&period,garbage) != 1) ||
1186 (gs.period < 1))
1187 {
1188 fprintf(stderr,"Invalid interval argument: %s\n",optarg);
1189 return usage(progname,1);
1190 }
1191 gs.period = 1000*period;
1192 }
1193 break;
1194 case 'p':
1195 pidfile = optarg;
1196 break;
1197 case 'r':
1198 if ((gs.mode == MODE_GLOBAL_RESTART) ||
1199 (gs.mode == MODE_SEPARATE_RESTART))
1200 {
1201 fputs("Ambiguous operating mode selected.\n",stderr);
1202 return usage(progname,1);
1203 }
1204 if (!valid_command(optarg))
1205 {
1206 fprintf(stderr,
1207 "Invalid restart command, must contain '%%s': %s\n",
1208 optarg);
1209 return usage(progname,1);
1210 }
1211 gs.restart_command = optarg;
1212 if (gs.mode == MODE_MONITOR)
1213 gs.mode = MODE_SEPARATE_RESTART;
1214 break;
1215 case 'R':
1216 if (gs.mode != MODE_MONITOR)
1217 {
1218 fputs("Ambiguous operating mode selected.\n",stderr);
1219 return usage(progname,1);
1220 }
1221 if (strchr(optarg,'%'))
1222 {
1223 fprintf(stderr,
1224 "Invalid restart-all arg, must not contain '%%s': %s\n",
1225 optarg);
1226 return usage(progname,1);
1227 }
1228 gs.restart_command = optarg;
1229 gs.mode = MODE_GLOBAL_RESTART;
1230 break;
1231 case 's':
1232 if (!valid_command(optarg))
1233 {
1234 fprintf(stderr,"Invalid start command, must contain '%%s': %s\n",
1235 optarg);
1236 return usage(progname,1);
1237 }
1238 gs.start_command = optarg;
1239 break;
1240 case 'S':
1241 gs.vtydir = optarg;
1242 break;
1243 case 't':
1244 {
1245 char garbage[3];
1246 if ((sscanf(optarg,"%ld%1s",&gs.timeout,garbage) != 1) ||
1247 (gs.timeout < 1))
1248 {
1249 fprintf(stderr,"Invalid timeout argument: %s\n",optarg);
1250 return usage(progname,1);
1251 }
1252 }
1253 break;
1254 case 'T':
1255 {
1256 char garbage[3];
1257 if ((sscanf(optarg,"%ld%1s",&gs.restart_timeout,garbage) != 1) ||
1258 (gs.restart_timeout < 1))
1259 {
1260 fprintf(stderr,"Invalid restart timeout argument: %s\n",optarg);
1261 return usage(progname,1);
1262 }
1263 }
1264 break;
1265 case 'z':
1266 gs.unresponsive_restart = 1;
1267 break;
1268 case 'v':
1269 printf ("%s version %s\n", progname, FRR_VERSION);
1270 puts("Copyright 2004 Andrew J. Schorr");
1271 return 0;
1272 case 'h':
1273 return usage(progname,0);
1274 default:
1275 fputs("Invalid option.\n",stderr);
1276 return usage(progname,1);
1277 }
1278 }
1279
1280 if (gs.unresponsive_restart && (gs.mode == MODE_MONITOR))
1281 {
1282 fputs("Option -z requires a -r or -R restart option.\n",stderr);
1283 return usage(progname,1);
1284 }
1285 switch (gs.mode)
1286 {
1287 case MODE_MONITOR:
1288 if (gs.restart_command || gs.start_command || gs.stop_command)
1289 {
1290 fprintf(stderr,"No kill/(re)start commands needed for %s mode.\n",
1291 mode_str[gs.mode]);
1292 return usage(progname,1);
1293 }
1294 break;
1295 case MODE_GLOBAL_RESTART:
1296 case MODE_SEPARATE_RESTART:
1297 if (!gs.restart_command || gs.start_command || gs.stop_command)
1298 {
1299 fprintf(stderr,"No start/kill commands needed in [%s] mode.\n",
1300 mode_str[gs.mode]);
1301 return usage(progname,1);
1302 }
1303 break;
1304 case MODE_PHASED_ZEBRA_RESTART:
1305 case MODE_PHASED_ALL_RESTART:
1306 if (!gs.restart_command || !gs.start_command || !gs.stop_command)
1307 {
1308 fprintf(stderr,
1309 "Need start, kill, and restart commands in [%s] mode.\n",
1310 mode_str[gs.mode]);
1311 return usage(progname,1);
1312 }
1313 break;
1314 }
1315
1316 if (blankstr)
1317 {
1318 if (gs.restart_command)
1319 gs.restart_command = translate_blanks(gs.restart_command,blankstr);
1320 if (gs.start_command)
1321 gs.start_command = translate_blanks(gs.start_command,blankstr);
1322 if (gs.stop_command)
1323 gs.stop_command = translate_blanks(gs.stop_command,blankstr);
1324 }
1325
1326 gs.restart.interval = gs.min_restart_interval;
1327
1328 zprivs_init (&watchfrr_privs);
1329
1330 master = thread_master_create();
1331 cmd_init(-1);
1332 memory_init();
1333 vty_init(master);
1334 watchfrr_vty_init();
1335 vty_serv_sock(NULL, 0, WATCHFRR_VTYSH_PATH);
1336
1337 signal_init (master, array_size(my_signals), my_signals);
1338 srandom(time(NULL));
1339
1340 {
1341 int i;
1342 struct daemon *tail = NULL;
1343
1344 for (i = optind; i < argc; i++)
1345 {
1346 struct daemon *dmn;
1347
1348 if (!(dmn = (struct daemon *)calloc(1,sizeof(*dmn))))
1349 {
1350 fprintf(stderr,"calloc(1,%u) failed: %s\n",
1351 (u_int)sizeof(*dmn), safe_strerror(errno));
1352 return 1;
1353 }
1354 dmn->name = dmn->restart.name = argv[i];
1355 dmn->state = DAEMON_INIT;
1356 gs.numdaemons++;
1357 gs.numdown++;
1358 dmn->fd = -1;
1359 dmn->t_wakeup = thread_add_timer_msec(master,wakeup_init,dmn,
1360 100+(random() % 900));
1361 dmn->restart.interval = gs.min_restart_interval;
1362 if (tail)
1363 tail->next = dmn;
1364 else
1365 gs.daemons = dmn;
1366 tail = dmn;
1367
1368 if (((gs.mode == MODE_PHASED_ZEBRA_RESTART) ||
1369 (gs.mode == MODE_PHASED_ALL_RESTART)) &&
1370 !strcmp(dmn->name,special))
1371 gs.special = dmn;
1372 }
1373 }
1374 if (!gs.daemons)
1375 {
1376 fputs("Must specify one or more daemons to monitor.\n",stderr);
1377 return usage(progname,1);
1378 }
1379 if (((gs.mode == MODE_PHASED_ZEBRA_RESTART) ||
1380 (gs.mode == MODE_PHASED_ALL_RESTART)) && !gs.special)
1381 {
1382 fprintf(stderr,"In mode [%s], but cannot find master daemon %s\n",
1383 mode_str[gs.mode],special);
1384 return usage(progname,1);
1385 }
1386
1387 zlog_default = openzlog(progname, ZLOG_WATCHFRR, 0,
1388 LOG_CONS|LOG_NDELAY|LOG_PID, LOG_DAEMON);
1389 zlog_set_level(NULL, ZLOG_DEST_MONITOR, ZLOG_DISABLED);
1390 if (daemon_mode)
1391 {
1392 zlog_set_level(NULL, ZLOG_DEST_SYSLOG, MIN(gs.loglevel,LOG_DEBUG));
1393 if (daemon (0, 0) < 0)
1394 {
1395 fprintf(stderr, "Watchfrr daemon failed: %s", strerror(errno));
1396 exit (1);
1397 }
1398 }
1399 else
1400 zlog_set_level(NULL, ZLOG_DEST_STDOUT, MIN(gs.loglevel,LOG_DEBUG));
1401
1402 /* Make sure we're not already running. */
1403 pid_output (pidfile);
1404
1405 /* Announce which daemons are being monitored. */
1406 {
1407 struct daemon *dmn;
1408 size_t len = 0;
1409
1410 for (dmn = gs.daemons; dmn; dmn = dmn->next)
1411 len += strlen(dmn->name)+1;
1412
1413 {
1414 char buf[len+1];
1415 char *p = buf;
1416
1417 for (dmn = gs.daemons; dmn; dmn = dmn->next)
1418 {
1419 if (p != buf)
1420 *p++ = ' ';
1421 strcpy(p,dmn->name);
1422 p += strlen(p);
1423 }
1424 zlog_notice("%s %s watching [%s], mode [%s]",
1425 progname, FRR_VERSION, buf, mode_str[gs.mode]);
1426 }
1427 }
1428
1429 {
1430 struct thread thread;
1431
1432 while (thread_fetch (master, &thread))
1433 thread_call (&thread);
1434 }
1435
1436 systemd_send_stopping ();
1437 /* Not reached. */
1438 return 0;
1439 }