2 * Monitor status of frr daemons and restart if necessary.
4 * Copyright (C) 2004 Andrew J. Schorr
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
26 #include <lib/version.h>
29 #include "lib_errors.h"
30 #include "zlog_targets.h"
41 #include "watchfrr_errors.h"
/* Yields the smaller of X and Y (X when the two compare equal).
 * Function-like macro: the selected argument is expanded twice, so do not
 * pass expressions with side effects. */
44 #define MIN(X,Y) (((X) <= (Y)) ? (X) : (Y))
47 /* Macros to help randomize timers. */
/* JITTER(X): frr_weak_random() reduced modulo (X)+1, then lowered by (X)/2,
 * i.e. an offset in roughly [-(X)/2, (X)/2].
 * NOTE(review): assumes frr_weak_random() never returns a negative value --
 * confirm against its declaration. */
48 #define JITTER(X) ((frr_weak_random() % ((X)+1))-((X)/2))
/* FUZZY(X): X plus a JITTER term computed from one twentieth of X.  The
 * SET_WAKEUP_* macros apply it to gs.period when arming wakeup timers. */
49 #define FUZZY(X) ((X)+JITTER((X)/20))
/* Built-in defaults; printhelp() reports each of them as the "default is"
 * value of the corresponding command-line option. */
/* Status polling interval, in seconds (gs.period stores it multiplied by
 * 1000). */
51 #define DEFAULT_PERIOD 5
/* Unresponsiveness timeout, in seconds. */
52 #define DEFAULT_TIMEOUT 90
/* Restart (kill) timeout, in seconds: background jobs still running after
 * this long are killed. */
53 #define DEFAULT_RESTART_TIMEOUT 20
/* Default logging level. */
54 #define DEFAULT_LOGLEVEL LOG_INFO
/* Minimum and maximum seconds to wait between invocations of daemon restart
 * commands. */
55 #define DEFAULT_MIN_RESTART 60
56 #define DEFAULT_MAX_RESTART 600
/* Time before systemd is told we are operational again after a daemon
 * restart (presumably seconds, like the other timers -- TODO confirm). */
57 #define DEFAULT_OPERATIONAL_TIMEOUT 60
/* Default shell command templates for restarting, starting and stopping a
 * single daemon.  The "%s" is where the daemon name is substituted;
 * WATCHFRR_SH_PATH is defined outside this file. */
59 #define DEFAULT_RESTART_CMD WATCHFRR_SH_PATH " restart %s"
60 #define DEFAULT_START_CMD WATCHFRR_SH_PATH " start %s"
61 #define DEFAULT_STOP_CMD WATCHFRR_SH_PATH " stop %s"
/* Token used for the liveness probe: wakeup_send_echo() writes
 * "echo " PING_TOKEN to the daemon, and handle_read() expects
 * PING_TOKEN "\n" back. */
63 #define PING_TOKEN "PING"
/* Memory accounting for this program: the "watchfrr" memory group and,
 * within it, the type used for per-daemon entries (watchfrr_init() allocates
 * them with XCALLOC(MTYPE_WATCHFRR_DAEMON, ...); presumably that MTYPE_ name
 * is what DEFINE_MTYPE_STATIC generates -- confirm in the memory header). */
65 DEFINE_MGROUP(WATCHFRR
, "watchfrr");
66 DEFINE_MTYPE_STATIC(WATCHFRR
, WATCHFRR_DAEMON
, "watchfrr daemon entry");
68 /* Needs to be global, referenced somewhere inside libfrr. */
/* Event loop handle passed to every thread_add_*() call in this file. */
69 struct thread_master
*master
;
/* When true, watchfrr_init() announces "monitor mode" and does not require
 * the special ("zebra") daemon to be among the monitored daemons. */
71 static bool watch_only
= false;
/* NOTE(review): presumably the prefix set by -N/--pathspace ("Insert prefix
 * into config & socket paths"); the assignment is not visible here. */
72 const char *pathspace
;
79 PHASE_ZEBRA_RESTART_PENDING
,
80 PHASE_WAITING_ZEBRA_UP
83 static const char *const phase_str
[] = {
87 "Waiting for other daemons to come down",
88 "Zebra restart job running",
89 "Waiting for zebra to come up",
/* How long one phase of a phased restart may last before phase_hanging()
 * aborts it: three times the configured restart timeout.  set_phase() uses
 * it as the timer delay and phase_hanging() logs it as a count of seconds. */
93 #define PHASE_TIMEOUT (3*gs.restart_timeout)
/* Delay handed to thread_add_timer_msec() for the startup_timeout callback
 * (55 * 1000, i.e. 55 seconds when the delay is in milliseconds, as the
 * timer function's name indicates).  The expansion is parenthesized so the
 * product keeps its value when the macro is embedded in a larger
 * expression, e.g. `x / STARTUP_TIMEOUT`. */
#define STARTUP_TIMEOUT (55 * 1000)
102 struct thread
*t_kill
;
106 static struct global_state
{
107 enum restart_phase phase
;
108 struct thread
*t_phase_hanging
;
109 struct thread
*t_startup_timeout
;
110 struct thread
*t_operational
;
114 long restart_timeout
;
115 bool reading_configuration
;
116 long min_restart_interval
;
117 long max_restart_interval
;
118 long operational_timeout
;
119 struct daemon
*daemons
;
120 const char *restart_command
;
121 const char *start_command
;
122 const char *stop_command
;
123 struct restart_info restart
;
125 struct daemon
*special
; /* points to zebra when doing phased restart */
128 int numdown
; /* # of daemons that are not UP or UNRESPONSIVE */
131 .vtydir
= frr_vtydir
,
132 .period
= 1000 * DEFAULT_PERIOD
,
133 .timeout
= DEFAULT_TIMEOUT
,
134 .restart_timeout
= DEFAULT_RESTART_TIMEOUT
,
135 .loglevel
= DEFAULT_LOGLEVEL
,
136 .min_restart_interval
= DEFAULT_MIN_RESTART
,
137 .max_restart_interval
= DEFAULT_MAX_RESTART
,
138 .operational_timeout
= DEFAULT_OPERATIONAL_TIMEOUT
,
139 .restart_command
= DEFAULT_RESTART_CMD
,
140 .start_command
= DEFAULT_START_CMD
,
141 .stop_command
= DEFAULT_STOP_CMD
,
153 (((DMN)->state == DAEMON_UP) || ((DMN)->state == DAEMON_UNRESPONSIVE))
155 static const char *const state_str
[] = {
156 "Init", "Down", "Connecting", "Up", "Unresponsive",
161 enum daemon_state state
;
163 struct timeval echo_sent
;
164 unsigned int connect_tries
;
165 struct thread
*t_wakeup
;
166 struct thread
*t_read
;
167 struct thread
*t_write
;
169 struct restart_info restart
;
172 * For a given daemon, if we've turned on ignore timeouts
173 * ignore the timeout value and assume everything is ok
174 * This is for daemon debugging w/ gdb after we have started
175 * FRR and realize we have something that needs to be looked
/* Codes used in the longopts table for options that have no single-letter
 * form; the values sit well above the character range used by the
 * single-letter options ('d', 'S', 'l', ...). */
181 #define OPTION_MINRESTART 2000
182 #define OPTION_MAXRESTART 2001
183 #define OPTION_DRY 2002
184 #define OPTION_NETNS 2003
185 #define OPTION_MAXOPERATIONAL 2004
187 static const struct option longopts
[] = {
188 {"daemon", no_argument
, NULL
, 'd'},
189 {"statedir", required_argument
, NULL
, 'S'},
190 {"loglevel", required_argument
, NULL
, 'l'},
191 {"interval", required_argument
, NULL
, 'i'},
192 {"timeout", required_argument
, NULL
, 't'},
193 {"restart-timeout", required_argument
, NULL
, 'T'},
194 {"restart", required_argument
, NULL
, 'r'},
195 {"start-command", required_argument
, NULL
, 's'},
196 {"kill-command", required_argument
, NULL
, 'k'},
197 {"dry", no_argument
, NULL
, OPTION_DRY
},
198 {"min-restart-interval", required_argument
, NULL
, OPTION_MINRESTART
},
199 {"max-restart-interval", required_argument
, NULL
, OPTION_MAXRESTART
},
200 {"operational-timeout", required_argument
, NULL
, OPTION_MAXOPERATIONAL
},
201 {"pid-file", required_argument
, NULL
, 'p'},
202 {"blank-string", required_argument
, NULL
, 'b'},
204 {"netns", optional_argument
, NULL
, OPTION_NETNS
},
206 {"help", no_argument
, NULL
, 'h'},
207 {"version", no_argument
, NULL
, 'v'},
/* Forward declarations for functions that are called earlier in this file
 * than they are defined. */
210 static int try_connect(struct daemon
*dmn
);
211 static void wakeup_send_echo(struct thread
*t_wakeup
);
212 static void try_restart(struct daemon
*dmn
);
213 static void phase_check(void);
214 static void restart_done(struct daemon
*dmn
);
/* Program name printed in the startup notice by watchfrr_init(); it is
 * assigned outside the portion of the file visible here. */
216 static const char *progname
;
218 void watchfrr_set_ignore_daemon(struct vty
*vty
, const char *dname
, bool ignore
)
222 for (dmn
= gs
.daemons
; dmn
; dmn
= dmn
->next
) {
223 if (strncmp(dmn
->name
, dname
, strlen(dmn
->name
)) == 0)
228 dmn
->ignore_timeout
= ignore
;
229 vty_out(vty
, "%s switching to %s\n", dmn
->name
,
230 ignore
? "ignore" : "watch");
232 vty_out(vty
, "%s is not configured for running at the moment",
236 static void printhelp(FILE *target
)
239 "Usage : %s [OPTION...] <daemon name> ...\n\n\
240 Watchdog program to monitor status of frr daemons and try to restart\n\
241 them if they are down or unresponsive. It determines whether a daemon is\n\
242 up based on whether it can connect to the daemon's vty unix stream socket.\n\
243 It then repeatedly sends echo commands over that socket to determine whether\n\
244 the daemon is responsive. If the daemon crashes, we will receive an EOF\n\
245 on the socket connection and know immediately that the daemon is down.\n\n\
246 The daemons to be monitored should be listed on the command line.\n\n\
247 In order to avoid attempting to restart the daemons in a fast loop,\n\
248 the -m and -M options allow you to control the minimum delay between\n\
249 restart commands. The minimum restart delay is recalculated each time\n\
250 a restart is attempted: if the time since the last restart attempt exceeds\n\
251 twice the -M value, then the restart delay is set to the -m value.\n\
252 Otherwise, the interval is doubled (but capped at the -M value).\n\n",
257 -d, --daemon Run in daemon mode. In this mode, error messages are sent\n\
258 to syslog instead of stdout.\n\
259 -S, --statedir Set the vty socket directory (default is %s)\n\
260 -N, --pathspace Insert prefix into config & socket paths\n"
262 " --netns Create and/or use Linux network namespace. If no name is\n"
263 " given, uses the value from `-N`.\n"
265 "-l, --loglevel Set the logging level (default is %d).\n\
266 The value should range from %d (LOG_EMERG) to %d (LOG_DEBUG),\n\
267 but it can be set higher than %d if extra-verbose debugging\n\
268 messages are desired.\n\
269 --min-restart-interval\n\
270 Set the minimum seconds to wait between invocations of daemon\n\
271 restart commands (default is %d).\n\
272 --max-restart-interval\n\
273 Set the maximum seconds to wait between invocations of daemon\n\
274 restart commands (default is %d).\n\
275 --operational-timeout\n\
276 Set the time before systemd is notified that we are considered\n\
277 operational again after a daemon restart (default is %d).\n\
278 -i, --interval Set the status polling interval in seconds (default is %d)\n\
279 -t, --timeout Set the unresponsiveness timeout in seconds (default is %d)\n\
280 -T, --restart-timeout\n\
281 Set the restart (kill) timeout in seconds (default is %d).\n\
282 If any background jobs are still running after this much\n\
283 time has elapsed, they will be killed.\n\
284 -r, --restart Supply a Bourne shell command to use to restart a single\n\
285 daemon. The command string should include '%%s' where the\n\
286 name of the daemon should be substituted.\n\
288 -s, --start-command\n\
289 Supply a Bourne shell to command to use to start a single\n\
290 daemon. The command string should include '%%s' where the\n\
291 name of the daemon should be substituted.\n\
293 -k, --kill-command\n\
294 Supply a Bourne shell to command to use to stop a single\n\
295 daemon. The command string should include '%%s' where the\n\
296 name of the daemon should be substituted.\n\
298 --dry Do not start or restart anything, just log.\n\
299 -p, --pid-file Set process identifier file name\n\
300 (default is %s/watchfrr.pid).\n\
301 -b, --blank-string\n\
302 When the supplied argument string is found in any of the\n\
303 various shell command arguments (-r, -s, or -k), replace\n\
304 it with a space. This is an ugly hack to circumvent problems\n\
305 passing command-line arguments with embedded spaces.\n\
306 -v, --version Print program version\n\
307 -h, --help Display this help and exit\n",
308 frr_vtydir
, DEFAULT_LOGLEVEL
, LOG_EMERG
, LOG_DEBUG
, LOG_DEBUG
,
309 DEFAULT_MIN_RESTART
, DEFAULT_MAX_RESTART
,
310 DEFAULT_OPERATIONAL_TIMEOUT
, DEFAULT_PERIOD
, DEFAULT_TIMEOUT
,
311 DEFAULT_RESTART_TIMEOUT
, DEFAULT_RESTART_CMD
, DEFAULT_START_CMD
,
312 DEFAULT_STOP_CMD
, frr_vtydir
);
315 static pid_t
run_background(char *shell_cmd
)
319 switch (child
= fork()) {
321 flog_err_sys(EC_LIB_SYSTEM_CALL
,
322 "fork failed, cannot run command [%s]: %s",
323 shell_cmd
, safe_strerror(errno
));
327 /* Use separate process group so child processes can be killed
329 if (setpgid(0, 0) < 0)
330 zlog_warn("setpgid(0,0) failed: %s",
331 safe_strerror(errno
));
335 char *const argv
[4] = {shell
, dashc
, shell_cmd
, NULL
};
336 execv("/bin/sh", argv
);
337 flog_err_sys(EC_LIB_SYSTEM_CALL
,
338 "execv(/bin/sh -c '%s') failed: %s",
339 shell_cmd
, safe_strerror(errno
));
343 /* Parent process: we will reap the child later. */
344 zlog_info("Forked background command [pid %d]: %s", (int)child
,
350 static struct timeval
*time_elapsed(struct timeval
*result
,
351 const struct timeval
*start_time
)
353 gettimeofday(result
, NULL
);
354 result
->tv_sec
-= start_time
->tv_sec
;
355 result
->tv_usec
-= start_time
->tv_usec
;
356 while (result
->tv_usec
< 0) {
357 result
->tv_usec
+= 1000000L;
363 static void restart_kill(struct thread
*t_kill
)
365 struct restart_info
*restart
= THREAD_ARG(t_kill
);
366 struct timeval delay
;
368 time_elapsed(&delay
, &restart
->time
);
370 if (gs
.reading_configuration
) {
372 "%s %s child process appears to still be reading configuration, delaying for another %lu time",
373 restart
->what
, restart
->name
, gs
.restart_timeout
);
374 thread_add_timer(master
, restart_kill
, restart
,
375 gs
.restart_timeout
, &restart
->t_kill
);
380 "%s %s child process %d still running after %ld seconds, sending signal %d",
381 restart
->what
, restart
->name
, (int)restart
->pid
,
382 (long)delay
.tv_sec
, (restart
->kills
? SIGKILL
: SIGTERM
));
383 kill(-restart
->pid
, (restart
->kills
? SIGKILL
: SIGTERM
));
385 thread_add_timer(master
, restart_kill
, restart
, gs
.restart_timeout
,
389 static struct restart_info
*find_child(pid_t child
)
392 if (gs
.restart
.pid
== child
)
395 for (dmn
= gs
.daemons
; dmn
; dmn
= dmn
->next
) {
396 if (dmn
->restart
.pid
== child
)
397 return &dmn
->restart
;
402 static void sigchild(void)
408 struct restart_info
*restart
;
411 switch (child
= waitpid(-1, &status
, WNOHANG
)) {
413 flog_err_sys(EC_LIB_SYSTEM_CALL
, "waitpid failed: %s",
414 safe_strerror(errno
));
417 zlog_warn("SIGCHLD received, but waitpid did not reap a child");
421 if (child
== integrated_write_pid
) {
422 integrated_write_sigchld(status
);
426 if ((restart
= find_child(child
)) != NULL
) {
427 name
= restart
->name
;
428 what
= restart
->what
;
431 thread_cancel(&restart
->t_kill
);
433 /* Update restart time to reflect the time the command
435 gettimeofday(&restart
->time
, NULL
);
439 "waitpid returned status for an unknown child process %d",
444 if (WIFSTOPPED(status
))
445 zlog_warn("%s %s process %d is stopped", what
, name
,
447 else if (WIFSIGNALED(status
))
448 zlog_warn("%s %s process %d terminated due to signal %d", what
,
449 name
, (int)child
, WTERMSIG(status
));
450 else if (WIFEXITED(status
)) {
451 if (WEXITSTATUS(status
) != 0)
453 "%s %s process %d exited with non-zero status %d",
454 what
, name
, (int)child
, WEXITSTATUS(status
));
456 zlog_debug("%s %s process %d exited normally", what
,
459 if (restart
&& restart
!= &gs
.restart
) {
460 dmn
= container_of(restart
, struct daemon
,
464 for (dmn
= gs
.daemons
; dmn
; dmn
= dmn
->next
)
470 "cannot interpret %s %s process %d wait status 0x%x",
471 what
, name
, (int)child
, status
);
475 static int run_job(struct restart_info
*restart
, const char *cmdtype
,
476 const char *command
, int force
, int update_interval
)
478 struct timeval delay
;
480 if (gs
.loglevel
> LOG_DEBUG
+ 1)
481 zlog_debug("attempting to %s %s", cmdtype
, restart
->name
);
484 if (gs
.loglevel
> LOG_DEBUG
+ 1)
486 "cannot %s %s, previous pid %d still running",
487 cmdtype
, restart
->name
, (int)restart
->pid
);
493 snprintf(buffer
, sizeof(buffer
), "restarting %s", restart
->name
);
494 systemd_send_status(buffer
);
496 /* Note: time_elapsed test must come before the force test, since we
498 to make sure that delay is initialized for use below in updating the
500 if ((time_elapsed(&delay
, &restart
->time
)->tv_sec
< restart
->interval
)
503 if (gs
.loglevel
> LOG_DEBUG
+ 1)
505 "postponing %s %s: elapsed time %ld < retry interval %ld",
506 cmdtype
, restart
->name
, (long)delay
.tv_sec
,
511 gettimeofday(&restart
->time
, NULL
);
514 char cmd
[strlen(command
) + strlen(restart
->name
) + 1];
515 snprintf(cmd
, sizeof(cmd
), command
, restart
->name
);
516 if ((restart
->pid
= run_background(cmd
)) > 0) {
517 thread_add_timer(master
, restart_kill
, restart
,
518 gs
.restart_timeout
, &restart
->t_kill
);
519 restart
->what
= cmdtype
;
525 /* Calculate the new restart interval. */
526 if (update_interval
) {
527 if (delay
.tv_sec
> 2 * gs
.max_restart_interval
)
528 restart
->interval
= gs
.min_restart_interval
;
529 else if ((restart
->interval
*= 2) > gs
.max_restart_interval
)
530 restart
->interval
= gs
.max_restart_interval
;
531 if (gs
.loglevel
> LOG_DEBUG
+ 1)
532 zlog_debug("restart %s interval is now %ld",
533 restart
->name
, restart
->interval
);
538 #define SET_READ_HANDLER(DMN) \
540 (DMN)->t_read = NULL; \
541 thread_add_read(master, handle_read, (DMN), (DMN)->fd, \
545 #define SET_WAKEUP_DOWN(DMN) \
547 (DMN)->t_wakeup = NULL; \
548 thread_add_timer_msec(master, wakeup_down, (DMN), \
549 FUZZY(gs.period), &(DMN)->t_wakeup); \
552 #define SET_WAKEUP_UNRESPONSIVE(DMN) \
554 (DMN)->t_wakeup = NULL; \
555 thread_add_timer_msec(master, wakeup_unresponsive, (DMN), \
556 FUZZY(gs.period), &(DMN)->t_wakeup); \
559 #define SET_WAKEUP_ECHO(DMN) \
561 (DMN)->t_wakeup = NULL; \
562 thread_add_timer_msec(master, wakeup_send_echo, (DMN), \
563 FUZZY(gs.period), &(DMN)->t_wakeup); \
566 static void wakeup_down(struct thread
*t_wakeup
)
568 struct daemon
*dmn
= THREAD_ARG(t_wakeup
);
570 dmn
->t_wakeup
= NULL
;
571 if (try_connect(dmn
) < 0)
572 SET_WAKEUP_DOWN(dmn
);
573 if ((dmn
->connect_tries
> 1) && (dmn
->state
!= DAEMON_UP
))
577 static void wakeup_init(struct thread
*t_wakeup
)
579 struct daemon
*dmn
= THREAD_ARG(t_wakeup
);
581 dmn
->t_wakeup
= NULL
;
582 if (try_connect(dmn
) < 0) {
584 "%s state -> down : initial connection attempt failed",
586 dmn
->state
= DAEMON_DOWN
;
591 static void restart_done(struct daemon
*dmn
)
593 if (dmn
->state
!= DAEMON_DOWN
) {
595 "Daemon: %s: is in %s state but expected it to be in DAEMON_DOWN state",
596 dmn
->name
, state_str
[dmn
->state
]);
599 THREAD_OFF(dmn
->t_wakeup
);
601 if (try_connect(dmn
) < 0)
602 SET_WAKEUP_DOWN(dmn
);
/* Timer callback that daemon_up() arms on gs.t_operational with a delay of
 * gs.operational_timeout: reports "FRR Operational" through
 * systemd_send_status().  The thread argument is not referenced in the
 * visible body. */
605 static void daemon_restarting_operational(struct thread
*thread
)
607 systemd_send_status("FRR Operational");
610 static void daemon_down(struct daemon
*dmn
, const char *why
)
612 if (IS_UP(dmn
) || (dmn
->state
== DAEMON_INIT
))
613 flog_err(EC_WATCHFRR_CONNECTION
, "%s state -> down : %s",
615 else if (gs
.loglevel
> LOG_DEBUG
)
616 zlog_debug("%s still down : %s", dmn
->name
, why
);
619 dmn
->state
= DAEMON_DOWN
;
624 THREAD_OFF(dmn
->t_read
);
625 THREAD_OFF(dmn
->t_write
);
626 THREAD_OFF(dmn
->t_wakeup
);
627 if (try_connect(dmn
) < 0)
628 SET_WAKEUP_DOWN(dmn
);
630 systemd_send_status("FRR partially operational");
634 static void handle_read(struct thread
*t_read
)
636 struct daemon
*dmn
= THREAD_ARG(t_read
);
637 static const char resp
[sizeof(PING_TOKEN
) + 4] = PING_TOKEN
"\n";
638 char buf
[sizeof(resp
) + 100];
640 struct timeval delay
;
643 if ((rc
= read(dmn
->fd
, buf
, sizeof(buf
))) < 0) {
646 if (ERRNO_IO_RETRY(errno
)) {
647 /* Pretend it never happened. */
648 SET_READ_HANDLER(dmn
);
651 snprintf(why
, sizeof(why
), "unexpected read error: %s",
652 safe_strerror(errno
));
653 daemon_down(dmn
, why
);
657 daemon_down(dmn
, "read returned EOF");
660 if (!dmn
->echo_sent
.tv_sec
) {
661 char why
[sizeof(buf
) + 100];
662 snprintf(why
, sizeof(why
),
663 "unexpected read returns %d bytes: %.*s", (int)rc
,
665 daemon_down(dmn
, why
);
669 /* We are expecting an echo response: is there any chance that the
670 response would not be returned entirely in the first read? That
671 seems inconceivable... */
672 if ((rc
!= sizeof(resp
)) || memcmp(buf
, resp
, sizeof(resp
))) {
673 char why
[100 + sizeof(buf
)];
674 snprintf(why
, sizeof(why
),
675 "read returned bad echo response of %d bytes (expecting %u): %.*s",
676 (int)rc
, (unsigned int)sizeof(resp
), (int)rc
, buf
);
677 daemon_down(dmn
, why
);
681 time_elapsed(&delay
, &dmn
->echo_sent
);
682 dmn
->echo_sent
.tv_sec
= 0;
683 if (dmn
->state
== DAEMON_UNRESPONSIVE
) {
684 if (delay
.tv_sec
< gs
.timeout
) {
685 dmn
->state
= DAEMON_UP
;
687 "%s state -> up : echo response received after %ld.%06ld seconds",
688 dmn
->name
, (long)delay
.tv_sec
,
689 (long)delay
.tv_usec
);
692 "%s: slow echo response finally received after %ld.%06ld seconds",
693 dmn
->name
, (long)delay
.tv_sec
,
694 (long)delay
.tv_usec
);
695 } else if (gs
.loglevel
> LOG_DEBUG
+ 1)
696 zlog_debug("%s: echo response received after %ld.%06ld seconds",
697 dmn
->name
, (long)delay
.tv_sec
, (long)delay
.tv_usec
);
699 SET_READ_HANDLER(dmn
);
700 thread_cancel(&dmn
->t_wakeup
);
701 SET_WAKEUP_ECHO(dmn
);
705 * Wait until we notice that all daemons are ready before
706 * we tell systemd that we are ready
708 static void daemon_send_ready(int exitcode
)
718 zlog_notice("all daemons up, doing startup-complete notify");
719 else if (gs
.numdown
< gs
.numdaemons
)
720 flog_err(EC_WATCHFRR_CONNECTION
,
721 "startup did not complete within timeout (%d/%d daemons running)",
722 gs
.numdaemons
- gs
.numdown
, gs
.numdaemons
);
724 flog_err(EC_WATCHFRR_CONNECTION
,
725 "all configured daemons failed to start -- exiting watchfrr");
732 snprintf(started
, sizeof(started
), "%s/%s", frr_vtydir
,
734 fp
= fopen(started
, "w");
738 systemd_send_started(master
);
739 systemd_send_status("FRR Operational");
743 static void daemon_up(struct daemon
*dmn
, const char *why
)
745 dmn
->state
= DAEMON_UP
;
747 dmn
->connect_tries
= 0;
748 zlog_notice("%s state -> up : %s", dmn
->name
, why
);
749 if (gs
.numdown
== 0) {
750 daemon_send_ready(0);
752 THREAD_OFF(gs
.t_operational
);
754 thread_add_timer(master
, daemon_restarting_operational
, NULL
,
755 gs
.operational_timeout
, &gs
.t_operational
);
758 SET_WAKEUP_ECHO(dmn
);
762 static void check_connect(struct thread
*t_write
)
764 struct daemon
*dmn
= THREAD_ARG(t_write
);
766 socklen_t reslen
= sizeof(sockerr
);
769 if (getsockopt(dmn
->fd
, SOL_SOCKET
, SO_ERROR
, (char *)&sockerr
, &reslen
)
771 zlog_warn("%s: check_connect: getsockopt failed: %s", dmn
->name
,
772 safe_strerror(errno
));
774 "getsockopt failed checking connection success");
777 if ((reslen
== sizeof(sockerr
)) && sockerr
) {
781 "getsockopt reports that connection attempt failed: %s",
782 safe_strerror(sockerr
));
783 daemon_down(dmn
, why
);
787 daemon_up(dmn
, "delayed connect succeeded");
790 static void wakeup_connect_hanging(struct thread
*t_wakeup
)
792 struct daemon
*dmn
= THREAD_ARG(t_wakeup
);
795 dmn
->t_wakeup
= NULL
;
796 snprintf(why
, sizeof(why
),
797 "connection attempt timed out after %ld seconds", gs
.timeout
);
798 daemon_down(dmn
, why
);
801 /* Making connection to protocol daemon. */
802 static int try_connect(struct daemon
*dmn
)
805 struct sockaddr_un addr
;
808 if (gs
.loglevel
> LOG_DEBUG
+ 1)
809 zlog_debug("%s: attempting to connect", dmn
->name
);
810 dmn
->connect_tries
++;
812 memset(&addr
, 0, sizeof(addr
));
813 addr
.sun_family
= AF_UNIX
;
814 snprintf(addr
.sun_path
, sizeof(addr
.sun_path
), "%s/%s.vty", gs
.vtydir
,
816 #ifdef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN
817 len
= addr
.sun_len
= SUN_LEN(&addr
);
819 len
= sizeof(addr
.sun_family
) + strlen(addr
.sun_path
);
820 #endif /* HAVE_STRUCT_SOCKADDR_UN_SUN_LEN */
822 /* Quick check to see if we might succeed before we go to the trouble
823 of creating a socket. */
824 if (access(addr
.sun_path
, W_OK
) < 0) {
826 flog_err_sys(EC_LIB_SYSTEM_CALL
,
827 "%s: access to socket %s denied: %s",
828 dmn
->name
, addr
.sun_path
,
829 safe_strerror(errno
));
833 if ((sock
= socket(AF_UNIX
, SOCK_STREAM
, 0)) < 0) {
834 flog_err_sys(EC_LIB_SOCKET
, "%s(%s): cannot make socket: %s",
835 __func__
, addr
.sun_path
, safe_strerror(errno
));
839 if (set_nonblocking(sock
) < 0 || set_cloexec(sock
) < 0) {
840 flog_err_sys(EC_LIB_SYSTEM_CALL
,
841 "%s(%s): set_nonblocking/cloexec(%d) failed",
842 __func__
, addr
.sun_path
, sock
);
847 if (connect(sock
, (struct sockaddr
*)&addr
, len
) < 0) {
848 if ((errno
!= EINPROGRESS
) && (errno
!= EWOULDBLOCK
)) {
849 if (gs
.loglevel
> LOG_DEBUG
)
850 zlog_debug("%s(%s): connect failed: %s",
851 __func__
, addr
.sun_path
,
852 safe_strerror(errno
));
856 if (gs
.loglevel
> LOG_DEBUG
)
857 zlog_debug("%s: connection in progress", dmn
->name
);
858 dmn
->state
= DAEMON_CONNECTING
;
860 thread_add_write(master
, check_connect
, dmn
, dmn
->fd
,
862 thread_add_timer(master
, wakeup_connect_hanging
, dmn
,
863 gs
.timeout
, &dmn
->t_wakeup
);
864 SET_READ_HANDLER(dmn
);
869 SET_READ_HANDLER(dmn
);
870 daemon_up(dmn
, "connect succeeded");
/* Timer callback armed by set_phase().  Clears the gs.t_phase_hanging
 * handle, logs that the current phase has been hanging for PHASE_TIMEOUT
 * seconds, and resets gs.phase to PHASE_NONE, which abandons the phased
 * restart.  t_hanging is not referenced in the visible body. */
874 static void phase_hanging(struct thread
*t_hanging
)
876 gs
.t_phase_hanging
= NULL
;
877 flog_err(EC_WATCHFRR_CONNECTION
,
878 "Phase [%s] hanging for %ld seconds, aborting phased restart",
879 phase_str
[gs
.phase
], PHASE_TIMEOUT
);
880 gs
.phase
= PHASE_NONE
;
/* Enter restart phase new_phase: record it in gs.phase, cancel any hang
 * timer still pending from the previous phase, and arm a fresh one so that
 * phase_hanging() runs if this phase lasts longer than PHASE_TIMEOUT. */
883 static void set_phase(enum restart_phase new_phase
)
885 gs
.phase
= new_phase
;
886 thread_cancel(&gs
.t_phase_hanging
);
888 thread_add_timer(master
, phase_hanging
, NULL
, PHASE_TIMEOUT
,
889 &gs
.t_phase_hanging
);
892 static void phase_check(void)
901 for (dmn
= gs
.daemons
; dmn
; dmn
= dmn
->next
)
902 if (dmn
->state
== DAEMON_INIT
)
905 /* startup complete, everything out of INIT */
906 gs
.phase
= PHASE_NONE
;
907 for (dmn
= gs
.daemons
; dmn
; dmn
= dmn
->next
)
908 if (dmn
->state
== DAEMON_DOWN
) {
909 SET_WAKEUP_DOWN(dmn
);
913 case PHASE_STOPS_PENDING
:
917 "Phased restart: all routing daemon stop jobs have completed.");
918 set_phase(PHASE_WAITING_DOWN
);
921 case PHASE_WAITING_DOWN
:
922 if (gs
.numdown
+ IS_UP(gs
.special
) < gs
.numdaemons
)
924 systemd_send_status("Phased Restart");
925 zlog_info("Phased restart: all routing daemons now down.");
926 run_job(&gs
.special
->restart
, "restart", gs
.restart_command
, 1,
928 set_phase(PHASE_ZEBRA_RESTART_PENDING
);
931 case PHASE_ZEBRA_RESTART_PENDING
:
932 if (gs
.special
->restart
.pid
)
934 systemd_send_status("Zebra Restarting");
935 zlog_info("Phased restart: %s restart job completed.",
937 set_phase(PHASE_WAITING_ZEBRA_UP
);
940 case PHASE_WAITING_ZEBRA_UP
:
941 if (!IS_UP(gs
.special
))
943 zlog_info("Phased restart: %s is now up.", gs
.special
->name
);
944 for (dmn
= gs
.daemons
; dmn
; dmn
= dmn
->next
) {
945 if (dmn
!= gs
.special
)
946 run_job(&dmn
->restart
, "start",
947 gs
.start_command
, 1, 0);
949 gs
.phase
= PHASE_NONE
;
950 THREAD_OFF(gs
.t_phase_hanging
);
951 zlog_notice("Phased global restart has completed.");
956 static void try_restart(struct daemon
*dmn
)
961 if (dmn
!= gs
.special
) {
962 if ((gs
.special
->state
== DAEMON_UP
)
963 && (gs
.phase
== PHASE_NONE
))
964 run_job(&dmn
->restart
, "restart", gs
.restart_command
, 0,
968 "%s: postponing restart attempt because master %s daemon not up [%s], or phased restart in progress",
969 dmn
->name
, gs
.special
->name
,
970 state_str
[gs
.special
->state
]);
974 if ((gs
.phase
!= PHASE_NONE
) || gs
.numpids
) {
975 if (gs
.loglevel
> LOG_DEBUG
+ 1)
977 "postponing phased global restart: restart already in progress [%s], or outstanding child processes [%d]",
978 phase_str
[gs
.phase
], gs
.numpids
);
981 /* Is it too soon for a restart? */
983 struct timeval delay
;
984 if (time_elapsed(&delay
, &gs
.special
->restart
.time
)->tv_sec
985 < gs
.special
->restart
.interval
) {
986 if (gs
.loglevel
> LOG_DEBUG
+ 1)
988 "postponing phased global restart: elapsed time %ld < retry interval %ld",
990 gs
.special
->restart
.interval
);
994 run_job(&gs
.restart
, "restart", gs
.restart_command
, 0, 1);
997 static void wakeup_unresponsive(struct thread
*t_wakeup
)
999 struct daemon
*dmn
= THREAD_ARG(t_wakeup
);
1001 dmn
->t_wakeup
= NULL
;
1002 if (dmn
->state
!= DAEMON_UNRESPONSIVE
)
1003 flog_err(EC_WATCHFRR_CONNECTION
,
1004 "%s: no longer unresponsive (now %s), wakeup should have been cancelled!",
1005 dmn
->name
, state_str
[dmn
->state
]);
1007 SET_WAKEUP_UNRESPONSIVE(dmn
);
1012 static void wakeup_no_answer(struct thread
*t_wakeup
)
1014 struct daemon
*dmn
= THREAD_ARG(t_wakeup
);
1016 dmn
->t_wakeup
= NULL
;
1017 dmn
->state
= DAEMON_UNRESPONSIVE
;
1018 if (dmn
->ignore_timeout
)
1020 flog_err(EC_WATCHFRR_CONNECTION
,
1021 "%s state -> unresponsive : no response yet to ping sent %ld seconds ago",
1022 dmn
->name
, gs
.timeout
);
1023 SET_WAKEUP_UNRESPONSIVE(dmn
);
1027 static void wakeup_send_echo(struct thread
*t_wakeup
)
1029 static const char echocmd
[] = "echo " PING_TOKEN
;
1031 struct daemon
*dmn
= THREAD_ARG(t_wakeup
);
1033 dmn
->t_wakeup
= NULL
;
1034 if (((rc
= write(dmn
->fd
, echocmd
, sizeof(echocmd
))) < 0)
1035 || ((size_t)rc
!= sizeof(echocmd
))) {
1036 char why
[100 + sizeof(echocmd
)];
1037 snprintf(why
, sizeof(why
),
1038 "write '%s' returned %d instead of %u", echocmd
,
1039 (int)rc
, (unsigned int)sizeof(echocmd
));
1040 daemon_down(dmn
, why
);
1042 gettimeofday(&dmn
->echo_sent
, NULL
);
1043 thread_add_timer(master
, wakeup_no_answer
, dmn
, gs
.timeout
,
1048 bool check_all_up(void)
1052 for (dmn
= gs
.daemons
; dmn
; dmn
= dmn
->next
)
1053 if (dmn
->state
!= DAEMON_UP
)
1058 void watchfrr_status(struct vty
*vty
)
1061 struct timeval delay
;
1063 vty_out(vty
, "watchfrr global phase: %s\n", phase_str
[gs
.phase
]);
1064 vty_out(vty
, " Restart Command: %pSQq\n", gs
.restart_command
);
1065 vty_out(vty
, " Start Command: %pSQq\n", gs
.start_command
);
1066 vty_out(vty
, " Stop Command: %pSQq\n", gs
.stop_command
);
1067 vty_out(vty
, " Min Restart Interval: %ld\n", gs
.min_restart_interval
);
1068 vty_out(vty
, " Max Restart Interval: %ld\n", gs
.max_restart_interval
);
1069 vty_out(vty
, " Restart Timeout: %ld\n", gs
.restart_timeout
);
1070 vty_out(vty
, " Reading Configuration: %s\n",
1071 gs
.reading_configuration
? "yes" : "no");
1073 vty_out(vty
, " global restart running, pid %ld\n",
1074 (long)gs
.restart
.pid
);
1076 for (dmn
= gs
.daemons
; dmn
; dmn
= dmn
->next
) {
1077 vty_out(vty
, " %-20s %s%s", dmn
->name
, state_str
[dmn
->state
],
1078 dmn
->ignore_timeout
? "/Ignoring Timeout\n" : "\n");
1079 if (dmn
->restart
.pid
)
1080 vty_out(vty
, " restart running, pid %ld\n",
1081 (long)dmn
->restart
.pid
);
1082 else if (dmn
->state
== DAEMON_DOWN
&&
1083 time_elapsed(&delay
, &dmn
->restart
.time
)->tv_sec
1084 < dmn
->restart
.interval
)
1085 vty_out(vty
, " restarting in %jd seconds (%jds backoff interval)\n",
1086 (intmax_t)dmn
->restart
.interval
1087 - (intmax_t)delay
.tv_sec
,
1088 (intmax_t)dmn
->restart
.interval
);
1092 static void sigint(void)
1094 zlog_notice("Terminating on signal");
1095 systemd_send_stopping();
1099 static int valid_command(const char *cmd
)
1106 return ((p
= strchr(cmd
, '%')) != NULL
) && (*(p
+ 1) == 's')
1107 && !strchr(p
+ 1, '%');
1110 /* This is an ugly hack to circumvent problems with passing command-line
1111 arguments that contain spaces. The fix is to use a configuration file. */
1112 static char *translate_blanks(const char *cmd
, const char *blankstr
)
1116 size_t bslen
= strlen(blankstr
);
1118 if (!(res
= strdup(cmd
))) {
1122 while ((p
= strstr(res
, blankstr
)) != NULL
) {
1125 memmove(p
+ 1, p
+ bslen
, strlen(p
+ bslen
) + 1);
/* Timer callback armed by watchfrr_init() with a delay of STARTUP_TIMEOUT:
 * calls daemon_send_ready() with exit code 1.  t_wakeup is not referenced
 * in the visible body. */
1130 static void startup_timeout(struct thread
*t_wakeup
)
1132 daemon_send_ready(1);
1137 #include <sys/mount.h>
/* Directory holding the named network namespace entries: netns_create()
 * bind-mounts /proc/self/ns/net onto NETNS_RUN_DIR/<nsname>, and
 * netns_setup() opens (and if necessary creates) this directory. */
1140 #define NETNS_RUN_DIR "/var/run/netns"
1142 static void netns_create(int dirfd
, const char *nsname
)
1144 /* make /var/run/netns shared between mount namespaces
1145 * just like iproute2 sets it up
1147 if (mount("", NETNS_RUN_DIR
, "none", MS_SHARED
| MS_REC
, NULL
)) {
1148 if (errno
!= EINVAL
) {
1153 if (mount(NETNS_RUN_DIR
, NETNS_RUN_DIR
, "none",
1154 MS_BIND
| MS_REC
, NULL
)) {
1159 if (mount("", NETNS_RUN_DIR
, "none", MS_SHARED
| MS_REC
,
1166 /* need an empty file to mount on top of */
1167 int nsfd
= openat(dirfd
, nsname
, O_CREAT
| O_RDONLY
| O_EXCL
, 0);
1170 fprintf(stderr
, "failed to create \"%s/%s\": %s\n",
1171 NETNS_RUN_DIR
, nsname
, strerror(errno
));
1176 if (unshare(CLONE_NEWNET
)) {
1178 unlinkat(dirfd
, nsname
, 0);
1182 char *dstpath
= asprintfrr(MTYPE_TMP
, "%s/%s", NETNS_RUN_DIR
, nsname
);
1184 /* bind-mount so the namespace has a name and is persistent */
1185 if (mount("/proc/self/ns/net", dstpath
, "none", MS_BIND
, NULL
) < 0) {
1186 fprintf(stderr
, "failed to bind-mount netns to \"%s\": %s\n",
1187 dstpath
, strerror(errno
));
1188 unlinkat(dirfd
, nsname
, 0);
1192 XFREE(MTYPE_TMP
, dstpath
);
1195 static void netns_setup(const char *nsname
)
1199 dirfd
= open(NETNS_RUN_DIR
, O_DIRECTORY
| O_RDONLY
);
1201 if (errno
== ENOTDIR
) {
1202 fprintf(stderr
, "error: \"%s\" is not a directory!\n",
1205 } else if (errno
== ENOENT
) {
1206 if (mkdir(NETNS_RUN_DIR
, 0755)) {
1207 fprintf(stderr
, "error: \"%s\": mkdir: %s\n",
1208 NETNS_RUN_DIR
, strerror(errno
));
1211 dirfd
= open(NETNS_RUN_DIR
, O_DIRECTORY
| O_RDONLY
);
1213 fprintf(stderr
, "error: \"%s\": opendir: %s\n",
1214 NETNS_RUN_DIR
, strerror(errno
));
1218 fprintf(stderr
, "error: \"%s\": %s\n",
1219 NETNS_RUN_DIR
, strerror(errno
));
1224 nsfd
= openat(dirfd
, nsname
, O_RDONLY
);
1225 if (nsfd
< 0 && errno
!= ENOENT
) {
1226 fprintf(stderr
, "error: \"%s/%s\": %s\n",
1227 NETNS_RUN_DIR
, nsname
, strerror(errno
));
1231 netns_create(dirfd
, nsname
);
1233 if (setns(nsfd
, CLONE_NEWNET
)) {
1241 /* make sure loopback is up... weird things happen otherwise.
1242 * ioctl is perfectly fine for this, don't need netlink...
1245 struct ifreq ifr
= { };
1247 strlcpy(ifr
.ifr_name
, "lo", sizeof(ifr
.ifr_name
));
1249 sockfd
= socket(AF_INET
, SOCK_DGRAM
, 0);
1254 if (ioctl(sockfd
, SIOCGIFFLAGS
, &ifr
)) {
1255 perror("ioctl(SIOCGIFFLAGS, \"lo\")");
1258 if (!(ifr
.ifr_flags
& IFF_UP
)) {
1259 ifr
.ifr_flags
|= IFF_UP
;
1260 if (ioctl(sockfd
, SIOCSIFFLAGS
, &ifr
)) {
1261 perror("ioctl(SIOCSIFFLAGS, \"lo\")");
1268 #else /* !GNU_LINUX */
/*
 * netns_setup() - variant compiled when GNU_LINUX is not defined: it
 * only reports on stderr that network namespaces need Linux.
 * NOTE(review): any statement following the message is not visible in
 * this listing.
 */
1270 static void netns_setup(const char *nsname
)
1272 fprintf(stderr
, "network namespaces are only available on Linux\n");
/*
 * Configuration callback (registered in main() through
 * cmd_init_config_callbacks()): sets gs.reading_configuration to true.
 */
1277 static void watchfrr_start_config(void)
1279 gs
.reading_configuration
= true;
/*
 * Counterpart of watchfrr_start_config(): resets
 * gs.reading_configuration to false.
 */
1282 static void watchfrr_end_config(void)
1284 gs
.reading_configuration
= false;
/*
 * watchfrr_init() - create one daemon entry per remaining command-line
 * argument (argv[optind] .. argv[argc - 1]) and log what is watched.
 *
 * argc/argv: the program arguments, already consumed up to optind by
 *            option parsing.
 *
 * Visible effects: arms the startup timer, allocates and initialises a
 * struct daemon for each name, schedules wakeup_init for it, checks the
 * list for the special daemon "zebra", and emits a notice listing all
 * daemon names.
 *
 * NOTE(review): some original lines are absent from this listing (for
 * example the declaration of i, the list linking through add, and the
 * statements that follow the error messages); confirm details against
 * the complete file.
 */
1287 static void watchfrr_init(int argc
, char **argv
)
/* name compared against every daemon name in the loop below */
1289 const char *special
= "zebra";
/* add starts at the head pointer of the global daemon list */
1291 struct daemon
*dmn
, **add
= &gs
.daemons
;
/* buffer for the space-separated daemon names; p is the write position */
1292 char alldaemons
[512] = "", *p
= alldaemons
;
/* run startup_timeout after STARTUP_TIMEOUT milliseconds */
1294 thread_add_timer_msec(master
, startup_timeout
, NULL
, STARTUP_TIMEOUT
,
1295 &gs
.t_startup_timeout
);
/* every non-option argument names a daemon to watch */
1297 for (i
= optind
; i
< argc
; i
++) {
1298 dmn
= XCALLOC(MTYPE_WATCHFRR_DAEMON
, sizeof(*dmn
));
/* the name is not copied: both fields point into argv */
1300 dmn
->name
= dmn
->restart
.name
= argv
[i
];
1301 dmn
->state
= DAEMON_INIT
;
/* zero delay: wakeup_init is scheduled to run right away for this daemon */
1305 thread_add_timer_msec(master
, wakeup_init
, dmn
, 0,
1307 dmn
->restart
.interval
= gs
.min_restart_interval
;
/* presumably records this entry as gs.special - the body is not visible */
1311 if (!strcmp(dmn
->name
, special
))
1317 "Must specify one or more daemons to monitor.\n\n");
/* the special daemon is mandatory unless running in watch-only mode */
1320 if (!watch_only
&& !gs
.special
) {
1321 fprintf(stderr
, "\"%s\" daemon must be in daemon lists\n\n",
/*
 * Append each name to alldaemons; the size argument is the space left
 * between p and the end of the buffer, and a separating blank is added
 * for every name but the first.
 */
1326 for (dmn
= gs
.daemons
; dmn
; dmn
= dmn
->next
) {
1327 snprintf(p
, alldaemons
+ sizeof(alldaemons
) - p
, "%s%s",
1328 (p
== alldaemons
) ? "" : " ", dmn
->name
);
1331 zlog_notice("%s %s watching [%s]%s", progname
, FRR_VERSION
, alldaemons
,
1332 watch_only
? ", monitor mode" : "");
/*
 * Privilege descriptor referenced from the daemon info below through
 * .privs; only the vty_group initialiser is visible in this listing.
 */
1335 struct zebra_privs_t watchfrr_privs
= {
1337 .vty_group
= VTY_GROUP
,
/*
 * Signal table referenced from the daemon info below through .signals
 * and .n_signals; the one entry visible here uses sigchild as handler.
 */
1341 static struct frr_signal_t watchfrr_signals
[] = {
1352 .handler
= sigchild
,
/*
 * Daemon descriptor for watchfrr: feature flags, help printer, copyright
 * string, signal table and privilege descriptor.
 * NOTE(review): presumably this macro defines the watchfrr_di object
 * that main() passes to frr_preinit() - confirm in the library header.
 */
1356 FRR_DAEMON_INFO(watchfrr
, WATCHFRR
,
1357 .flags
= FRR_NO_PRIVSEP
| FRR_NO_TCPVTY
| FRR_LIMITED_CLI
1358 | FRR_NO_CFG_PID_DRY
| FRR_NO_ZCLIENT
1361 .printhelp
= printhelp
,
1362 .copyright
= "Copyright 2004 Andrew J. Schorr",
1364 .signals
= watchfrr_signals
,
/* entry count derived from the array itself */
1365 .n_signals
= array_size(watchfrr_signals
),
1367 .privs
= &watchfrr_privs
,
/*
 * Option letters that main() still hands to the option parser only so
 * that it can answer them with a "no longer exists" message.
 */
1370 #define DEPRECATED_OPTIONS "aAezR:"
/*
 * main() - watchfrr entry point.
 *
 * Visible flow: pre-initialise the FRR library, parse and validate the
 * command-line options into gs / watchfrr_di, export or clear
 * FRR_PATHSPACE for child processes, pick the network namespace name,
 * initialise the library and watchfrr itself, select the logging
 * target, and finally call systemd_send_stopping().
 *
 * NOTE(review): many original lines are absent from this listing (case
 * labels, local declarations, exit paths, the end of the function), so
 * the comments below describe only what is visible.
 */
1372 int main(int argc
, char **argv
)
/* second argument handed to translate_blanks() further down */
1375 const char *blankstr
= NULL
;
1376 const char *netns
= NULL
;
1377 bool netns_en
= false;
1379 frr_preinit(&watchfrr_di
, argc
, argv
);
1380 progname
= watchfrr_di
.progname
;
/* deprecated letters are appended so that they are still recognised */
1382 frr_opt_add("b:di:k:l:N:p:r:S:s:t:T:" DEPRECATED_OPTIONS
, longopts
, "");
1384 gs
.restart
.name
= "all";
1385 while ((opt
= frr_getopt(argc
, argv
, NULL
)) != EOF
) {
/* a deprecated option letter only triggers the message below */
1386 if (opt
&& opt
< 128 && strchr(DEPRECATED_OPTIONS
, opt
)) {
1388 "The -%c option no longer exists.\n"
1389 "Please refer to the watchfrr(8) man page.\n",
/* stop (kill) command: has to pass valid_command() */
1404 if (!valid_command(optarg
)) {
1406 "Invalid kill command, must contain '%%s': %s\n",
1410 gs
.stop_command
= optarg
;
/* log level: parsed straight into gs.loglevel */
1414 if ((sscanf(optarg
, "%d%1s", &gs
.loglevel
, garbage
)
1416 || (gs
.loglevel
< LOG_EMERG
)) {
1418 "Invalid loglevel argument: %s\n",
/* minimum restart interval: negative values are rejected */
1423 case OPTION_MINRESTART
: {
1425 if ((sscanf(optarg
, "%ld%1s", &gs
.min_restart_interval
,
1428 || (gs
.min_restart_interval
< 0)) {
1430 "Invalid min_restart_interval argument: %s\n",
/* maximum restart interval: negative values are rejected */
1435 case OPTION_MAXRESTART
: {
1437 if ((sscanf(optarg
, "%ld%1s", &gs
.max_restart_interval
,
1440 || (gs
.max_restart_interval
< 0)) {
1442 "Invalid max_restart_interval argument: %s\n",
/* operational timeout: negative values are rejected */
1447 case OPTION_MAXOPERATIONAL
: {
1450 if ((sscanf(optarg
, "%ld%1s", &gs
.operational_timeout
,
1452 (gs
.operational_timeout
< 0)) {
1454 "Invalid Operational_timeout argument: %s\n",
/* a namespace name must not contain a slash */
1461 if (optarg
&& strchr(optarg
, '/')) {
1463 "invalid network namespace name \"%s\" (may not contain slashes)\n",
/*
 * NOTE(review): the value is parsed into the local period, yet the
 * range test looks at gs.period, which is only assigned afterwards.
 * Unless gs.period is below 1 beforehand, a zero or negative interval
 * appears to pass this check; the test presumably should read
 * period < 1 - confirm and fix in the complete file.
 */
1472 if ((sscanf(optarg
, "%d%1s", &period
, garbage
) != 1)
1473 || (gs
.period
< 1)) {
1475 "Invalid interval argument: %s\n",
/* scaled by 1000 (presumably seconds to milliseconds - confirm) */
1479 gs
.period
= 1000 * period
;
/* pid file path taken from the command line */
1482 watchfrr_di
.pid_file
= optarg
;
/* restart command: has to pass valid_command() */
1485 if (!valid_command(optarg
)) {
1487 "Invalid restart command, must contain '%%s': %s\n",
1491 gs
.restart_command
= optarg
;
/* start command: has to pass valid_command() */
1494 if (!valid_command(optarg
)) {
1496 "Invalid start command, must contain '%%s': %s\n",
1500 gs
.start_command
= optarg
;
/* timeout: values below 1 are rejected */
1507 if ((sscanf(optarg
, "%ld%1s", &gs
.timeout
, garbage
)
1509 || (gs
.timeout
< 1)) {
1511 "Invalid timeout argument: %s\n",
/* restart timeout: values below 1 are rejected */
1518 if ((sscanf(optarg
, "%ld%1s", &gs
.restart_timeout
,
1521 || (gs
.restart_timeout
< 1)) {
1523 "Invalid restart timeout argument: %s\n",
1529 fputs("Invalid option.\n", stderr
);
/*
 * Consistency checks on the three command options; the first half of
 * each condition is not visible in this listing.
 */
1535 && (gs
.start_command
|| gs
.stop_command
|| gs
.restart_command
)) {
1536 fputs("Options -r/-s/-k are not used when --dry is active.\n",
1540 && (!gs
.restart_command
|| !gs
.start_command
|| !gs
.stop_command
)) {
1542 "Options -s (start), -k (kill), and -r (restart) are required.\n");
/* every configured command goes through translate_blanks() */
1547 if (gs
.restart_command
)
1548 gs
.restart_command
=
1549 translate_blanks(gs
.restart_command
, blankstr
);
1550 if (gs
.start_command
)
1552 translate_blanks(gs
.start_command
, blankstr
);
1553 if (gs
.stop_command
)
1555 translate_blanks(gs
.stop_command
, blankstr
);
/* the global restart interval starts at the configured minimum */
1558 gs
.restart
.interval
= gs
.min_restart_interval
;
1560 /* env variable for the processes that we start */
1561 if (watchfrr_di
.pathspace
)
1562 setenv("FRR_PATHSPACE", watchfrr_di
.pathspace
, 1);
1564 unsetenv("FRR_PATHSPACE");
1567 * when watchfrr_di.pathspace is read, if it is not specified
1568 * pathspace is NULL as expected
1570 pathspace
= watchfrr_di
.pathspace
;
/* namespace enabled without an explicit name: use the pathspace */
1572 if (netns_en
&& !netns
)
1573 netns
= watchfrr_di
.pathspace
;
/* presumably calls netns_setup(netns) - the body is not visible */
1575 if (netns_en
&& netns
&& netns
[0])
1578 master
= frr_init();
1579 watchfrr_error_init();
1580 watchfrr_init(argc
, argv
);
1581 cmd_init_config_callbacks(watchfrr_start_config
, watchfrr_end_config
);
1582 watchfrr_vty_init();
/*
 * Logging target: MIN() keeps the level passed on at no more than
 * LOG_DEBUG. The zlog_aux_init() call is presumably the non-daemon
 * alternative (its else is not visible) - confirm.
 */
1586 if (watchfrr_di
.daemon_mode
)
1587 zlog_syslog_set_prio_min(MIN(gs
.loglevel
, LOG_DEBUG
));
1589 zlog_aux_init(NULL
, MIN(gs
.loglevel
, LOG_DEBUG
));
1593 systemd_send_stopping();