1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * Monitor status of frr daemons and restart if necessary.
5 * Copyright (C) 2004 Andrew J. Schorr
13 #include <lib/version.h>
16 #include "lib_errors.h"
17 #include "zlog_targets.h"
28 #include "watchfrr_errors.h"
31 #define MIN(X,Y) (((X) <= (Y)) ? (X) : (Y))
34 /* Macros to help randomize timers. */
35 #define JITTER(X) ((frr_weak_random() % ((X)+1))-((X)/2))
36 #define FUZZY(X) ((X)+JITTER((X)/20))
38 #define DEFAULT_PERIOD 5
39 #define DEFAULT_TIMEOUT 90
40 #define DEFAULT_RESTART_TIMEOUT 20
41 #define DEFAULT_LOGLEVEL LOG_INFO
42 #define DEFAULT_MIN_RESTART 60
43 #define DEFAULT_MAX_RESTART 600
44 #define DEFAULT_OPERATIONAL_TIMEOUT 60
46 #define DEFAULT_RESTART_CMD WATCHFRR_SH_PATH " restart %s"
47 #define DEFAULT_START_CMD WATCHFRR_SH_PATH " start %s"
48 #define DEFAULT_STOP_CMD WATCHFRR_SH_PATH " stop %s"
50 #define PING_TOKEN "PING"
52 DEFINE_MGROUP(WATCHFRR
, "watchfrr");
53 DEFINE_MTYPE_STATIC(WATCHFRR
, WATCHFRR_DAEMON
, "watchfrr daemon entry");
55 /* Needs to be global, referenced somewhere inside libfrr. */
56 struct thread_master
*master
;
58 static bool watch_only
= false;
59 const char *pathspace
;
66 PHASE_ZEBRA_RESTART_PENDING
,
67 PHASE_WAITING_ZEBRA_UP
70 static const char *const phase_str
[] = {
74 "Waiting for other daemons to come down",
75 "Zebra restart job running",
76 "Waiting for zebra to come up",
80 #define PHASE_TIMEOUT (3*gs.restart_timeout)
/* Startup grace period handed to thread_add_timer_msec(), i.e. in
 * milliseconds.  The expansion is parenthesized so the macro keeps its
 * value when it appears inside a larger expression. */
#define STARTUP_TIMEOUT (55 * 1000)
89 struct thread
*t_kill
;
93 static struct global_state
{
94 enum restart_phase phase
;
95 struct thread
*t_phase_hanging
;
96 struct thread
*t_startup_timeout
;
97 struct thread
*t_operational
;
101 long restart_timeout
;
102 bool reading_configuration
;
103 long min_restart_interval
;
104 long max_restart_interval
;
105 long operational_timeout
;
106 struct daemon
*daemons
;
107 const char *restart_command
;
108 const char *start_command
;
109 const char *stop_command
;
110 struct restart_info restart
;
112 struct daemon
*special
; /* points to zebra when doing phased restart */
115 int numdown
; /* # of daemons that are not UP or UNRESPONSIVE */
118 .vtydir
= frr_vtydir
,
119 .period
= 1000 * DEFAULT_PERIOD
,
120 .timeout
= DEFAULT_TIMEOUT
,
121 .restart_timeout
= DEFAULT_RESTART_TIMEOUT
,
122 .loglevel
= DEFAULT_LOGLEVEL
,
123 .min_restart_interval
= DEFAULT_MIN_RESTART
,
124 .max_restart_interval
= DEFAULT_MAX_RESTART
,
125 .operational_timeout
= DEFAULT_OPERATIONAL_TIMEOUT
,
126 .restart_command
= DEFAULT_RESTART_CMD
,
127 .start_command
= DEFAULT_START_CMD
,
128 .stop_command
= DEFAULT_STOP_CMD
,
140 (((DMN)->state == DAEMON_UP) || ((DMN)->state == DAEMON_UNRESPONSIVE))
142 static const char *const state_str
[] = {
143 "Init", "Down", "Connecting", "Up", "Unresponsive",
148 enum daemon_state state
;
150 struct timeval echo_sent
;
151 unsigned int connect_tries
;
152 struct thread
*t_wakeup
;
153 struct thread
*t_read
;
154 struct thread
*t_write
;
156 struct restart_info restart
;
159 * For a given daemon, if we've turned on ignore timeouts
160 * ignore the timeout value and assume everything is ok
161 * This is for daemon debugging w/ gdb after we have started
162 * FRR and realize we have something that needs to be looked
168 #define OPTION_MINRESTART 2000
169 #define OPTION_MAXRESTART 2001
170 #define OPTION_DRY 2002
171 #define OPTION_NETNS 2003
172 #define OPTION_MAXOPERATIONAL 2004
174 static const struct option longopts
[] = {
175 {"daemon", no_argument
, NULL
, 'd'},
176 {"statedir", required_argument
, NULL
, 'S'},
177 {"loglevel", required_argument
, NULL
, 'l'},
178 {"interval", required_argument
, NULL
, 'i'},
179 {"timeout", required_argument
, NULL
, 't'},
180 {"restart-timeout", required_argument
, NULL
, 'T'},
181 {"restart", required_argument
, NULL
, 'r'},
182 {"start-command", required_argument
, NULL
, 's'},
183 {"kill-command", required_argument
, NULL
, 'k'},
184 {"dry", no_argument
, NULL
, OPTION_DRY
},
185 {"min-restart-interval", required_argument
, NULL
, OPTION_MINRESTART
},
186 {"max-restart-interval", required_argument
, NULL
, OPTION_MAXRESTART
},
187 {"operational-timeout", required_argument
, NULL
, OPTION_MAXOPERATIONAL
},
188 {"pid-file", required_argument
, NULL
, 'p'},
189 {"blank-string", required_argument
, NULL
, 'b'},
191 {"netns", optional_argument
, NULL
, OPTION_NETNS
},
193 {"help", no_argument
, NULL
, 'h'},
194 {"version", no_argument
, NULL
, 'v'},
197 static int try_connect(struct daemon
*dmn
);
198 static void wakeup_send_echo(struct thread
*t_wakeup
);
199 static void try_restart(struct daemon
*dmn
);
200 static void phase_check(void);
201 static void restart_done(struct daemon
*dmn
);
203 static const char *progname
;
205 void watchfrr_set_ignore_daemon(struct vty
*vty
, const char *dname
, bool ignore
)
209 for (dmn
= gs
.daemons
; dmn
; dmn
= dmn
->next
) {
210 if (strncmp(dmn
->name
, dname
, strlen(dmn
->name
)) == 0)
215 dmn
->ignore_timeout
= ignore
;
216 vty_out(vty
, "%s switching to %s\n", dmn
->name
,
217 ignore
? "ignore" : "watch");
219 vty_out(vty
, "%s is not configured for running at the moment",
223 static void printhelp(FILE *target
)
226 "Usage : %s [OPTION...] <daemon name> ...\n\n\
227 Watchdog program to monitor status of frr daemons and try to restart\n\
228 them if they are down or unresponsive. It determines whether a daemon is\n\
229 up based on whether it can connect to the daemon's vty unix stream socket.\n\
230 It then repeatedly sends echo commands over that socket to determine whether\n\
231 the daemon is responsive. If the daemon crashes, we will receive an EOF\n\
232 on the socket connection and know immediately that the daemon is down.\n\n\
233 The daemons to be monitored should be listed on the command line.\n\n\
234 In order to avoid attempting to restart the daemons in a fast loop,\n\
235 the -m and -M options allow you to control the minimum delay between\n\
236 restart commands. The minimum restart delay is recalculated each time\n\
237 a restart is attempted: if the time since the last restart attempt exceeds\n\
238 twice the -M value, then the restart delay is set to the -m value.\n\
239 Otherwise, the interval is doubled (but capped at the -M value).\n\n",
244 -d, --daemon Run in daemon mode. In this mode, error messages are sent\n\
245 to syslog instead of stdout.\n\
246 -S, --statedir Set the vty socket directory (default is %s)\n\
247 -N, --pathspace Insert prefix into config & socket paths\n"
249 " --netns Create and/or use Linux network namespace. If no name is\n"
250 " given, uses the value from `-N`.\n"
252 "-l, --loglevel Set the logging level (default is %d).\n\
253 The value should range from %d (LOG_EMERG) to %d (LOG_DEBUG),\n\
254 but it can be set higher than %d if extra-verbose debugging\n\
255 messages are desired.\n\
256 --min-restart-interval\n\
257 Set the minimum seconds to wait between invocations of daemon\n\
258 restart commands (default is %d).\n\
259 --max-restart-interval\n\
260 Set the maximum seconds to wait between invocations of daemon\n\
261 restart commands (default is %d).\n\
262 --operational-timeout\n\
263 Set the time before systemd is notified that we are considered\n\
264 operational again after a daemon restart (default is %d).\n\
265 -i, --interval Set the status polling interval in seconds (default is %d)\n\
266 -t, --timeout Set the unresponsiveness timeout in seconds (default is %d)\n\
267 -T, --restart-timeout\n\
268 Set the restart (kill) timeout in seconds (default is %d).\n\
269 If any background jobs are still running after this much\n\
270 time has elapsed, they will be killed.\n\
271 -r, --restart Supply a Bourne shell command to use to restart a single\n\
272 daemon. The command string should include '%%s' where the\n\
273 name of the daemon should be substituted.\n\
275 -s, --start-command\n\
276 Supply a Bourne shell to command to use to start a single\n\
277 daemon. The command string should include '%%s' where the\n\
278 name of the daemon should be substituted.\n\
280 -k, --kill-command\n\
281 Supply a Bourne shell to command to use to stop a single\n\
282 daemon. The command string should include '%%s' where the\n\
283 name of the daemon should be substituted.\n\
285 --dry Do not start or restart anything, just log.\n\
286 -p, --pid-file Set process identifier file name\n\
287 (default is %s/watchfrr.pid).\n\
288 -b, --blank-string\n\
289 When the supplied argument string is found in any of the\n\
290 various shell command arguments (-r, -s, or -k), replace\n\
291 it with a space. This is an ugly hack to circumvent problems\n\
292 passing command-line arguments with embedded spaces.\n\
293 -v, --version Print program version\n\
294 -h, --help Display this help and exit\n",
295 frr_vtydir
, DEFAULT_LOGLEVEL
, LOG_EMERG
, LOG_DEBUG
, LOG_DEBUG
,
296 DEFAULT_MIN_RESTART
, DEFAULT_MAX_RESTART
,
297 DEFAULT_OPERATIONAL_TIMEOUT
, DEFAULT_PERIOD
, DEFAULT_TIMEOUT
,
298 DEFAULT_RESTART_TIMEOUT
, DEFAULT_RESTART_CMD
, DEFAULT_START_CMD
,
299 DEFAULT_STOP_CMD
, frr_vtydir
);
302 static pid_t
run_background(char *shell_cmd
)
306 switch (child
= fork()) {
308 flog_err_sys(EC_LIB_SYSTEM_CALL
,
309 "fork failed, cannot run command [%s]: %s",
310 shell_cmd
, safe_strerror(errno
));
314 /* Use separate process group so child processes can be killed
316 if (setpgid(0, 0) < 0)
317 zlog_warn("setpgid(0,0) failed: %s",
318 safe_strerror(errno
));
322 char *const argv
[4] = {shell
, dashc
, shell_cmd
, NULL
};
323 execv("/bin/sh", argv
);
324 flog_err_sys(EC_LIB_SYSTEM_CALL
,
325 "execv(/bin/sh -c '%s') failed: %s",
326 shell_cmd
, safe_strerror(errno
));
330 /* Parent process: we will reap the child later. */
331 zlog_info("Forked background command [pid %d]: %s", (int)child
,
/*
 * Compute how much time (per gettimeofday) has passed since *start_time.
 * The difference is written to *result, and result itself is returned so
 * the call can be used directly inside an expression.
 */
static struct timeval *time_elapsed(struct timeval *result,
				    const struct timeval *start_time)
{
	gettimeofday(result, NULL);

	result->tv_sec -= start_time->tv_sec;
	result->tv_usec -= start_time->tv_usec;

	/* Borrow whole seconds until the microsecond field is non-negative. */
	for (; result->tv_usec < 0; result->tv_usec += 1000000L)
		result->tv_sec--;

	return result;
}
350 static void restart_kill(struct thread
*t_kill
)
352 struct restart_info
*restart
= THREAD_ARG(t_kill
);
353 struct timeval delay
;
355 time_elapsed(&delay
, &restart
->time
);
357 if (gs
.reading_configuration
) {
359 "%s %s child process appears to still be reading configuration, delaying for another %lu time",
360 restart
->what
, restart
->name
, gs
.restart_timeout
);
361 thread_add_timer(master
, restart_kill
, restart
,
362 gs
.restart_timeout
, &restart
->t_kill
);
367 "%s %s child process %d still running after %ld seconds, sending signal %d",
368 restart
->what
, restart
->name
, (int)restart
->pid
,
369 (long)delay
.tv_sec
, (restart
->kills
? SIGKILL
: SIGTERM
));
370 kill(-restart
->pid
, (restart
->kills
? SIGKILL
: SIGTERM
));
372 thread_add_timer(master
, restart_kill
, restart
, gs
.restart_timeout
,
376 static struct restart_info
*find_child(pid_t child
)
379 if (gs
.restart
.pid
== child
)
382 for (dmn
= gs
.daemons
; dmn
; dmn
= dmn
->next
) {
383 if (dmn
->restart
.pid
== child
)
384 return &dmn
->restart
;
389 static void sigchild(void)
395 struct restart_info
*restart
;
398 switch (child
= waitpid(-1, &status
, WNOHANG
)) {
400 flog_err_sys(EC_LIB_SYSTEM_CALL
, "waitpid failed: %s",
401 safe_strerror(errno
));
404 zlog_warn("SIGCHLD received, but waitpid did not reap a child");
408 if (child
== integrated_write_pid
) {
409 integrated_write_sigchld(status
);
413 if ((restart
= find_child(child
)) != NULL
) {
414 name
= restart
->name
;
415 what
= restart
->what
;
418 thread_cancel(&restart
->t_kill
);
420 /* Update restart time to reflect the time the command
422 gettimeofday(&restart
->time
, NULL
);
426 "waitpid returned status for an unknown child process %d",
431 if (WIFSTOPPED(status
))
432 zlog_warn("%s %s process %d is stopped", what
, name
,
434 else if (WIFSIGNALED(status
))
435 zlog_warn("%s %s process %d terminated due to signal %d", what
,
436 name
, (int)child
, WTERMSIG(status
));
437 else if (WIFEXITED(status
)) {
438 if (WEXITSTATUS(status
) != 0)
440 "%s %s process %d exited with non-zero status %d",
441 what
, name
, (int)child
, WEXITSTATUS(status
));
443 zlog_debug("%s %s process %d exited normally", what
,
446 if (restart
&& restart
!= &gs
.restart
) {
447 dmn
= container_of(restart
, struct daemon
,
451 for (dmn
= gs
.daemons
; dmn
; dmn
= dmn
->next
)
457 "cannot interpret %s %s process %d wait status 0x%x",
458 what
, name
, (int)child
, status
);
462 static int run_job(struct restart_info
*restart
, const char *cmdtype
,
463 const char *command
, int force
, int update_interval
)
465 struct timeval delay
;
467 if (gs
.loglevel
> LOG_DEBUG
+ 1)
468 zlog_debug("attempting to %s %s", cmdtype
, restart
->name
);
471 if (gs
.loglevel
> LOG_DEBUG
+ 1)
473 "cannot %s %s, previous pid %d still running",
474 cmdtype
, restart
->name
, (int)restart
->pid
);
480 snprintf(buffer
, sizeof(buffer
), "restarting %s", restart
->name
);
481 systemd_send_status(buffer
);
483 /* Note: time_elapsed test must come before the force test, since we
485 to make sure that delay is initialized for use below in updating the
487 if ((time_elapsed(&delay
, &restart
->time
)->tv_sec
< restart
->interval
)
490 if (gs
.loglevel
> LOG_DEBUG
+ 1)
492 "postponing %s %s: elapsed time %ld < retry interval %ld",
493 cmdtype
, restart
->name
, (long)delay
.tv_sec
,
498 gettimeofday(&restart
->time
, NULL
);
501 char cmd
[strlen(command
) + strlen(restart
->name
) + 1];
502 #pragma GCC diagnostic push
503 #pragma GCC diagnostic ignored "-Wformat-nonliteral"
504 /* user supplied command string has a %s for the daemon name */
505 snprintf(cmd
, sizeof(cmd
), command
, restart
->name
);
506 #pragma GCC diagnostic pop
507 if ((restart
->pid
= run_background(cmd
)) > 0) {
508 thread_add_timer(master
, restart_kill
, restart
,
509 gs
.restart_timeout
, &restart
->t_kill
);
510 restart
->what
= cmdtype
;
516 /* Calculate the new restart interval. */
517 if (update_interval
) {
518 if (delay
.tv_sec
> 2 * gs
.max_restart_interval
)
519 restart
->interval
= gs
.min_restart_interval
;
520 else if ((restart
->interval
*= 2) > gs
.max_restart_interval
)
521 restart
->interval
= gs
.max_restart_interval
;
522 if (gs
.loglevel
> LOG_DEBUG
+ 1)
523 zlog_debug("restart %s interval is now %ld",
524 restart
->name
, restart
->interval
);
529 #define SET_READ_HANDLER(DMN) \
531 (DMN)->t_read = NULL; \
532 thread_add_read(master, handle_read, (DMN), (DMN)->fd, \
536 #define SET_WAKEUP_DOWN(DMN) \
538 (DMN)->t_wakeup = NULL; \
539 thread_add_timer_msec(master, wakeup_down, (DMN), \
540 FUZZY(gs.period), &(DMN)->t_wakeup); \
543 #define SET_WAKEUP_UNRESPONSIVE(DMN) \
545 (DMN)->t_wakeup = NULL; \
546 thread_add_timer_msec(master, wakeup_unresponsive, (DMN), \
547 FUZZY(gs.period), &(DMN)->t_wakeup); \
550 #define SET_WAKEUP_ECHO(DMN) \
552 (DMN)->t_wakeup = NULL; \
553 thread_add_timer_msec(master, wakeup_send_echo, (DMN), \
554 FUZZY(gs.period), &(DMN)->t_wakeup); \
557 static void wakeup_down(struct thread
*t_wakeup
)
559 struct daemon
*dmn
= THREAD_ARG(t_wakeup
);
561 dmn
->t_wakeup
= NULL
;
562 if (try_connect(dmn
) < 0)
563 SET_WAKEUP_DOWN(dmn
);
564 if ((dmn
->connect_tries
> 1) && (dmn
->state
!= DAEMON_UP
))
568 static void wakeup_init(struct thread
*t_wakeup
)
570 struct daemon
*dmn
= THREAD_ARG(t_wakeup
);
572 dmn
->t_wakeup
= NULL
;
573 if (try_connect(dmn
) < 0) {
575 "%s state -> down : initial connection attempt failed",
577 dmn
->state
= DAEMON_DOWN
;
582 static void restart_done(struct daemon
*dmn
)
584 if (dmn
->state
!= DAEMON_DOWN
) {
586 "Daemon: %s: is in %s state but expected it to be in DAEMON_DOWN state",
587 dmn
->name
, state_str
[dmn
->state
]);
590 THREAD_OFF(dmn
->t_wakeup
);
592 if (try_connect(dmn
) < 0)
593 SET_WAKEUP_DOWN(dmn
);
596 static void daemon_restarting_operational(struct thread
*thread
)
598 systemd_send_status("FRR Operational");
601 static void daemon_down(struct daemon
*dmn
, const char *why
)
603 if (IS_UP(dmn
) || (dmn
->state
== DAEMON_INIT
))
604 flog_err(EC_WATCHFRR_CONNECTION
, "%s state -> down : %s",
606 else if (gs
.loglevel
> LOG_DEBUG
)
607 zlog_debug("%s still down : %s", dmn
->name
, why
);
610 dmn
->state
= DAEMON_DOWN
;
615 THREAD_OFF(dmn
->t_read
);
616 THREAD_OFF(dmn
->t_write
);
617 THREAD_OFF(dmn
->t_wakeup
);
618 if (try_connect(dmn
) < 0)
619 SET_WAKEUP_DOWN(dmn
);
621 systemd_send_status("FRR partially operational");
625 static void handle_read(struct thread
*t_read
)
627 struct daemon
*dmn
= THREAD_ARG(t_read
);
628 static const char resp
[sizeof(PING_TOKEN
) + 4] = PING_TOKEN
"\n";
629 char buf
[sizeof(resp
) + 100];
631 struct timeval delay
;
634 if ((rc
= read(dmn
->fd
, buf
, sizeof(buf
))) < 0) {
637 if (ERRNO_IO_RETRY(errno
)) {
638 /* Pretend it never happened. */
639 SET_READ_HANDLER(dmn
);
642 snprintf(why
, sizeof(why
), "unexpected read error: %s",
643 safe_strerror(errno
));
644 daemon_down(dmn
, why
);
648 daemon_down(dmn
, "read returned EOF");
651 if (!dmn
->echo_sent
.tv_sec
) {
652 char why
[sizeof(buf
) + 100];
653 snprintf(why
, sizeof(why
),
654 "unexpected read returns %d bytes: %.*s", (int)rc
,
656 daemon_down(dmn
, why
);
660 /* We are expecting an echo response: is there any chance that the
661 response would not be returned entirely in the first read? That
662 seems inconceivable... */
663 if ((rc
!= sizeof(resp
)) || memcmp(buf
, resp
, sizeof(resp
))) {
664 char why
[100 + sizeof(buf
)];
665 snprintf(why
, sizeof(why
),
666 "read returned bad echo response of %d bytes (expecting %u): %.*s",
667 (int)rc
, (unsigned int)sizeof(resp
), (int)rc
, buf
);
668 daemon_down(dmn
, why
);
672 time_elapsed(&delay
, &dmn
->echo_sent
);
673 dmn
->echo_sent
.tv_sec
= 0;
674 if (dmn
->state
== DAEMON_UNRESPONSIVE
) {
675 if (delay
.tv_sec
< gs
.timeout
) {
676 dmn
->state
= DAEMON_UP
;
678 "%s state -> up : echo response received after %ld.%06ld seconds",
679 dmn
->name
, (long)delay
.tv_sec
,
680 (long)delay
.tv_usec
);
683 "%s: slow echo response finally received after %ld.%06ld seconds",
684 dmn
->name
, (long)delay
.tv_sec
,
685 (long)delay
.tv_usec
);
686 } else if (gs
.loglevel
> LOG_DEBUG
+ 1)
687 zlog_debug("%s: echo response received after %ld.%06ld seconds",
688 dmn
->name
, (long)delay
.tv_sec
, (long)delay
.tv_usec
);
690 SET_READ_HANDLER(dmn
);
691 thread_cancel(&dmn
->t_wakeup
);
692 SET_WAKEUP_ECHO(dmn
);
696 * Wait until we have seen that all daemons are ready before
697 * we tell systemd that we are ready
699 static void daemon_send_ready(int exitcode
)
709 zlog_notice("all daemons up, doing startup-complete notify");
710 else if (gs
.numdown
< gs
.numdaemons
)
711 flog_err(EC_WATCHFRR_CONNECTION
,
712 "startup did not complete within timeout (%d/%d daemons running)",
713 gs
.numdaemons
- gs
.numdown
, gs
.numdaemons
);
715 flog_err(EC_WATCHFRR_CONNECTION
,
716 "all configured daemons failed to start -- exiting watchfrr");
723 snprintf(started
, sizeof(started
), "%s/%s", frr_vtydir
,
725 fp
= fopen(started
, "w");
729 systemd_send_started(master
);
730 systemd_send_status("FRR Operational");
734 static void daemon_up(struct daemon
*dmn
, const char *why
)
736 dmn
->state
= DAEMON_UP
;
738 dmn
->connect_tries
= 0;
739 zlog_notice("%s state -> up : %s", dmn
->name
, why
);
740 if (gs
.numdown
== 0) {
741 daemon_send_ready(0);
743 THREAD_OFF(gs
.t_operational
);
745 thread_add_timer(master
, daemon_restarting_operational
, NULL
,
746 gs
.operational_timeout
, &gs
.t_operational
);
749 SET_WAKEUP_ECHO(dmn
);
753 static void check_connect(struct thread
*t_write
)
755 struct daemon
*dmn
= THREAD_ARG(t_write
);
757 socklen_t reslen
= sizeof(sockerr
);
760 if (getsockopt(dmn
->fd
, SOL_SOCKET
, SO_ERROR
, (char *)&sockerr
, &reslen
)
762 zlog_warn("%s: check_connect: getsockopt failed: %s", dmn
->name
,
763 safe_strerror(errno
));
765 "getsockopt failed checking connection success");
768 if ((reslen
== sizeof(sockerr
)) && sockerr
) {
772 "getsockopt reports that connection attempt failed: %s",
773 safe_strerror(sockerr
));
774 daemon_down(dmn
, why
);
778 daemon_up(dmn
, "delayed connect succeeded");
781 static void wakeup_connect_hanging(struct thread
*t_wakeup
)
783 struct daemon
*dmn
= THREAD_ARG(t_wakeup
);
786 dmn
->t_wakeup
= NULL
;
787 snprintf(why
, sizeof(why
),
788 "connection attempt timed out after %ld seconds", gs
.timeout
);
789 daemon_down(dmn
, why
);
792 /* Making connection to protocol daemon. */
793 static int try_connect(struct daemon
*dmn
)
796 struct sockaddr_un addr
;
799 if (gs
.loglevel
> LOG_DEBUG
+ 1)
800 zlog_debug("%s: attempting to connect", dmn
->name
);
801 dmn
->connect_tries
++;
803 memset(&addr
, 0, sizeof(addr
));
804 addr
.sun_family
= AF_UNIX
;
805 snprintf(addr
.sun_path
, sizeof(addr
.sun_path
), "%s/%s.vty", gs
.vtydir
,
807 #ifdef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN
808 len
= addr
.sun_len
= SUN_LEN(&addr
);
810 len
= sizeof(addr
.sun_family
) + strlen(addr
.sun_path
);
811 #endif /* HAVE_STRUCT_SOCKADDR_UN_SUN_LEN */
813 /* Quick check to see if we might succeed before we go to the trouble
814 of creating a socket. */
815 if (access(addr
.sun_path
, W_OK
) < 0) {
817 flog_err_sys(EC_LIB_SYSTEM_CALL
,
818 "%s: access to socket %s denied: %s",
819 dmn
->name
, addr
.sun_path
,
820 safe_strerror(errno
));
824 if ((sock
= socket(AF_UNIX
, SOCK_STREAM
, 0)) < 0) {
825 flog_err_sys(EC_LIB_SOCKET
, "%s(%s): cannot make socket: %s",
826 __func__
, addr
.sun_path
, safe_strerror(errno
));
830 if (set_nonblocking(sock
) < 0 || set_cloexec(sock
) < 0) {
831 flog_err_sys(EC_LIB_SYSTEM_CALL
,
832 "%s(%s): set_nonblocking/cloexec(%d) failed",
833 __func__
, addr
.sun_path
, sock
);
838 if (connect(sock
, (struct sockaddr
*)&addr
, len
) < 0) {
839 if ((errno
!= EINPROGRESS
) && (errno
!= EWOULDBLOCK
)) {
840 if (gs
.loglevel
> LOG_DEBUG
)
841 zlog_debug("%s(%s): connect failed: %s",
842 __func__
, addr
.sun_path
,
843 safe_strerror(errno
));
847 if (gs
.loglevel
> LOG_DEBUG
)
848 zlog_debug("%s: connection in progress", dmn
->name
);
849 dmn
->state
= DAEMON_CONNECTING
;
851 thread_add_write(master
, check_connect
, dmn
, dmn
->fd
,
853 thread_add_timer(master
, wakeup_connect_hanging
, dmn
,
854 gs
.timeout
, &dmn
->t_wakeup
);
855 SET_READ_HANDLER(dmn
);
860 SET_READ_HANDLER(dmn
);
861 daemon_up(dmn
, "connect succeeded");
865 static void phase_hanging(struct thread
*t_hanging
)
867 gs
.t_phase_hanging
= NULL
;
868 flog_err(EC_WATCHFRR_CONNECTION
,
869 "Phase [%s] hanging for %ld seconds, aborting phased restart",
870 phase_str
[gs
.phase
], PHASE_TIMEOUT
);
871 gs
.phase
= PHASE_NONE
;
874 static void set_phase(enum restart_phase new_phase
)
876 gs
.phase
= new_phase
;
877 thread_cancel(&gs
.t_phase_hanging
);
879 thread_add_timer(master
, phase_hanging
, NULL
, PHASE_TIMEOUT
,
880 &gs
.t_phase_hanging
);
883 static void phase_check(void)
892 for (dmn
= gs
.daemons
; dmn
; dmn
= dmn
->next
)
893 if (dmn
->state
== DAEMON_INIT
)
896 /* startup complete, everything out of INIT */
897 gs
.phase
= PHASE_NONE
;
898 for (dmn
= gs
.daemons
; dmn
; dmn
= dmn
->next
)
899 if (dmn
->state
== DAEMON_DOWN
) {
900 SET_WAKEUP_DOWN(dmn
);
904 case PHASE_STOPS_PENDING
:
908 "Phased restart: all routing daemon stop jobs have completed.");
909 set_phase(PHASE_WAITING_DOWN
);
912 case PHASE_WAITING_DOWN
:
913 if (gs
.numdown
+ IS_UP(gs
.special
) < gs
.numdaemons
)
915 systemd_send_status("Phased Restart");
916 zlog_info("Phased restart: all routing daemons now down.");
917 run_job(&gs
.special
->restart
, "restart", gs
.restart_command
, 1,
919 set_phase(PHASE_ZEBRA_RESTART_PENDING
);
922 case PHASE_ZEBRA_RESTART_PENDING
:
923 if (gs
.special
->restart
.pid
)
925 systemd_send_status("Zebra Restarting");
926 zlog_info("Phased restart: %s restart job completed.",
928 set_phase(PHASE_WAITING_ZEBRA_UP
);
931 case PHASE_WAITING_ZEBRA_UP
:
932 if (!IS_UP(gs
.special
))
934 zlog_info("Phased restart: %s is now up.", gs
.special
->name
);
935 for (dmn
= gs
.daemons
; dmn
; dmn
= dmn
->next
) {
936 if (dmn
!= gs
.special
)
937 run_job(&dmn
->restart
, "start",
938 gs
.start_command
, 1, 0);
940 gs
.phase
= PHASE_NONE
;
941 THREAD_OFF(gs
.t_phase_hanging
);
942 zlog_notice("Phased global restart has completed.");
947 static void try_restart(struct daemon
*dmn
)
952 if (dmn
!= gs
.special
) {
953 if ((gs
.special
->state
== DAEMON_UP
)
954 && (gs
.phase
== PHASE_NONE
))
955 run_job(&dmn
->restart
, "restart", gs
.restart_command
, 0,
959 "%s: postponing restart attempt because master %s daemon not up [%s], or phased restart in progress",
960 dmn
->name
, gs
.special
->name
,
961 state_str
[gs
.special
->state
]);
965 if ((gs
.phase
!= PHASE_NONE
) || gs
.numpids
) {
966 if (gs
.loglevel
> LOG_DEBUG
+ 1)
968 "postponing phased global restart: restart already in progress [%s], or outstanding child processes [%d]",
969 phase_str
[gs
.phase
], gs
.numpids
);
972 /* Is it too soon for a restart? */
974 struct timeval delay
;
975 if (time_elapsed(&delay
, &gs
.special
->restart
.time
)->tv_sec
976 < gs
.special
->restart
.interval
) {
977 if (gs
.loglevel
> LOG_DEBUG
+ 1)
979 "postponing phased global restart: elapsed time %ld < retry interval %ld",
981 gs
.special
->restart
.interval
);
985 run_job(&gs
.restart
, "restart", gs
.restart_command
, 0, 1);
988 static void wakeup_unresponsive(struct thread
*t_wakeup
)
990 struct daemon
*dmn
= THREAD_ARG(t_wakeup
);
992 dmn
->t_wakeup
= NULL
;
993 if (dmn
->state
!= DAEMON_UNRESPONSIVE
)
994 flog_err(EC_WATCHFRR_CONNECTION
,
995 "%s: no longer unresponsive (now %s), wakeup should have been cancelled!",
996 dmn
->name
, state_str
[dmn
->state
]);
998 SET_WAKEUP_UNRESPONSIVE(dmn
);
1003 static void wakeup_no_answer(struct thread
*t_wakeup
)
1005 struct daemon
*dmn
= THREAD_ARG(t_wakeup
);
1007 dmn
->t_wakeup
= NULL
;
1008 dmn
->state
= DAEMON_UNRESPONSIVE
;
1009 if (dmn
->ignore_timeout
)
1011 flog_err(EC_WATCHFRR_CONNECTION
,
1012 "%s state -> unresponsive : no response yet to ping sent %ld seconds ago",
1013 dmn
->name
, gs
.timeout
);
1014 SET_WAKEUP_UNRESPONSIVE(dmn
);
1018 static void wakeup_send_echo(struct thread
*t_wakeup
)
1020 static const char echocmd
[] = "echo " PING_TOKEN
;
1022 struct daemon
*dmn
= THREAD_ARG(t_wakeup
);
1024 dmn
->t_wakeup
= NULL
;
1025 if (((rc
= write(dmn
->fd
, echocmd
, sizeof(echocmd
))) < 0)
1026 || ((size_t)rc
!= sizeof(echocmd
))) {
1027 char why
[100 + sizeof(echocmd
)];
1028 snprintf(why
, sizeof(why
),
1029 "write '%s' returned %d instead of %u", echocmd
,
1030 (int)rc
, (unsigned int)sizeof(echocmd
));
1031 daemon_down(dmn
, why
);
1033 gettimeofday(&dmn
->echo_sent
, NULL
);
1034 thread_add_timer(master
, wakeup_no_answer
, dmn
, gs
.timeout
,
1039 bool check_all_up(void)
1043 for (dmn
= gs
.daemons
; dmn
; dmn
= dmn
->next
)
1044 if (dmn
->state
!= DAEMON_UP
)
1049 void watchfrr_status(struct vty
*vty
)
1052 struct timeval delay
;
1054 vty_out(vty
, "watchfrr global phase: %s\n", phase_str
[gs
.phase
]);
1055 vty_out(vty
, " Restart Command: %pSQq\n", gs
.restart_command
);
1056 vty_out(vty
, " Start Command: %pSQq\n", gs
.start_command
);
1057 vty_out(vty
, " Stop Command: %pSQq\n", gs
.stop_command
);
1058 vty_out(vty
, " Min Restart Interval: %ld\n", gs
.min_restart_interval
);
1059 vty_out(vty
, " Max Restart Interval: %ld\n", gs
.max_restart_interval
);
1060 vty_out(vty
, " Restart Timeout: %ld\n", gs
.restart_timeout
);
1061 vty_out(vty
, " Reading Configuration: %s\n",
1062 gs
.reading_configuration
? "yes" : "no");
1064 vty_out(vty
, " global restart running, pid %ld\n",
1065 (long)gs
.restart
.pid
);
1067 for (dmn
= gs
.daemons
; dmn
; dmn
= dmn
->next
) {
1068 vty_out(vty
, " %-20s %s%s", dmn
->name
, state_str
[dmn
->state
],
1069 dmn
->ignore_timeout
? "/Ignoring Timeout\n" : "\n");
1070 if (dmn
->restart
.pid
)
1071 vty_out(vty
, " restart running, pid %ld\n",
1072 (long)dmn
->restart
.pid
);
1073 else if (dmn
->state
== DAEMON_DOWN
&&
1074 time_elapsed(&delay
, &dmn
->restart
.time
)->tv_sec
1075 < dmn
->restart
.interval
)
1076 vty_out(vty
, " restarting in %jd seconds (%jds backoff interval)\n",
1077 (intmax_t)dmn
->restart
.interval
1078 - (intmax_t)delay
.tv_sec
,
1079 (intmax_t)dmn
->restart
.interval
);
1083 static void sigint(void)
1085 zlog_notice("Terminating on signal");
1086 systemd_send_stopping();
/*
 * Check that a user-supplied shell command template is usable with a
 * single daemon-name argument: it must contain exactly one '%', and that
 * '%' must be immediately followed by 's'.
 *
 * Returns 1 when the template is acceptable, 0 otherwise (including when
 * cmd is NULL).
 */
static int valid_command(const char *cmd)
{
	const char *pct;

	if (cmd == NULL)
		return 0;

	/* The first '%' has to introduce "%s"... */
	pct = strchr(cmd, '%');
	if (pct == NULL || pct[1] != 's')
		return 0;

	/* ...and no further conversion may follow it. */
	return strchr(pct + 1, '%') == NULL;
}
/*
 * This is an ugly hack to circumvent problems with passing command-line
 * arguments that contain spaces.  The fix is to use a configuration file.
 *
 * Returns a newly allocated copy of cmd in which every occurrence of
 * blankstr has been replaced by a single space.  The process exits if the
 * copy cannot be allocated.
 *
 * The scan resumes just past each inserted space instead of restarting at
 * the beginning, and an empty blankstr is treated as "nothing to replace".
 * Without that, an empty blankstr (or one that is itself a single space)
 * would match forever, and the empty case would also grow the string past
 * the end of its buffer.
 */
static char *translate_blanks(const char *cmd, const char *blankstr)
{
	size_t bslen = strlen(blankstr);
	char *res;
	char *p;

	res = strdup(cmd);
	if (!res) {
		perror("strdup");
		exit(1);
	}

	/* An empty token matches everywhere; there is nothing to translate. */
	if (bslen == 0)
		return res;

	p = res;
	while ((p = strstr(p, blankstr)) != NULL) {
		*p = ' ';
		/* Close the gap left by the remainder of a multi-char token
		 * (the +1 moves the terminating NUL as well). */
		if (bslen != 1)
			memmove(p + 1, p + bslen, strlen(p + bslen) + 1);
		/* Continue after the space that was just written. */
		p++;
	}
	return res;
}
1121 static void startup_timeout(struct thread
*t_wakeup
)
1123 daemon_send_ready(1);
1128 #include <sys/mount.h>
1131 #define NETNS_RUN_DIR "/var/run/netns"
1133 static void netns_create(int dirfd
, const char *nsname
)
1135 /* make /var/run/netns shared between mount namespaces
1136 * just like iproute2 sets it up
1138 if (mount("", NETNS_RUN_DIR
, "none", MS_SHARED
| MS_REC
, NULL
)) {
1139 if (errno
!= EINVAL
) {
1144 if (mount(NETNS_RUN_DIR
, NETNS_RUN_DIR
, "none",
1145 MS_BIND
| MS_REC
, NULL
)) {
1150 if (mount("", NETNS_RUN_DIR
, "none", MS_SHARED
| MS_REC
,
1157 /* need an empty file to mount on top of */
1158 int nsfd
= openat(dirfd
, nsname
, O_CREAT
| O_RDONLY
| O_EXCL
, 0);
1161 fprintf(stderr
, "failed to create \"%s/%s\": %s\n",
1162 NETNS_RUN_DIR
, nsname
, strerror(errno
));
1167 if (unshare(CLONE_NEWNET
)) {
1169 unlinkat(dirfd
, nsname
, 0);
1173 char *dstpath
= asprintfrr(MTYPE_TMP
, "%s/%s", NETNS_RUN_DIR
, nsname
);
1175 /* bind-mount so the namespace has a name and is persistent */
1176 if (mount("/proc/self/ns/net", dstpath
, "none", MS_BIND
, NULL
) < 0) {
1177 fprintf(stderr
, "failed to bind-mount netns to \"%s\": %s\n",
1178 dstpath
, strerror(errno
));
1179 unlinkat(dirfd
, nsname
, 0);
1183 XFREE(MTYPE_TMP
, dstpath
);
1186 static void netns_setup(const char *nsname
)
1190 dirfd
= open(NETNS_RUN_DIR
, O_DIRECTORY
| O_RDONLY
);
1192 if (errno
== ENOTDIR
) {
1193 fprintf(stderr
, "error: \"%s\" is not a directory!\n",
1196 } else if (errno
== ENOENT
) {
1197 if (mkdir(NETNS_RUN_DIR
, 0755)) {
1198 fprintf(stderr
, "error: \"%s\": mkdir: %s\n",
1199 NETNS_RUN_DIR
, strerror(errno
));
1202 dirfd
= open(NETNS_RUN_DIR
, O_DIRECTORY
| O_RDONLY
);
1204 fprintf(stderr
, "error: \"%s\": opendir: %s\n",
1205 NETNS_RUN_DIR
, strerror(errno
));
1209 fprintf(stderr
, "error: \"%s\": %s\n",
1210 NETNS_RUN_DIR
, strerror(errno
));
1215 nsfd
= openat(dirfd
, nsname
, O_RDONLY
);
1216 if (nsfd
< 0 && errno
!= ENOENT
) {
1217 fprintf(stderr
, "error: \"%s/%s\": %s\n",
1218 NETNS_RUN_DIR
, nsname
, strerror(errno
));
1222 netns_create(dirfd
, nsname
);
1224 if (setns(nsfd
, CLONE_NEWNET
)) {
1232 /* make sure loopback is up... weird things happen otherwise.
1233 * ioctl is perfectly fine for this, don't need netlink...
1236 struct ifreq ifr
= { };
1238 strlcpy(ifr
.ifr_name
, "lo", sizeof(ifr
.ifr_name
));
1240 sockfd
= socket(AF_INET
, SOCK_DGRAM
, 0);
1245 if (ioctl(sockfd
, SIOCGIFFLAGS
, &ifr
)) {
1246 perror("ioctl(SIOCGIFFLAGS, \"lo\")");
1249 if (!(ifr
.ifr_flags
& IFF_UP
)) {
1250 ifr
.ifr_flags
|= IFF_UP
;
1251 if (ioctl(sockfd
, SIOCSIFFLAGS
, &ifr
)) {
1252 perror("ioctl(SIOCSIFFLAGS, \"lo\")");
1259 #else /* !GNU_LINUX */
1261 static void netns_setup(const char *nsname
)
1263 fprintf(stderr
, "network namespaces are only available on Linux\n");
1268 static void watchfrr_start_config(void)
1270 gs
.reading_configuration
= true;
1273 static void watchfrr_end_config(void)
1275 gs
.reading_configuration
= false;
/*
 * Build the list of monitored daemons from the non-option command-line
 * arguments and log what is being watched.
 *
 * argc/argv are the program arguments; parsing starts at optind, so option
 * processing must already have happened.
 *
 * NOTE(review): several statements (list linking, error exits, the pointer
 * advance in the name-joining loop) are not visible in this excerpt.
 */
1278 static void watchfrr_init(int argc
, char **argv
)
/* Name of the daemon that gets special treatment further down. */
1280 const char *special
= "zebra";
1282 struct daemon
*dmn
, **add
= &gs
.daemons
;
1283 char alldaemons
[512] = "", *p
= alldaemons
;
/* Arm the startup timer; STARTUP_TIMEOUT is passed as milliseconds. */
1285 thread_add_timer_msec(master
, startup_timeout
, NULL
, STARTUP_TIMEOUT
,
1286 &gs
.t_startup_timeout
);
/* Every remaining argument names one daemon to watch. */
1288 for (i
= optind
; i
< argc
; i
++) {
1289 dmn
= XCALLOC(MTYPE_WATCHFRR_DAEMON
, sizeof(*dmn
));
/* Both name fields point straight into argv; the string is not copied. */
1291 dmn
->name
= dmn
->restart
.name
= argv
[i
];
1292 dmn
->state
= DAEMON_INIT
;
/* wakeup_init is scheduled for this daemon with a 0 ms delay. */
1296 thread_add_timer_msec(master
, wakeup_init
, dmn
, 0,
1298 dmn
->restart
.interval
= gs
.min_restart_interval
;
/*
 * NOTE(review): a match presumably records this entry in gs.special (the
 * assignment is not visible here) - confirm.
 */
1302 if (!strcmp(dmn
->name
, special
))
1308 "Must specify one or more daemons to monitor.\n\n");
/* Outside monitor-only mode the special daemon must be in the list. */
1311 if (!watch_only
&& !gs
.special
) {
1312 fprintf(stderr
, "\"%s\" daemon must be in daemon lists\n\n",
/*
 * Join the daemon names, space-separated, into alldaemons for the log line
 * below; the space left in the buffer is recomputed from p on each call.
 */
1317 for (dmn
= gs
.daemons
; dmn
; dmn
= dmn
->next
) {
1318 snprintf(p
, alldaemons
+ sizeof(alldaemons
) - p
, "%s%s",
1319 (p
== alldaemons
) ? "" : " ", dmn
->name
);
1322 zlog_notice("%s %s watching [%s]%s", progname
, FRR_VERSION
, alldaemons
,
1323 watch_only
? ", monitor mode" : "");
/*
 * Privilege descriptor referenced by the daemon info block (.privs).
 * Only the vty_group member is visible in this excerpt.
 */
1326 struct zebra_privs_t watchfrr_privs
= {
1328 .vty_group
= VTY_GROUP
,
/*
 * Signal table referenced by the daemon info block (.signals/.n_signals).
 * The one entry visible in this excerpt uses sigchild() as its handler.
 */
1332 static struct frr_signal_t watchfrr_signals
[] = {
1343 .handler
= sigchild
,
/*
 * Daemon description for watchfrr: feature flags, help callback, copyright
 * string, signal table and privilege descriptor.
 * NOTE(review): presumably this macro is what defines the watchfrr_di object
 * used by main() - confirm against the macro definition.
 */
1347 FRR_DAEMON_INFO(watchfrr
, WATCHFRR
,
1348 .flags
= FRR_NO_PRIVSEP
| FRR_NO_TCPVTY
| FRR_LIMITED_CLI
1349 | FRR_NO_CFG_PID_DRY
| FRR_NO_ZCLIENT
1352 .printhelp
= printhelp
,
1353 .copyright
= "Copyright 2004 Andrew J. Schorr",
1355 .signals
= watchfrr_signals
,
/* array_size() keeps the count in step with the table itself. */
1356 .n_signals
= array_size(watchfrr_signals
),
1358 .privs
= &watchfrr_privs
,
1361 #define DEPRECATED_OPTIONS "aAezR:"
/*
 * Program entry point.
 *
 * Visible sequence: pre-initialise the FRR library, register and parse the
 * command-line options into gs / watchfrr_di, check the start/stop/restart
 * commands, run them through translate_blanks(), export or clear the
 * FRR_PATHSPACE environment variable, pick the network namespace name, then
 * initialise the library, the daemon list, the config callbacks, the vty
 * commands and logging.
 *
 * NOTE(review): many statements (most case labels, the error exits, the
 * main loop) are not visible in this excerpt - confirm against the full file.
 */
1363 int main(int argc
, char **argv
)
1366 const char *blankstr
= NULL
;
1367 const char *netns
= NULL
;
1368 bool netns_en
= false;
1370 frr_preinit(&watchfrr_di
, argc
, argv
);
1371 progname
= watchfrr_di
.progname
;
/* Deprecated option letters stay registered so they can be diagnosed. */
1373 frr_opt_add("b:di:k:l:N:p:r:S:s:t:T:" DEPRECATED_OPTIONS
, longopts
, "");
/* The global restart record is labelled "all". */
1375 gs
.restart
.name
= "all";
1376 while ((opt
= frr_getopt(argc
, argv
, NULL
)) != EOF
) {
/*
 * A deprecated option only yields a pointer to the man page.
 * NOTE(review): the opt < 128 test presumably filters out long-option
 * codes before strchr() is consulted - confirm.
 */
1377 if (opt
&& opt
< 128 && strchr(DEPRECATED_OPTIONS
, opt
)) {
1379 "The -%c option no longer exists.\n"
1380 "Please refer to the watchfrr(8) man page.\n",
/* Kill/stop command: valid_command() must accept it before it is stored. */
1395 if (!valid_command(optarg
)) {
1397 "Invalid kill command, must contain '%%s': %s\n",
1401 gs
.stop_command
= optarg
;
1405 if ((sscanf(optarg
, "%d%1s", &gs
.loglevel
, garbage
)
1407 || (gs
.loglevel
< LOG_EMERG
)) {
1409 "Invalid loglevel argument: %s\n",
/* Negative minimum restart intervals are rejected. */
1414 case OPTION_MINRESTART
: {
1416 if ((sscanf(optarg
, "%ld%1s", &gs
.min_restart_interval
,
1419 || (gs
.min_restart_interval
< 0)) {
1421 "Invalid min_restart_interval argument: %s\n",
/* Negative maximum restart intervals are rejected. */
1426 case OPTION_MAXRESTART
: {
1428 if ((sscanf(optarg
, "%ld%1s", &gs
.max_restart_interval
,
1431 || (gs
.max_restart_interval
< 0)) {
1433 "Invalid max_restart_interval argument: %s\n",
/* Negative operational timeouts are rejected. */
1438 case OPTION_MAXOPERATIONAL
: {
1441 if ((sscanf(optarg
, "%ld%1s", &gs
.operational_timeout
,
1443 (gs
.operational_timeout
< 0)) {
1445 "Invalid Operational_timeout argument: %s\n",
/* A namespace name is used as a single path component, so no '/'. */
1452 if (optarg
&& strchr(optarg
, '/')) {
1454 "invalid network namespace name \"%s\" (may not contain slashes)\n",
/*
 * "%d%1s": exactly one conversion means a number with no trailing text.
 * NOTE(review): the range test below reads gs.period, not the local
 * `period` that sscanf() just filled in, so a zero or negative interval
 * does not appear to be rejected here. It looks like `period < 1` was
 * intended - confirm and fix.
 */
1463 if ((sscanf(optarg
, "%d%1s", &period
, garbage
) != 1)
1464 || (gs
.period
< 1)) {
1466 "Invalid interval argument: %s\n",
/*
 * gs.period stores the parsed value multiplied by 1000 (presumably
 * seconds converted to milliseconds - confirm).
 */
1470 gs
.period
= 1000 * period
;
1473 watchfrr_di
.pid_file
= optarg
;
/* Restart command: same validation as the kill command. */
1476 if (!valid_command(optarg
)) {
1478 "Invalid restart command, must contain '%%s': %s\n",
1482 gs
.restart_command
= optarg
;
/* Start command: same validation as the kill command. */
1485 if (!valid_command(optarg
)) {
1487 "Invalid start command, must contain '%%s': %s\n",
1491 gs
.start_command
= optarg
;
/* Timeouts below 1 are rejected. */
1498 if ((sscanf(optarg
, "%ld%1s", &gs
.timeout
, garbage
)
1500 || (gs
.timeout
< 1)) {
1502 "Invalid timeout argument: %s\n",
/* Restart timeouts below 1 are rejected. */
1509 if ((sscanf(optarg
, "%ld%1s", &gs
.restart_timeout
,
1512 || (gs
.restart_timeout
< 1)) {
1514 "Invalid restart timeout argument: %s\n",
1520 fputs("Invalid option.\n", stderr
);
/* Per the message below, -r/-s/-k are refused when --dry is active. */
1526 && (gs
.start_command
|| gs
.stop_command
|| gs
.restart_command
)) {
1527 fputs("Options -r/-s/-k are not used when --dry is active.\n",
/* Per the message below, all three commands are otherwise mandatory. */
1531 && (!gs
.restart_command
|| !gs
.start_command
|| !gs
.stop_command
)) {
1533 "Options -s (start), -k (kill), and -r (restart) are required.\n");
/*
 * Each configured command is passed through translate_blanks() together
 * with blankstr (NULL unless an option not visible here has set it).
 */
1538 if (gs
.restart_command
)
1539 gs
.restart_command
=
1540 translate_blanks(gs
.restart_command
, blankstr
);
1541 if (gs
.start_command
)
1543 translate_blanks(gs
.start_command
, blankstr
);
1544 if (gs
.stop_command
)
1546 translate_blanks(gs
.stop_command
, blankstr
);
/* The global restart interval starts at the configured minimum. */
1549 gs
.restart
.interval
= gs
.min_restart_interval
;
1551 /* env variable for the processes that we start */
1552 if (watchfrr_di
.pathspace
)
1553 setenv("FRR_PATHSPACE", watchfrr_di
.pathspace
, 1);
1555 unsetenv("FRR_PATHSPACE");
1558 * when watchfrr_di.pathspace is read, if it is not specified
1559 * pathspace is NULL as expected
1561 pathspace
= watchfrr_di
.pathspace
;
/* Namespace enabled but not named: fall back to the pathspace name. */
1563 if (netns_en
&& !netns
)
1564 netns
= watchfrr_di
.pathspace
;
/* An empty namespace name is treated like no namespace at all. */
1566 if (netns_en
&& netns
&& netns
[0])
1569 master
= frr_init();
1570 watchfrr_error_init();
1571 watchfrr_init(argc
, argv
);
1572 cmd_init_config_callbacks(watchfrr_start_config
, watchfrr_end_config
);
1573 watchfrr_vty_init();
/* Both logging calls clamp the level with MIN(gs.loglevel, LOG_DEBUG). */
1577 if (watchfrr_di
.daemon_mode
)
1578 zlog_syslog_set_prio_min(MIN(gs
.loglevel
, LOG_DEBUG
));
1580 zlog_aux_init(NULL
, MIN(gs
.loglevel
, LOG_DEBUG
));
1584 systemd_send_stopping();