2 * Monitor status of frr daemons and restart if necessary.
4 * Copyright (C) 2004 Andrew J. Schorr
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
26 #include <lib/version.h>
29 #include "lib_errors.h"
30 #include "zlog_targets.h"
41 #include "watchfrr_errors.h"
44 #define MIN(X,Y) (((X) <= (Y)) ? (X) : (Y))
47 /* Macros to help randomize timers. */
48 #define JITTER(X) ((frr_weak_random() % ((X)+1))-((X)/2))
49 #define FUZZY(X) ((X)+JITTER((X)/20))
51 #define DEFAULT_PERIOD 5
52 #define DEFAULT_TIMEOUT 90
53 #define DEFAULT_RESTART_TIMEOUT 20
54 #define DEFAULT_LOGLEVEL LOG_INFO
55 #define DEFAULT_MIN_RESTART 60
56 #define DEFAULT_MAX_RESTART 600
57 #define DEFAULT_OPERATIONAL_TIMEOUT 60
59 #define DEFAULT_RESTART_CMD WATCHFRR_SH_PATH " restart %s"
60 #define DEFAULT_START_CMD WATCHFRR_SH_PATH " start %s"
61 #define DEFAULT_STOP_CMD WATCHFRR_SH_PATH " stop %s"
63 #define PING_TOKEN "PING"
65 DEFINE_MGROUP(WATCHFRR
, "watchfrr");
66 DEFINE_MTYPE_STATIC(WATCHFRR
, WATCHFRR_DAEMON
, "watchfrr daemon entry");
68 /* Needs to be global, referenced somewhere inside libfrr. */
69 struct thread_master
*master
;
71 static bool watch_only
= false;
72 const char *pathspace
;
79 PHASE_ZEBRA_RESTART_PENDING
,
80 PHASE_WAITING_ZEBRA_UP
83 static const char *const phase_str
[] = {
87 "Waiting for other daemons to come down",
88 "Zebra restart job running",
89 "Waiting for zebra to come up",
93 #define PHASE_TIMEOUT (3*gs.restart_timeout)
/* Startup grace period handed to thread_add_timer_msec(), i.e. milliseconds.
 * Parenthesized so the product stays intact inside larger expressions. */
#define STARTUP_TIMEOUT (55 * 1000)
102 struct thread
*t_kill
;
106 static struct global_state
{
107 enum restart_phase phase
;
108 struct thread
*t_phase_hanging
;
109 struct thread
*t_startup_timeout
;
110 struct thread
*t_operational
;
114 long restart_timeout
;
115 bool reading_configuration
;
116 long min_restart_interval
;
117 long max_restart_interval
;
118 long operational_timeout
;
119 struct daemon
*daemons
;
120 const char *restart_command
;
121 const char *start_command
;
122 const char *stop_command
;
123 struct restart_info restart
;
125 struct daemon
*special
; /* points to zebra when doing phased restart */
128 int numdown
; /* # of daemons that are not UP or UNRESPONSIVE */
131 .vtydir
= frr_vtydir
,
132 .period
= 1000 * DEFAULT_PERIOD
,
133 .timeout
= DEFAULT_TIMEOUT
,
134 .restart_timeout
= DEFAULT_RESTART_TIMEOUT
,
135 .loglevel
= DEFAULT_LOGLEVEL
,
136 .min_restart_interval
= DEFAULT_MIN_RESTART
,
137 .max_restart_interval
= DEFAULT_MAX_RESTART
,
138 .operational_timeout
= DEFAULT_OPERATIONAL_TIMEOUT
,
139 .restart_command
= DEFAULT_RESTART_CMD
,
140 .start_command
= DEFAULT_START_CMD
,
141 .stop_command
= DEFAULT_STOP_CMD
,
153 (((DMN)->state == DAEMON_UP) || ((DMN)->state == DAEMON_UNRESPONSIVE))
155 static const char *const state_str
[] = {
156 "Init", "Down", "Connecting", "Up", "Unresponsive",
161 enum daemon_state state
;
163 struct timeval echo_sent
;
164 unsigned int connect_tries
;
165 struct thread
*t_wakeup
;
166 struct thread
*t_read
;
167 struct thread
*t_write
;
169 struct restart_info restart
;
172 * For a given daemon, if we've turned on ignore timeouts
173 * ignore the timeout value and assume everything is ok
174 * This is for daemon debugging w/ gdb after we have started
175 * FRR and realize we have something that needs to be looked
181 #define OPTION_MINRESTART 2000
182 #define OPTION_MAXRESTART 2001
183 #define OPTION_DRY 2002
184 #define OPTION_NETNS 2003
185 #define OPTION_MAXOPERATIONAL 2004
187 static const struct option longopts
[] = {
188 {"daemon", no_argument
, NULL
, 'd'},
189 {"statedir", required_argument
, NULL
, 'S'},
190 {"loglevel", required_argument
, NULL
, 'l'},
191 {"interval", required_argument
, NULL
, 'i'},
192 {"timeout", required_argument
, NULL
, 't'},
193 {"restart-timeout", required_argument
, NULL
, 'T'},
194 {"restart", required_argument
, NULL
, 'r'},
195 {"start-command", required_argument
, NULL
, 's'},
196 {"kill-command", required_argument
, NULL
, 'k'},
197 {"dry", no_argument
, NULL
, OPTION_DRY
},
198 {"min-restart-interval", required_argument
, NULL
, OPTION_MINRESTART
},
199 {"max-restart-interval", required_argument
, NULL
, OPTION_MAXRESTART
},
200 {"operational-timeout", required_argument
, NULL
, OPTION_MAXOPERATIONAL
},
201 {"pid-file", required_argument
, NULL
, 'p'},
202 {"blank-string", required_argument
, NULL
, 'b'},
204 {"netns", optional_argument
, NULL
, OPTION_NETNS
},
206 {"help", no_argument
, NULL
, 'h'},
207 {"version", no_argument
, NULL
, 'v'},
210 static int try_connect(struct daemon
*dmn
);
211 static void wakeup_send_echo(struct thread
*t_wakeup
);
212 static void try_restart(struct daemon
*dmn
);
213 static void phase_check(void);
214 static void restart_done(struct daemon
*dmn
);
216 static const char *progname
;
218 void watchfrr_set_ignore_daemon(struct vty
*vty
, const char *dname
, bool ignore
)
222 for (dmn
= gs
.daemons
; dmn
; dmn
= dmn
->next
) {
223 if (strncmp(dmn
->name
, dname
, strlen(dmn
->name
)) == 0)
228 dmn
->ignore_timeout
= ignore
;
229 vty_out(vty
, "%s switching to %s\n", dmn
->name
,
230 ignore
? "ignore" : "watch");
232 vty_out(vty
, "%s is not configured for running at the moment",
236 static void printhelp(FILE *target
)
239 "Usage : %s [OPTION...] <daemon name> ...\n\n\
240 Watchdog program to monitor status of frr daemons and try to restart\n\
241 them if they are down or unresponsive. It determines whether a daemon is\n\
242 up based on whether it can connect to the daemon's vty unix stream socket.\n\
243 It then repeatedly sends echo commands over that socket to determine whether\n\
244 the daemon is responsive. If the daemon crashes, we will receive an EOF\n\
245 on the socket connection and know immediately that the daemon is down.\n\n\
246 The daemons to be monitored should be listed on the command line.\n\n\
247 In order to avoid attempting to restart the daemons in a fast loop,\n\
248 the -m and -M options allow you to control the minimum delay between\n\
249 restart commands. The minimum restart delay is recalculated each time\n\
250 a restart is attempted: if the time since the last restart attempt exceeds\n\
251 twice the -M value, then the restart delay is set to the -m value.\n\
252 Otherwise, the interval is doubled (but capped at the -M value).\n\n",
257 -d, --daemon Run in daemon mode. In this mode, error messages are sent\n\
258 to syslog instead of stdout.\n\
259 -S, --statedir Set the vty socket directory (default is %s)\n\
260 -N, --pathspace Insert prefix into config & socket paths\n"
262 " --netns Create and/or use Linux network namespace. If no name is\n"
263 " given, uses the value from `-N`.\n"
265 "-l, --loglevel Set the logging level (default is %d).\n\
266 The value should range from %d (LOG_EMERG) to %d (LOG_DEBUG),\n\
267 but it can be set higher than %d if extra-verbose debugging\n\
268 messages are desired.\n\
269 --min-restart-interval\n\
270 Set the minimum seconds to wait between invocations of daemon\n\
271 restart commands (default is %d).\n\
272 --max-restart-interval\n\
273 Set the maximum seconds to wait between invocations of daemon\n\
274 restart commands (default is %d).\n\
275 --operational-timeout\n\
276 Set the time before systemd is notified that we are considered\n\
277 operational again after a daemon restart (default is %d).\n\
278 -i, --interval Set the status polling interval in seconds (default is %d)\n\
279 -t, --timeout Set the unresponsiveness timeout in seconds (default is %d)\n\
280 -T, --restart-timeout\n\
281 Set the restart (kill) timeout in seconds (default is %d).\n\
282 If any background jobs are still running after this much\n\
283 time has elapsed, they will be killed.\n\
284 -r, --restart Supply a Bourne shell command to use to restart a single\n\
285 daemon. The command string should include '%%s' where the\n\
286 name of the daemon should be substituted.\n\
288 -s, --start-command\n\
289 Supply a Bourne shell to command to use to start a single\n\
290 daemon. The command string should include '%%s' where the\n\
291 name of the daemon should be substituted.\n\
293 -k, --kill-command\n\
294 Supply a Bourne shell to command to use to stop a single\n\
295 daemon. The command string should include '%%s' where the\n\
296 name of the daemon should be substituted.\n\
298 --dry Do not start or restart anything, just log.\n\
299 -p, --pid-file Set process identifier file name\n\
300 (default is %s/watchfrr.pid).\n\
301 -b, --blank-string\n\
302 When the supplied argument string is found in any of the\n\
303 various shell command arguments (-r, -s, or -k), replace\n\
304 it with a space. This is an ugly hack to circumvent problems\n\
305 passing command-line arguments with embedded spaces.\n\
306 -v, --version Print program version\n\
307 -h, --help Display this help and exit\n",
308 frr_vtydir
, DEFAULT_LOGLEVEL
, LOG_EMERG
, LOG_DEBUG
, LOG_DEBUG
,
309 DEFAULT_MIN_RESTART
, DEFAULT_MAX_RESTART
,
310 DEFAULT_OPERATIONAL_TIMEOUT
, DEFAULT_PERIOD
, DEFAULT_TIMEOUT
,
311 DEFAULT_RESTART_TIMEOUT
, DEFAULT_RESTART_CMD
, DEFAULT_START_CMD
,
312 DEFAULT_STOP_CMD
, frr_vtydir
);
315 static pid_t
run_background(char *shell_cmd
)
319 switch (child
= fork()) {
321 flog_err_sys(EC_LIB_SYSTEM_CALL
,
322 "fork failed, cannot run command [%s]: %s",
323 shell_cmd
, safe_strerror(errno
));
327 /* Use separate process group so child processes can be killed
329 if (setpgid(0, 0) < 0)
330 zlog_warn("setpgid(0,0) failed: %s",
331 safe_strerror(errno
));
335 char *const argv
[4] = {shell
, dashc
, shell_cmd
, NULL
};
336 execv("/bin/sh", argv
);
337 flog_err_sys(EC_LIB_SYSTEM_CALL
,
338 "execv(/bin/sh -c '%s') failed: %s",
339 shell_cmd
, safe_strerror(errno
));
343 /* Parent process: we will reap the child later. */
344 zlog_info("Forked background command [pid %d]: %s", (int)child
,
/*
 * Compute the time that has passed since *start_time.
 *
 * The current time is read with gettimeofday() into *result, *start_time is
 * subtracted from it, and the microsecond field is normalized so that it is
 * never negative.  Returns result, which lets callers write
 * time_elapsed(&delay, &t)->tv_sec in an expression.
 */
static struct timeval *time_elapsed(struct timeval *result,
				    const struct timeval *start_time)
{
	gettimeofday(result, NULL);
	result->tv_sec -= start_time->tv_sec;
	result->tv_usec -= start_time->tv_usec;

	/* Borrow whole seconds until the microsecond part is non-negative. */
	while (result->tv_usec < 0) {
		result->tv_usec += 1000000L;
		result->tv_sec--;
	}

	return result;
}
363 static void restart_kill(struct thread
*t_kill
)
365 struct restart_info
*restart
= THREAD_ARG(t_kill
);
366 struct timeval delay
;
368 time_elapsed(&delay
, &restart
->time
);
370 if (gs
.reading_configuration
) {
372 "%s %s child process appears to still be reading configuration, delaying for another %lu time",
373 restart
->what
, restart
->name
, gs
.restart_timeout
);
374 thread_add_timer(master
, restart_kill
, restart
,
375 gs
.restart_timeout
, &restart
->t_kill
);
380 "%s %s child process %d still running after %ld seconds, sending signal %d",
381 restart
->what
, restart
->name
, (int)restart
->pid
,
382 (long)delay
.tv_sec
, (restart
->kills
? SIGKILL
: SIGTERM
));
383 kill(-restart
->pid
, (restart
->kills
? SIGKILL
: SIGTERM
));
385 thread_add_timer(master
, restart_kill
, restart
, gs
.restart_timeout
,
389 static struct restart_info
*find_child(pid_t child
)
392 if (gs
.restart
.pid
== child
)
395 for (dmn
= gs
.daemons
; dmn
; dmn
= dmn
->next
) {
396 if (dmn
->restart
.pid
== child
)
397 return &dmn
->restart
;
402 static void sigchild(void)
408 struct restart_info
*restart
;
411 switch (child
= waitpid(-1, &status
, WNOHANG
)) {
413 flog_err_sys(EC_LIB_SYSTEM_CALL
, "waitpid failed: %s",
414 safe_strerror(errno
));
417 zlog_warn("SIGCHLD received, but waitpid did not reap a child");
421 if (child
== integrated_write_pid
) {
422 integrated_write_sigchld(status
);
426 if ((restart
= find_child(child
)) != NULL
) {
427 name
= restart
->name
;
428 what
= restart
->what
;
431 thread_cancel(&restart
->t_kill
);
433 /* Update restart time to reflect the time the command
435 gettimeofday(&restart
->time
, NULL
);
439 "waitpid returned status for an unknown child process %d",
444 if (WIFSTOPPED(status
))
445 zlog_warn("%s %s process %d is stopped", what
, name
,
447 else if (WIFSIGNALED(status
))
448 zlog_warn("%s %s process %d terminated due to signal %d", what
,
449 name
, (int)child
, WTERMSIG(status
));
450 else if (WIFEXITED(status
)) {
451 if (WEXITSTATUS(status
) != 0)
453 "%s %s process %d exited with non-zero status %d",
454 what
, name
, (int)child
, WEXITSTATUS(status
));
456 zlog_debug("%s %s process %d exited normally", what
,
459 if (restart
&& restart
!= &gs
.restart
) {
460 dmn
= container_of(restart
, struct daemon
,
464 for (dmn
= gs
.daemons
; dmn
; dmn
= dmn
->next
)
470 "cannot interpret %s %s process %d wait status 0x%x",
471 what
, name
, (int)child
, status
);
475 static int run_job(struct restart_info
*restart
, const char *cmdtype
,
476 const char *command
, int force
, int update_interval
)
478 struct timeval delay
;
480 if (gs
.loglevel
> LOG_DEBUG
+ 1)
481 zlog_debug("attempting to %s %s", cmdtype
, restart
->name
);
484 if (gs
.loglevel
> LOG_DEBUG
+ 1)
486 "cannot %s %s, previous pid %d still running",
487 cmdtype
, restart
->name
, (int)restart
->pid
);
493 snprintf(buffer
, sizeof(buffer
), "restarting %s", restart
->name
);
494 systemd_send_status(buffer
);
496 /* Note: time_elapsed test must come before the force test, since we
498 to make sure that delay is initialized for use below in updating the
500 if ((time_elapsed(&delay
, &restart
->time
)->tv_sec
< restart
->interval
)
503 if (gs
.loglevel
> LOG_DEBUG
+ 1)
505 "postponing %s %s: elapsed time %ld < retry interval %ld",
506 cmdtype
, restart
->name
, (long)delay
.tv_sec
,
511 gettimeofday(&restart
->time
, NULL
);
514 char cmd
[strlen(command
) + strlen(restart
->name
) + 1];
515 snprintf(cmd
, sizeof(cmd
), command
, restart
->name
);
516 if ((restart
->pid
= run_background(cmd
)) > 0) {
517 thread_add_timer(master
, restart_kill
, restart
,
518 gs
.restart_timeout
, &restart
->t_kill
);
519 restart
->what
= cmdtype
;
525 /* Calculate the new restart interval. */
526 if (update_interval
) {
527 if (delay
.tv_sec
> 2 * gs
.max_restart_interval
)
528 restart
->interval
= gs
.min_restart_interval
;
529 else if ((restart
->interval
*= 2) > gs
.max_restart_interval
)
530 restart
->interval
= gs
.max_restart_interval
;
531 if (gs
.loglevel
> LOG_DEBUG
+ 1)
532 zlog_debug("restart %s interval is now %ld",
533 restart
->name
, restart
->interval
);
538 #define SET_READ_HANDLER(DMN) \
540 (DMN)->t_read = NULL; \
541 thread_add_read(master, handle_read, (DMN), (DMN)->fd, \
545 #define SET_WAKEUP_DOWN(DMN) \
547 (DMN)->t_wakeup = NULL; \
548 thread_add_timer_msec(master, wakeup_down, (DMN), \
549 FUZZY(gs.period), &(DMN)->t_wakeup); \
552 #define SET_WAKEUP_UNRESPONSIVE(DMN) \
554 (DMN)->t_wakeup = NULL; \
555 thread_add_timer_msec(master, wakeup_unresponsive, (DMN), \
556 FUZZY(gs.period), &(DMN)->t_wakeup); \
559 #define SET_WAKEUP_ECHO(DMN) \
561 (DMN)->t_wakeup = NULL; \
562 thread_add_timer_msec(master, wakeup_send_echo, (DMN), \
563 FUZZY(gs.period), &(DMN)->t_wakeup); \
566 static void wakeup_down(struct thread
*t_wakeup
)
568 struct daemon
*dmn
= THREAD_ARG(t_wakeup
);
570 dmn
->t_wakeup
= NULL
;
571 if (try_connect(dmn
) < 0)
572 SET_WAKEUP_DOWN(dmn
);
573 if ((dmn
->connect_tries
> 1) && (dmn
->state
!= DAEMON_UP
))
577 static void wakeup_init(struct thread
*t_wakeup
)
579 struct daemon
*dmn
= THREAD_ARG(t_wakeup
);
581 dmn
->t_wakeup
= NULL
;
582 if (try_connect(dmn
) < 0) {
584 "%s state -> down : initial connection attempt failed",
586 dmn
->state
= DAEMON_DOWN
;
591 static void restart_done(struct daemon
*dmn
)
593 if (dmn
->state
!= DAEMON_DOWN
) {
595 "Daemon: %s: is in %s state but expected it to be in DAEMON_DOWN state",
596 dmn
->name
, state_str
[dmn
->state
]);
599 THREAD_OFF(dmn
->t_wakeup
);
601 if (try_connect(dmn
) < 0)
602 SET_WAKEUP_DOWN(dmn
);
/*
 * Timer callback (armed on gs.t_operational by daemon_up()): report the
 * "FRR Operational" status to systemd.  The thread argument is unused.
 */
static void daemon_restarting_operational(struct thread *thread)
{
	systemd_send_status("FRR Operational");
}
610 static void daemon_down(struct daemon
*dmn
, const char *why
)
612 if (IS_UP(dmn
) || (dmn
->state
== DAEMON_INIT
))
613 flog_err(EC_WATCHFRR_CONNECTION
, "%s state -> down : %s",
615 else if (gs
.loglevel
> LOG_DEBUG
)
616 zlog_debug("%s still down : %s", dmn
->name
, why
);
619 dmn
->state
= DAEMON_DOWN
;
624 THREAD_OFF(dmn
->t_read
);
625 THREAD_OFF(dmn
->t_write
);
626 THREAD_OFF(dmn
->t_wakeup
);
627 if (try_connect(dmn
) < 0)
628 SET_WAKEUP_DOWN(dmn
);
630 systemd_send_status("FRR partially operational");
634 static void handle_read(struct thread
*t_read
)
636 struct daemon
*dmn
= THREAD_ARG(t_read
);
637 static const char resp
[sizeof(PING_TOKEN
) + 4] = PING_TOKEN
"\n";
638 char buf
[sizeof(resp
) + 100];
640 struct timeval delay
;
643 if ((rc
= read(dmn
->fd
, buf
, sizeof(buf
))) < 0) {
646 if (ERRNO_IO_RETRY(errno
)) {
647 /* Pretend it never happened. */
648 SET_READ_HANDLER(dmn
);
651 snprintf(why
, sizeof(why
), "unexpected read error: %s",
652 safe_strerror(errno
));
653 daemon_down(dmn
, why
);
657 daemon_down(dmn
, "read returned EOF");
660 if (!dmn
->echo_sent
.tv_sec
) {
661 char why
[sizeof(buf
) + 100];
662 snprintf(why
, sizeof(why
),
663 "unexpected read returns %d bytes: %.*s", (int)rc
,
665 daemon_down(dmn
, why
);
669 /* We are expecting an echo response: is there any chance that the
670 response would not be returned entirely in the first read? That
671 seems inconceivable... */
672 if ((rc
!= sizeof(resp
)) || memcmp(buf
, resp
, sizeof(resp
))) {
673 char why
[100 + sizeof(buf
)];
674 snprintf(why
, sizeof(why
),
675 "read returned bad echo response of %d bytes (expecting %u): %.*s",
676 (int)rc
, (unsigned int)sizeof(resp
), (int)rc
, buf
);
677 daemon_down(dmn
, why
);
681 time_elapsed(&delay
, &dmn
->echo_sent
);
682 dmn
->echo_sent
.tv_sec
= 0;
683 if (dmn
->state
== DAEMON_UNRESPONSIVE
) {
684 if (delay
.tv_sec
< gs
.timeout
) {
685 dmn
->state
= DAEMON_UP
;
687 "%s state -> up : echo response received after %ld.%06ld seconds",
688 dmn
->name
, (long)delay
.tv_sec
,
689 (long)delay
.tv_usec
);
692 "%s: slow echo response finally received after %ld.%06ld seconds",
693 dmn
->name
, (long)delay
.tv_sec
,
694 (long)delay
.tv_usec
);
695 } else if (gs
.loglevel
> LOG_DEBUG
+ 1)
696 zlog_debug("%s: echo response received after %ld.%06ld seconds",
697 dmn
->name
, (long)delay
.tv_sec
, (long)delay
.tv_usec
);
699 SET_READ_HANDLER(dmn
);
700 thread_cancel(&dmn
->t_wakeup
);
701 SET_WAKEUP_ECHO(dmn
);
705 * Wait till we notice that all daemons are ready before
706 * we send we are ready to systemd
708 static void daemon_send_ready(int exitcode
)
718 zlog_notice("all daemons up, doing startup-complete notify");
719 else if (gs
.numdown
< gs
.numdaemons
)
720 flog_err(EC_WATCHFRR_CONNECTION
,
721 "startup did not complete within timeout (%d/%d daemons running)",
722 gs
.numdaemons
- gs
.numdown
, gs
.numdaemons
);
724 flog_err(EC_WATCHFRR_CONNECTION
,
725 "all configured daemons failed to start -- exiting watchfrr");
732 snprintf(started
, sizeof(started
), "%s/%s", frr_vtydir
,
734 fp
= fopen(started
, "w");
738 systemd_send_started(master
);
739 systemd_send_status("FRR Operational");
743 static void daemon_up(struct daemon
*dmn
, const char *why
)
745 dmn
->state
= DAEMON_UP
;
747 dmn
->connect_tries
= 0;
748 zlog_notice("%s state -> up : %s", dmn
->name
, why
);
749 if (gs
.numdown
== 0) {
750 daemon_send_ready(0);
752 THREAD_OFF(gs
.t_operational
);
754 thread_add_timer(master
, daemon_restarting_operational
, NULL
,
755 gs
.operational_timeout
, &gs
.t_operational
);
758 SET_WAKEUP_ECHO(dmn
);
762 static void check_connect(struct thread
*t_write
)
764 struct daemon
*dmn
= THREAD_ARG(t_write
);
766 socklen_t reslen
= sizeof(sockerr
);
769 if (getsockopt(dmn
->fd
, SOL_SOCKET
, SO_ERROR
, (char *)&sockerr
, &reslen
)
771 zlog_warn("%s: check_connect: getsockopt failed: %s", dmn
->name
,
772 safe_strerror(errno
));
774 "getsockopt failed checking connection success");
777 if ((reslen
== sizeof(sockerr
)) && sockerr
) {
781 "getsockopt reports that connection attempt failed: %s",
782 safe_strerror(sockerr
));
783 daemon_down(dmn
, why
);
787 daemon_up(dmn
, "delayed connect succeeded");
790 static void wakeup_connect_hanging(struct thread
*t_wakeup
)
792 struct daemon
*dmn
= THREAD_ARG(t_wakeup
);
795 dmn
->t_wakeup
= NULL
;
796 snprintf(why
, sizeof(why
),
797 "connection attempt timed out after %ld seconds", gs
.timeout
);
798 daemon_down(dmn
, why
);
801 /* Making connection to protocol daemon. */
802 static int try_connect(struct daemon
*dmn
)
805 struct sockaddr_un addr
;
808 if (gs
.loglevel
> LOG_DEBUG
+ 1)
809 zlog_debug("%s: attempting to connect", dmn
->name
);
810 dmn
->connect_tries
++;
812 memset(&addr
, 0, sizeof(addr
));
813 addr
.sun_family
= AF_UNIX
;
814 snprintf(addr
.sun_path
, sizeof(addr
.sun_path
), "%s/%s.vty", gs
.vtydir
,
816 #ifdef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN
817 len
= addr
.sun_len
= SUN_LEN(&addr
);
819 len
= sizeof(addr
.sun_family
) + strlen(addr
.sun_path
);
820 #endif /* HAVE_STRUCT_SOCKADDR_UN_SUN_LEN */
822 /* Quick check to see if we might succeed before we go to the trouble
823 of creating a socket. */
824 if (access(addr
.sun_path
, W_OK
) < 0) {
826 flog_err_sys(EC_LIB_SYSTEM_CALL
,
827 "%s: access to socket %s denied: %s",
828 dmn
->name
, addr
.sun_path
,
829 safe_strerror(errno
));
833 if ((sock
= socket(AF_UNIX
, SOCK_STREAM
, 0)) < 0) {
834 flog_err_sys(EC_LIB_SOCKET
, "%s(%s): cannot make socket: %s",
835 __func__
, addr
.sun_path
, safe_strerror(errno
));
839 if (set_nonblocking(sock
) < 0 || set_cloexec(sock
) < 0) {
840 flog_err_sys(EC_LIB_SYSTEM_CALL
,
841 "%s(%s): set_nonblocking/cloexec(%d) failed",
842 __func__
, addr
.sun_path
, sock
);
847 if (connect(sock
, (struct sockaddr
*)&addr
, len
) < 0) {
848 if ((errno
!= EINPROGRESS
) && (errno
!= EWOULDBLOCK
)) {
849 if (gs
.loglevel
> LOG_DEBUG
)
850 zlog_debug("%s(%s): connect failed: %s",
851 __func__
, addr
.sun_path
,
852 safe_strerror(errno
));
856 if (gs
.loglevel
> LOG_DEBUG
)
857 zlog_debug("%s: connection in progress", dmn
->name
);
858 dmn
->state
= DAEMON_CONNECTING
;
860 thread_add_write(master
, check_connect
, dmn
, dmn
->fd
,
862 thread_add_timer(master
, wakeup_connect_hanging
, dmn
,
863 gs
.timeout
, &dmn
->t_wakeup
);
864 SET_READ_HANDLER(dmn
);
869 SET_READ_HANDLER(dmn
);
870 daemon_up(dmn
, "connect succeeded");
874 static void phase_hanging(struct thread
*t_hanging
)
876 gs
.t_phase_hanging
= NULL
;
877 flog_err(EC_WATCHFRR_CONNECTION
,
878 "Phase [%s] hanging for %ld seconds, aborting phased restart",
879 phase_str
[gs
.phase
], PHASE_TIMEOUT
);
880 gs
.phase
= PHASE_NONE
;
883 static void set_phase(enum restart_phase new_phase
)
885 gs
.phase
= new_phase
;
886 thread_cancel(&gs
.t_phase_hanging
);
888 thread_add_timer(master
, phase_hanging
, NULL
, PHASE_TIMEOUT
,
889 &gs
.t_phase_hanging
);
892 static void phase_check(void)
901 for (dmn
= gs
.daemons
; dmn
; dmn
= dmn
->next
)
902 if (dmn
->state
== DAEMON_INIT
)
905 /* startup complete, everything out of INIT */
906 gs
.phase
= PHASE_NONE
;
907 for (dmn
= gs
.daemons
; dmn
; dmn
= dmn
->next
)
908 if (dmn
->state
== DAEMON_DOWN
) {
909 SET_WAKEUP_DOWN(dmn
);
913 case PHASE_STOPS_PENDING
:
917 "Phased restart: all routing daemon stop jobs have completed.");
918 set_phase(PHASE_WAITING_DOWN
);
921 case PHASE_WAITING_DOWN
:
922 if (gs
.numdown
+ IS_UP(gs
.special
) < gs
.numdaemons
)
924 systemd_send_status("Phased Restart");
925 zlog_info("Phased restart: all routing daemons now down.");
926 run_job(&gs
.special
->restart
, "restart", gs
.restart_command
, 1,
928 set_phase(PHASE_ZEBRA_RESTART_PENDING
);
931 case PHASE_ZEBRA_RESTART_PENDING
:
932 if (gs
.special
->restart
.pid
)
934 systemd_send_status("Zebra Restarting");
935 zlog_info("Phased restart: %s restart job completed.",
937 set_phase(PHASE_WAITING_ZEBRA_UP
);
940 case PHASE_WAITING_ZEBRA_UP
:
941 if (!IS_UP(gs
.special
))
943 zlog_info("Phased restart: %s is now up.", gs
.special
->name
);
946 for (dmn
= gs
.daemons
; dmn
; dmn
= dmn
->next
) {
947 if (dmn
!= gs
.special
)
948 run_job(&dmn
->restart
, "start",
949 gs
.start_command
, 1, 0);
952 gs
.phase
= PHASE_NONE
;
953 THREAD_OFF(gs
.t_phase_hanging
);
954 zlog_notice("Phased global restart has completed.");
959 static void try_restart(struct daemon
*dmn
)
964 if (dmn
!= gs
.special
) {
965 if ((gs
.special
->state
== DAEMON_UP
)
966 && (gs
.phase
== PHASE_NONE
))
967 run_job(&dmn
->restart
, "restart", gs
.restart_command
, 0,
971 "%s: postponing restart attempt because master %s daemon not up [%s], or phased restart in progress",
972 dmn
->name
, gs
.special
->name
,
973 state_str
[gs
.special
->state
]);
977 if ((gs
.phase
!= PHASE_NONE
) || gs
.numpids
) {
978 if (gs
.loglevel
> LOG_DEBUG
+ 1)
980 "postponing phased global restart: restart already in progress [%s], or outstanding child processes [%d]",
981 phase_str
[gs
.phase
], gs
.numpids
);
984 /* Is it too soon for a restart? */
986 struct timeval delay
;
987 if (time_elapsed(&delay
, &gs
.special
->restart
.time
)->tv_sec
988 < gs
.special
->restart
.interval
) {
989 if (gs
.loglevel
> LOG_DEBUG
+ 1)
991 "postponing phased global restart: elapsed time %ld < retry interval %ld",
993 gs
.special
->restart
.interval
);
997 run_job(&gs
.restart
, "restart", gs
.restart_command
, 0, 1);
1000 static void wakeup_unresponsive(struct thread
*t_wakeup
)
1002 struct daemon
*dmn
= THREAD_ARG(t_wakeup
);
1004 dmn
->t_wakeup
= NULL
;
1005 if (dmn
->state
!= DAEMON_UNRESPONSIVE
)
1006 flog_err(EC_WATCHFRR_CONNECTION
,
1007 "%s: no longer unresponsive (now %s), wakeup should have been cancelled!",
1008 dmn
->name
, state_str
[dmn
->state
]);
1010 SET_WAKEUP_UNRESPONSIVE(dmn
);
1015 static void wakeup_no_answer(struct thread
*t_wakeup
)
1017 struct daemon
*dmn
= THREAD_ARG(t_wakeup
);
1019 dmn
->t_wakeup
= NULL
;
1020 dmn
->state
= DAEMON_UNRESPONSIVE
;
1021 if (dmn
->ignore_timeout
)
1023 flog_err(EC_WATCHFRR_CONNECTION
,
1024 "%s state -> unresponsive : no response yet to ping sent %ld seconds ago",
1025 dmn
->name
, gs
.timeout
);
1026 SET_WAKEUP_UNRESPONSIVE(dmn
);
1030 static void wakeup_send_echo(struct thread
*t_wakeup
)
1032 static const char echocmd
[] = "echo " PING_TOKEN
;
1034 struct daemon
*dmn
= THREAD_ARG(t_wakeup
);
1036 dmn
->t_wakeup
= NULL
;
1037 if (((rc
= write(dmn
->fd
, echocmd
, sizeof(echocmd
))) < 0)
1038 || ((size_t)rc
!= sizeof(echocmd
))) {
1039 char why
[100 + sizeof(echocmd
)];
1040 snprintf(why
, sizeof(why
),
1041 "write '%s' returned %d instead of %u", echocmd
,
1042 (int)rc
, (unsigned int)sizeof(echocmd
));
1043 daemon_down(dmn
, why
);
1045 gettimeofday(&dmn
->echo_sent
, NULL
);
1046 thread_add_timer(master
, wakeup_no_answer
, dmn
, gs
.timeout
,
1051 bool check_all_up(void)
1055 for (dmn
= gs
.daemons
; dmn
; dmn
= dmn
->next
)
1056 if (dmn
->state
!= DAEMON_UP
)
1061 void watchfrr_status(struct vty
*vty
)
1064 struct timeval delay
;
1066 vty_out(vty
, "watchfrr global phase: %s\n", phase_str
[gs
.phase
]);
1067 vty_out(vty
, " Restart Command: %pSQq\n", gs
.restart_command
);
1068 vty_out(vty
, " Start Command: %pSQq\n", gs
.start_command
);
1069 vty_out(vty
, " Stop Command: %pSQq\n", gs
.stop_command
);
1070 vty_out(vty
, " Min Restart Interval: %ld\n", gs
.min_restart_interval
);
1071 vty_out(vty
, " Max Restart Interval: %ld\n", gs
.max_restart_interval
);
1072 vty_out(vty
, " Restart Timeout: %ld\n", gs
.restart_timeout
);
1073 vty_out(vty
, " Reading Configuration: %s\n",
1074 gs
.reading_configuration
? "yes" : "no");
1076 vty_out(vty
, " global restart running, pid %ld\n",
1077 (long)gs
.restart
.pid
);
1079 for (dmn
= gs
.daemons
; dmn
; dmn
= dmn
->next
) {
1080 vty_out(vty
, " %-20s %s%s", dmn
->name
, state_str
[dmn
->state
],
1081 dmn
->ignore_timeout
? "/Ignoring Timeout\n" : "\n");
1082 if (dmn
->restart
.pid
)
1083 vty_out(vty
, " restart running, pid %ld\n",
1084 (long)dmn
->restart
.pid
);
1085 else if (dmn
->state
== DAEMON_DOWN
&&
1086 time_elapsed(&delay
, &dmn
->restart
.time
)->tv_sec
1087 < dmn
->restart
.interval
)
1088 vty_out(vty
, " restarting in %jd seconds (%jds backoff interval)\n",
1089 (intmax_t)dmn
->restart
.interval
1090 - (intmax_t)delay
.tv_sec
,
1091 (intmax_t)dmn
->restart
.interval
);
1095 static void sigint(void)
1097 zlog_notice("Terminating on signal");
1098 systemd_send_stopping();
/*
 * Check that a shell command template is usable with a single daemon name.
 *
 * Returns 1 when cmd contains exactly one '%' character and that character
 * is immediately followed by 's'; returns 0 otherwise, including for a NULL
 * cmd.
 */
static int valid_command(const char *cmd)
{
	const char *p;

	if (cmd == NULL)
		return 0;

	/* The first '%' must start a "%s" conversion ... */
	p = strchr(cmd, '%');
	if (p == NULL || p[1] != 's')
		return 0;

	/* ... and no further '%' may follow it. */
	return strchr(p + 1, '%') == NULL;
}
/*
 * This is an ugly hack to circumvent problems with passing command-line
 * arguments that contain spaces.  The fix is to use a configuration file.
 *
 * Returns a newly allocated copy of cmd in which every occurrence of
 * blankstr has been replaced by a single space.  The buffer is allocated
 * here and not released here.  On allocation failure an error is printed
 * and the process exits with status 1.
 *
 * Scanning resumes after each inserted space, so a blankstr of " " leaves
 * the string unchanged instead of matching its own replacement forever.  An
 * empty blankstr matches everywhere and is treated as "nothing to replace".
 */
static char *translate_blanks(const char *cmd, const char *blankstr)
{
	size_t cmdlen = strlen(cmd);
	size_t bslen = strlen(blankstr);
	char *res = malloc(cmdlen + 1);
	char *p;

	if (!res) {
		perror("malloc");
		exit(1);
	}
	memcpy(res, cmd, cmdlen + 1);

	if (bslen == 0)
		return res;

	p = res;
	while ((p = strstr(p, blankstr)) != NULL) {
		*p++ = ' ';
		/* Close the gap left by the remainder of the token. */
		if (bslen > 1)
			memmove(p, p + bslen - 1, strlen(p + bslen - 1) + 1);
	}

	return res;
}
/*
 * Timer callback for gs.t_startup_timeout (armed in watchfrr_init() with
 * STARTUP_TIMEOUT): the startup window has expired, so run the ready
 * notification with exit code 1.  The thread argument is unused.
 */
static void startup_timeout(struct thread *t_wakeup)
{
	daemon_send_ready(1);
}
1140 #include <sys/mount.h>
1143 #define NETNS_RUN_DIR "/var/run/netns"
1145 static void netns_create(int dirfd
, const char *nsname
)
1147 /* make /var/run/netns shared between mount namespaces
1148 * just like iproute2 sets it up
1150 if (mount("", NETNS_RUN_DIR
, "none", MS_SHARED
| MS_REC
, NULL
)) {
1151 if (errno
!= EINVAL
) {
1156 if (mount(NETNS_RUN_DIR
, NETNS_RUN_DIR
, "none",
1157 MS_BIND
| MS_REC
, NULL
)) {
1162 if (mount("", NETNS_RUN_DIR
, "none", MS_SHARED
| MS_REC
,
1169 /* need an empty file to mount on top of */
1170 int nsfd
= openat(dirfd
, nsname
, O_CREAT
| O_RDONLY
| O_EXCL
, 0);
1173 fprintf(stderr
, "failed to create \"%s/%s\": %s\n",
1174 NETNS_RUN_DIR
, nsname
, strerror(errno
));
1179 if (unshare(CLONE_NEWNET
)) {
1181 unlinkat(dirfd
, nsname
, 0);
1185 char *dstpath
= asprintfrr(MTYPE_TMP
, "%s/%s", NETNS_RUN_DIR
, nsname
);
1187 /* bind-mount so the namespace has a name and is persistent */
1188 if (mount("/proc/self/ns/net", dstpath
, "none", MS_BIND
, NULL
) < 0) {
1189 fprintf(stderr
, "failed to bind-mount netns to \"%s\": %s\n",
1190 dstpath
, strerror(errno
));
1191 unlinkat(dirfd
, nsname
, 0);
1195 XFREE(MTYPE_TMP
, dstpath
);
1198 static void netns_setup(const char *nsname
)
1202 dirfd
= open(NETNS_RUN_DIR
, O_DIRECTORY
| O_RDONLY
);
1204 if (errno
== ENOTDIR
) {
1205 fprintf(stderr
, "error: \"%s\" is not a directory!\n",
1208 } else if (errno
== ENOENT
) {
1209 if (mkdir(NETNS_RUN_DIR
, 0755)) {
1210 fprintf(stderr
, "error: \"%s\": mkdir: %s\n",
1211 NETNS_RUN_DIR
, strerror(errno
));
1214 dirfd
= open(NETNS_RUN_DIR
, O_DIRECTORY
| O_RDONLY
);
1216 fprintf(stderr
, "error: \"%s\": opendir: %s\n",
1217 NETNS_RUN_DIR
, strerror(errno
));
1221 fprintf(stderr
, "error: \"%s\": %s\n",
1222 NETNS_RUN_DIR
, strerror(errno
));
1227 nsfd
= openat(dirfd
, nsname
, O_RDONLY
);
1228 if (nsfd
< 0 && errno
!= ENOENT
) {
1229 fprintf(stderr
, "error: \"%s/%s\": %s\n",
1230 NETNS_RUN_DIR
, nsname
, strerror(errno
));
1234 netns_create(dirfd
, nsname
);
1236 if (setns(nsfd
, CLONE_NEWNET
)) {
1244 /* make sure loopback is up... weird things happen otherwise.
1245 * ioctl is perfectly fine for this, don't need netlink...
1248 struct ifreq ifr
= { };
1250 strlcpy(ifr
.ifr_name
, "lo", sizeof(ifr
.ifr_name
));
1252 sockfd
= socket(AF_INET
, SOCK_DGRAM
, 0);
1257 if (ioctl(sockfd
, SIOCGIFFLAGS
, &ifr
)) {
1258 perror("ioctl(SIOCGIFFLAGS, \"lo\")");
1261 if (!(ifr
.ifr_flags
& IFF_UP
)) {
1262 ifr
.ifr_flags
|= IFF_UP
;
1263 if (ioctl(sockfd
, SIOCSIFFLAGS
, &ifr
)) {
1264 perror("ioctl(SIOCSIFFLAGS, \"lo\")");
1271 #else /* !GNU_LINUX */
/*
 * netns_setup() - fallback for builds without GNU_LINUX: network
 * namespaces cannot be entered, so only a diagnostic is written to stderr.
 * nsname is not used by the visible code.
 * NOTE(review): whether the process then exits is not visible here.
 */
1273 static void netns_setup(const char *nsname
)
1275 fprintf(stderr
, "network namespaces are only available on Linux\n");
/*
 * Config-start hook, registered through cmd_init_config_callbacks() in
 * main(): flags in the global state that configuration is being read.
 */
1280 static void watchfrr_start_config(void)
1282 gs
.reading_configuration
= true;
/*
 * Config-end hook, registered through cmd_init_config_callbacks() in
 * main(): clears the flag set while configuration was being read.
 */
1285 static void watchfrr_end_config(void)
1287 gs
.reading_configuration
= false;
/*
 * watchfrr_init() - build the list of monitored daemons from the
 * non-option command line arguments and announce what is being watched.
 *
 * argc/argv are the process arguments; entries from optind onwards are
 * taken as daemon names.  The name strings are not copied: each daemon
 * entry points straight into argv.
 */
1290 static void watchfrr_init(int argc
, char **argv
)
/* Daemon name that receives special treatment (compared by name below). */
1292 const char *special
= "zebra";
/*
 * add starts at the head pointer of the daemon list.
 * NOTE(review): presumably the tail insertion point while the list is
 * built; the statements that advance it are not visible here.
 */
1294 struct daemon
*dmn
, **add
= &gs
.daemons
;
/* Scratch buffer for the list of names logged at the end; p is the write cursor. */
1295 char alldaemons
[512] = "", *p
= alldaemons
;
/* Arm the global startup timer, which fires startup_timeout(). */
1297 thread_add_timer_msec(master
, startup_timeout
, NULL
, STARTUP_TIMEOUT
,
1298 &gs
.t_startup_timeout
);
/* One zero-initialised daemon entry per remaining argument. */
1300 for (i
= optind
; i
< argc
; i
++) {
1301 dmn
= XCALLOC(MTYPE_WATCHFRR_DAEMON
, sizeof(*dmn
));
/* The daemon and its restart record share the same name string. */
1303 dmn
->name
= dmn
->restart
.name
= argv
[i
];
1304 dmn
->state
= DAEMON_INIT
;
/* Schedule wakeup_init() for this daemon with a 0 ms delay. */
1308 thread_add_timer_msec(master
, wakeup_init
, dmn
, 0,
/* The per-daemon restart interval starts at the configured minimum. */
1310 dmn
->restart
.interval
= gs
.min_restart_interval
;
/*
 * NOTE(review): presumably records this entry as gs.special; the
 * statement controlled by this test is not visible here.
 */
1314 if (!strcmp(dmn
->name
, special
))
1320 "Must specify one or more daemons to monitor.\n\n");
/* Outside monitor-only mode the special daemon must have been listed. */
1323 if (!watch_only
&& !gs
.special
) {
1324 fprintf(stderr
, "\"%s\" daemon must be in daemon lists\n\n",
/*
 * Join the daemon names with single spaces into alldaemons.  The size
 * passed to snprintf is the room left between p and the end of the
 * buffer, so output is truncated rather than overrunning it.
 * NOTE(review): the statement advancing p is not visible here.
 */
1329 for (dmn
= gs
.daemons
; dmn
; dmn
= dmn
->next
) {
1330 snprintf(p
, alldaemons
+ sizeof(alldaemons
) - p
, "%s%s",
1331 (p
== alldaemons
) ? "" : " ", dmn
->name
);
/* Startup banner: program, version, watched daemons and optional mode note. */
1334 zlog_notice("%s %s watching [%s]%s", progname
, FRR_VERSION
, alldaemons
,
1335 watch_only
? ", monitor mode" : "");
/*
 * Privilege settings for watchfrr, handed to the daemon info below through
 * its .privs member.  Only the vty_group initializer is visible in this
 * excerpt.
 */
1338 struct zebra_privs_t watchfrr_privs
= {
1340 .vty_group
= VTY_GROUP
,
/*
 * Signal table for watchfrr, handed to the daemon info below through its
 * .signals / .n_signals members.  Only the entry dispatching to sigchild()
 * is visible in this excerpt.
 */
1344 static struct frr_signal_t watchfrr_signals
[] = {
1355 .handler
= sigchild
,
/*
 * Daemon description for watchfrr: feature flags, help printer, copyright
 * string, signal table and privilege settings.
 * NOTE(review): presumably this is what defines the watchfrr_di object
 * that main() passes to frr_preinit() - confirm against the macro.
 */
1359 FRR_DAEMON_INFO(watchfrr
, WATCHFRR
,
1360 .flags
= FRR_NO_PRIVSEP
| FRR_NO_TCPVTY
| FRR_LIMITED_CLI
1361 | FRR_NO_CFG_PID_DRY
| FRR_NO_ZCLIENT
1364 .printhelp
= printhelp
,
1365 .copyright
= "Copyright 2004 Andrew J. Schorr",
1367 .signals
= watchfrr_signals
,
/* Entry count is derived from the array itself, so it cannot go stale. */
1368 .n_signals
= array_size(watchfrr_signals
),
1370 .privs
= &watchfrr_privs
,
/*
 * getopt specification for options that have been removed.  main() appends
 * it to the accepted option string so these options still parse, then
 * answers any of them with a "no longer exists" message.
 */
1373 #define DEPRECATED_OPTIONS "aAezR:"
1375 int main(int argc
, char **argv
)
1378 const char *blankstr
= NULL
;
1379 const char *netns
= NULL
;
1380 bool netns_en
= false;
1382 frr_preinit(&watchfrr_di
, argc
, argv
);
1383 progname
= watchfrr_di
.progname
;
1385 frr_opt_add("b:di:k:l:N:p:r:S:s:t:T:" DEPRECATED_OPTIONS
, longopts
, "");
1387 gs
.restart
.name
= "all";
1388 while ((opt
= frr_getopt(argc
, argv
, NULL
)) != EOF
) {
1389 if (opt
&& opt
< 128 && strchr(DEPRECATED_OPTIONS
, opt
)) {
1391 "The -%c option no longer exists.\n"
1392 "Please refer to the watchfrr(8) man page.\n",
1407 if (!valid_command(optarg
)) {
1409 "Invalid kill command, must contain '%%s': %s\n",
1413 gs
.stop_command
= optarg
;
1417 if ((sscanf(optarg
, "%d%1s", &gs
.loglevel
, garbage
)
1419 || (gs
.loglevel
< LOG_EMERG
)) {
1421 "Invalid loglevel argument: %s\n",
1426 case OPTION_MINRESTART
: {
1428 if ((sscanf(optarg
, "%ld%1s", &gs
.min_restart_interval
,
1431 || (gs
.min_restart_interval
< 0)) {
1433 "Invalid min_restart_interval argument: %s\n",
1438 case OPTION_MAXRESTART
: {
1440 if ((sscanf(optarg
, "%ld%1s", &gs
.max_restart_interval
,
1443 || (gs
.max_restart_interval
< 0)) {
1445 "Invalid max_restart_interval argument: %s\n",
1450 case OPTION_MAXOPERATIONAL
: {
1453 if ((sscanf(optarg
, "%ld%1s", &gs
.operational_timeout
,
1455 (gs
.operational_timeout
< 0)) {
1457 "Invalid Operational_timeout argument: %s\n",
1464 if (optarg
&& strchr(optarg
, '/')) {
1466 "invalid network namespace name \"%s\" (may not contain slashes)\n",
1475 if ((sscanf(optarg
, "%d%1s", &period
, garbage
) != 1)
1476 || (gs
.period
< 1)) {
1478 "Invalid interval argument: %s\n",
1482 gs
.period
= 1000 * period
;
1485 watchfrr_di
.pid_file
= optarg
;
1488 if (!valid_command(optarg
)) {
1490 "Invalid restart command, must contain '%%s': %s\n",
1494 gs
.restart_command
= optarg
;
1497 if (!valid_command(optarg
)) {
1499 "Invalid start command, must contain '%%s': %s\n",
1503 gs
.start_command
= optarg
;
1510 if ((sscanf(optarg
, "%ld%1s", &gs
.timeout
, garbage
)
1512 || (gs
.timeout
< 1)) {
1514 "Invalid timeout argument: %s\n",
1521 if ((sscanf(optarg
, "%ld%1s", &gs
.restart_timeout
,
1524 || (gs
.restart_timeout
< 1)) {
1526 "Invalid restart timeout argument: %s\n",
1532 fputs("Invalid option.\n", stderr
);
1538 && (gs
.start_command
|| gs
.stop_command
|| gs
.restart_command
)) {
1539 fputs("Options -r/-s/-k are not used when --dry is active.\n",
1543 && (!gs
.restart_command
|| !gs
.start_command
|| !gs
.stop_command
)) {
1545 "Options -s (start), -k (kill), and -r (restart) are required.\n");
1550 if (gs
.restart_command
)
1551 gs
.restart_command
=
1552 translate_blanks(gs
.restart_command
, blankstr
);
1553 if (gs
.start_command
)
1555 translate_blanks(gs
.start_command
, blankstr
);
1556 if (gs
.stop_command
)
1558 translate_blanks(gs
.stop_command
, blankstr
);
1561 gs
.restart
.interval
= gs
.min_restart_interval
;
1563 /* env variable for the processes that we start */
1564 if (watchfrr_di
.pathspace
)
1565 setenv("FRR_PATHSPACE", watchfrr_di
.pathspace
, 1);
1567 unsetenv("FRR_PATHSPACE");
1570 * when watchfrr_di.pathspace is read, if it is not specified
1571 * pathspace is NULL as expected
1573 pathspace
= watchfrr_di
.pathspace
;
1575 if (netns_en
&& !netns
)
1576 netns
= watchfrr_di
.pathspace
;
1578 if (netns_en
&& netns
&& netns
[0])
1581 master
= frr_init();
1582 watchfrr_error_init();
1583 watchfrr_init(argc
, argv
);
1584 cmd_init_config_callbacks(watchfrr_start_config
, watchfrr_end_config
);
1585 watchfrr_vty_init();
1589 if (watchfrr_di
.daemon_mode
)
1590 zlog_syslog_set_prio_min(MIN(gs
.loglevel
, LOG_DEBUG
));
1592 zlog_aux_init(NULL
, MIN(gs
.loglevel
, LOG_DEBUG
));
1596 systemd_send_stopping();