2 * Monitor status of frr daemons and restart if necessary.
4 * Copyright (C) 2004 Andrew J. Schorr
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
26 #include <lib/version.h>
29 #include "lib_errors.h"
30 #include "zlog_targets.h"
41 #include "watchfrr_errors.h"
44 #define MIN(X,Y) (((X) <= (Y)) ? (X) : (Y))
47 /* Macros to help randomize timers. */
48 #define JITTER(X) ((frr_weak_random() % ((X)+1))-((X)/2))
49 #define FUZZY(X) ((X)+JITTER((X)/20))
51 #define DEFAULT_PERIOD 5
52 #define DEFAULT_TIMEOUT 90
53 #define DEFAULT_RESTART_TIMEOUT 20
54 #define DEFAULT_LOGLEVEL LOG_INFO
55 #define DEFAULT_MIN_RESTART 60
56 #define DEFAULT_MAX_RESTART 600
58 #define DEFAULT_RESTART_CMD WATCHFRR_SH_PATH " restart %s"
59 #define DEFAULT_START_CMD WATCHFRR_SH_PATH " start %s"
60 #define DEFAULT_STOP_CMD WATCHFRR_SH_PATH " stop %s"
62 #define PING_TOKEN "PING"
64 DEFINE_MGROUP(WATCHFRR
, "watchfrr")
65 DEFINE_MTYPE_STATIC(WATCHFRR
, WATCHFRR_DAEMON
, "watchfrr daemon entry")
67 /* Needs to be global, referenced somewhere inside libfrr. */
68 struct thread_master
*master
;
70 static bool watch_only
= false;
77 PHASE_ZEBRA_RESTART_PENDING
,
78 PHASE_WAITING_ZEBRA_UP
81 static const char *const phase_str
[] = {
85 "Waiting for other daemons to come down",
86 "Zebra restart job running",
87 "Waiting for zebra to come up",
91 #define PHASE_TIMEOUT (3*gs.restart_timeout)
92 #define STARTUP_TIMEOUT 55 * 1000
100 struct thread
*t_kill
;
104 static struct global_state
{
105 restart_phase_t phase
;
106 struct thread
*t_phase_hanging
;
107 struct thread
*t_startup_timeout
;
111 long restart_timeout
;
112 long min_restart_interval
;
113 long max_restart_interval
;
114 struct daemon
*daemons
;
115 const char *restart_command
;
116 const char *start_command
;
117 const char *stop_command
;
118 struct restart_info restart
;
120 struct daemon
*special
; /* points to zebra when doing phased restart */
123 int numdown
; /* # of daemons that are not UP or UNRESPONSIVE */
126 .vtydir
= frr_vtydir
,
127 .period
= 1000 * DEFAULT_PERIOD
,
128 .timeout
= DEFAULT_TIMEOUT
,
129 .restart_timeout
= DEFAULT_RESTART_TIMEOUT
,
130 .loglevel
= DEFAULT_LOGLEVEL
,
131 .min_restart_interval
= DEFAULT_MIN_RESTART
,
132 .max_restart_interval
= DEFAULT_MAX_RESTART
,
133 .restart_command
= DEFAULT_RESTART_CMD
,
134 .start_command
= DEFAULT_START_CMD
,
135 .stop_command
= DEFAULT_STOP_CMD
,
147 (((DMN)->state == DAEMON_UP) || ((DMN)->state == DAEMON_UNRESPONSIVE))
149 static const char *const state_str
[] = {
150 "Init", "Down", "Connecting", "Up", "Unresponsive",
155 daemon_state_t state
;
157 struct timeval echo_sent
;
158 unsigned int connect_tries
;
159 struct thread
*t_wakeup
;
160 struct thread
*t_read
;
161 struct thread
*t_write
;
163 struct restart_info restart
;
166 * For a given daemon, if we've turned on ignore timeouts
167 * ignore the timeout value and assume everything is ok
168 * This is for daemon debugging w/ gdb after we have started
169 * FRR and realize we have something that needs to be looked
175 #define OPTION_MINRESTART 2000
176 #define OPTION_MAXRESTART 2001
177 #define OPTION_DRY 2002
178 #define OPTION_NETNS 2003
180 static const struct option longopts
[] = {
181 {"daemon", no_argument
, NULL
, 'd'},
182 {"statedir", required_argument
, NULL
, 'S'},
183 {"loglevel", required_argument
, NULL
, 'l'},
184 {"interval", required_argument
, NULL
, 'i'},
185 {"timeout", required_argument
, NULL
, 't'},
186 {"restart-timeout", required_argument
, NULL
, 'T'},
187 {"restart", required_argument
, NULL
, 'r'},
188 {"start-command", required_argument
, NULL
, 's'},
189 {"kill-command", required_argument
, NULL
, 'k'},
190 {"dry", no_argument
, NULL
, OPTION_DRY
},
191 {"min-restart-interval", required_argument
, NULL
, OPTION_MINRESTART
},
192 {"max-restart-interval", required_argument
, NULL
, OPTION_MAXRESTART
},
193 {"pid-file", required_argument
, NULL
, 'p'},
194 {"blank-string", required_argument
, NULL
, 'b'},
196 {"netns", optional_argument
, NULL
, OPTION_NETNS
},
198 {"help", no_argument
, NULL
, 'h'},
199 {"version", no_argument
, NULL
, 'v'},
202 static int try_connect(struct daemon
*dmn
);
203 static int wakeup_send_echo(struct thread
*t_wakeup
);
204 static void try_restart(struct daemon
*dmn
);
205 static void phase_check(void);
206 static void restart_done(struct daemon
*dmn
);
208 static const char *progname
;
210 void watchfrr_set_ignore_daemon(struct vty
*vty
, const char *dname
, bool ignore
)
214 for (dmn
= gs
.daemons
; dmn
; dmn
= dmn
->next
) {
215 if (strncmp(dmn
->name
, dname
, strlen(dmn
->name
)) == 0)
220 dmn
->ignore_timeout
= ignore
;
221 vty_out(vty
, "%s switching to %s\n", dmn
->name
,
222 ignore
? "ignore" : "watch");
224 vty_out(vty
, "%s is not configured for running at the moment",
228 static void printhelp(FILE *target
)
231 "Usage : %s [OPTION...] <daemon name> ...\n\n\
232 Watchdog program to monitor status of frr daemons and try to restart\n\
233 them if they are down or unresponsive. It determines whether a daemon is\n\
234 up based on whether it can connect to the daemon's vty unix stream socket.\n\
235 It then repeatedly sends echo commands over that socket to determine whether\n\
236 the daemon is responsive. If the daemon crashes, we will receive an EOF\n\
237 on the socket connection and know immediately that the daemon is down.\n\n\
238 The daemons to be monitored should be listed on the command line.\n\n\
239 In order to avoid attempting to restart the daemons in a fast loop,\n\
240 the -m and -M options allow you to control the minimum delay between\n\
241 restart commands. The minimum restart delay is recalculated each time\n\
242 a restart is attempted: if the time since the last restart attempt exceeds\n\
243 twice the -M value, then the restart delay is set to the -m value.\n\
244 Otherwise, the interval is doubled (but capped at the -M value).\n\n",
249 -d, --daemon Run in daemon mode. In this mode, error messages are sent\n\
250 to syslog instead of stdout.\n\
251 -S, --statedir Set the vty socket directory (default is %s)\n\
252 -N, --pathspace Insert prefix into config & socket paths\n"
254 " --netns Create and/or use Linux network namespace. If no name is\n"
255 " given, uses the value from `-N`.\n"
257 "-l, --loglevel Set the logging level (default is %d).\n\
258 The value should range from %d (LOG_EMERG) to %d (LOG_DEBUG),\n\
259 but it can be set higher than %d if extra-verbose debugging\n\
260 messages are desired.\n\
261 --min-restart-interval\n\
262 Set the minimum seconds to wait between invocations of daemon\n\
263 restart commands (default is %d).\n\
264 --max-restart-interval\n\
265 Set the maximum seconds to wait between invocations of daemon\n\
266 restart commands (default is %d).\n\
267 -i, --interval Set the status polling interval in seconds (default is %d)\n\
268 -t, --timeout Set the unresponsiveness timeout in seconds (default is %d)\n\
269 -T, --restart-timeout\n\
270 Set the restart (kill) timeout in seconds (default is %d).\n\
271 If any background jobs are still running after this much\n\
272 time has elapsed, they will be killed.\n\
273 -r, --restart Supply a Bourne shell command to use to restart a single\n\
274 daemon. The command string should include '%%s' where the\n\
275 name of the daemon should be substituted.\n\
277 -s, --start-command\n\
278 Supply a Bourne shell to command to use to start a single\n\
279 daemon. The command string should include '%%s' where the\n\
280 name of the daemon should be substituted.\n\
282 -k, --kill-command\n\
283 Supply a Bourne shell to command to use to stop a single\n\
284 daemon. The command string should include '%%s' where the\n\
285 name of the daemon should be substituted.\n\
287 --dry Do not start or restart anything, just log.\n\
288 -p, --pid-file Set process identifier file name\n\
289 (default is %s/watchfrr.pid).\n\
290 -b, --blank-string\n\
291 When the supplied argument string is found in any of the\n\
292 various shell command arguments (-r, -s, or -k), replace\n\
293 it with a space. This is an ugly hack to circumvent problems\n\
294 passing command-line arguments with embedded spaces.\n\
295 -v, --version Print program version\n\
296 -h, --help Display this help and exit\n",
297 frr_vtydir
, DEFAULT_LOGLEVEL
, LOG_EMERG
, LOG_DEBUG
, LOG_DEBUG
,
298 DEFAULT_MIN_RESTART
, DEFAULT_MAX_RESTART
, DEFAULT_PERIOD
,
299 DEFAULT_TIMEOUT
, DEFAULT_RESTART_TIMEOUT
,
300 DEFAULT_RESTART_CMD
, DEFAULT_START_CMD
, DEFAULT_STOP_CMD
,
304 static pid_t
run_background(char *shell_cmd
)
308 switch (child
= fork()) {
310 flog_err_sys(EC_LIB_SYSTEM_CALL
,
311 "fork failed, cannot run command [%s]: %s",
312 shell_cmd
, safe_strerror(errno
));
316 /* Use separate process group so child processes can be killed
318 if (setpgid(0, 0) < 0)
319 zlog_warn("warning: setpgid(0,0) failed: %s",
320 safe_strerror(errno
));
324 char *const argv
[4] = {shell
, dashc
, shell_cmd
, NULL
};
325 execv("/bin/sh", argv
);
326 flog_err_sys(EC_LIB_SYSTEM_CALL
,
327 "execv(/bin/sh -c '%s') failed: %s",
328 shell_cmd
, safe_strerror(errno
));
332 /* Parent process: we will reap the child later. */
333 zlog_info("Forked background command [pid %d]: %s", (int)child
,
/*
 * Store in *result the wall-clock time that has passed since *start_time
 * and return result, so the call can be used inside an expression.
 *
 * The microsecond field of the difference is normalised to be
 * non-negative by borrowing from the seconds field.
 */
static struct timeval *time_elapsed(struct timeval *result,
				    const struct timeval *start_time)
{
	gettimeofday(result, NULL);

	result->tv_sec -= start_time->tv_sec;
	result->tv_usec -= start_time->tv_usec;

	/* Borrow whole seconds until the microsecond part is >= 0. */
	for (; result->tv_usec < 0; result->tv_sec--)
		result->tv_usec += 1000000L;

	return result;
}
352 static int restart_kill(struct thread
*t_kill
)
354 struct restart_info
*restart
= THREAD_ARG(t_kill
);
355 struct timeval delay
;
357 time_elapsed(&delay
, &restart
->time
);
359 "Warning: %s %s child process %d still running after %ld seconds, sending signal %d",
360 restart
->what
, restart
->name
, (int)restart
->pid
,
361 (long)delay
.tv_sec
, (restart
->kills
? SIGKILL
: SIGTERM
));
362 kill(-restart
->pid
, (restart
->kills
? SIGKILL
: SIGTERM
));
364 restart
->t_kill
= NULL
;
365 thread_add_timer(master
, restart_kill
, restart
, gs
.restart_timeout
,
370 static struct restart_info
*find_child(pid_t child
)
373 if (gs
.restart
.pid
== child
)
376 for (dmn
= gs
.daemons
; dmn
; dmn
= dmn
->next
) {
377 if (dmn
->restart
.pid
== child
)
378 return &dmn
->restart
;
383 static void sigchild(void)
389 struct restart_info
*restart
;
392 switch (child
= waitpid(-1, &status
, WNOHANG
)) {
394 flog_err_sys(EC_LIB_SYSTEM_CALL
, "waitpid failed: %s",
395 safe_strerror(errno
));
398 zlog_warn("SIGCHLD received, but waitpid did not reap a child");
402 if (child
== integrated_write_pid
) {
403 integrated_write_sigchld(status
);
407 if ((restart
= find_child(child
)) != NULL
) {
408 name
= restart
->name
;
409 what
= restart
->what
;
412 thread_cancel(restart
->t_kill
);
413 restart
->t_kill
= NULL
;
414 /* Update restart time to reflect the time the command
416 gettimeofday(&restart
->time
, NULL
);
420 "waitpid returned status for an unknown child process %d",
425 if (WIFSTOPPED(status
))
426 zlog_warn("warning: %s %s process %d is stopped", what
, name
,
428 else if (WIFSIGNALED(status
))
429 zlog_warn("%s %s process %d terminated due to signal %d", what
,
430 name
, (int)child
, WTERMSIG(status
));
431 else if (WIFEXITED(status
)) {
432 if (WEXITSTATUS(status
) != 0)
434 "%s %s process %d exited with non-zero status %d",
435 what
, name
, (int)child
, WEXITSTATUS(status
));
437 zlog_debug("%s %s process %d exited normally", what
,
440 if (restart
&& restart
!= &gs
.restart
) {
441 dmn
= container_of(restart
, struct daemon
,
445 for (dmn
= gs
.daemons
; dmn
; dmn
= dmn
->next
)
451 "cannot interpret %s %s process %d wait status 0x%x",
452 what
, name
, (int)child
, status
);
456 static int run_job(struct restart_info
*restart
, const char *cmdtype
,
457 const char *command
, int force
, int update_interval
)
459 struct timeval delay
;
461 if (gs
.loglevel
> LOG_DEBUG
+ 1)
462 zlog_debug("attempting to %s %s", cmdtype
, restart
->name
);
465 if (gs
.loglevel
> LOG_DEBUG
+ 1)
467 "cannot %s %s, previous pid %d still running",
468 cmdtype
, restart
->name
, (int)restart
->pid
);
472 #if defined HAVE_SYSTEMD
475 snprintf(buffer
, sizeof(buffer
), "restarting %s", restart
->name
);
476 systemd_send_status(buffer
);
479 /* Note: time_elapsed test must come before the force test, since we
481 to make sure that delay is initialized for use below in updating the
483 if ((time_elapsed(&delay
, &restart
->time
)->tv_sec
< restart
->interval
)
486 if (gs
.loglevel
> LOG_DEBUG
+ 1)
488 "postponing %s %s: elapsed time %ld < retry interval %ld",
489 cmdtype
, restart
->name
, (long)delay
.tv_sec
,
494 gettimeofday(&restart
->time
, NULL
);
497 char cmd
[strlen(command
) + strlen(restart
->name
) + 1];
498 snprintf(cmd
, sizeof(cmd
), command
, restart
->name
);
499 if ((restart
->pid
= run_background(cmd
)) > 0) {
500 restart
->t_kill
= NULL
;
501 thread_add_timer(master
, restart_kill
, restart
,
502 gs
.restart_timeout
, &restart
->t_kill
);
503 restart
->what
= cmdtype
;
509 #if defined HAVE_SYSTEMD
510 systemd_send_status("FRR Operational");
512 /* Calculate the new restart interval. */
513 if (update_interval
) {
514 if (delay
.tv_sec
> 2 * gs
.max_restart_interval
)
515 restart
->interval
= gs
.min_restart_interval
;
516 else if ((restart
->interval
*= 2) > gs
.max_restart_interval
)
517 restart
->interval
= gs
.max_restart_interval
;
518 if (gs
.loglevel
> LOG_DEBUG
+ 1)
519 zlog_debug("restart %s interval is now %ld",
520 restart
->name
, restart
->interval
);
525 #define SET_READ_HANDLER(DMN) \
527 (DMN)->t_read = NULL; \
528 thread_add_read(master, handle_read, (DMN), (DMN)->fd, \
532 #define SET_WAKEUP_DOWN(DMN) \
534 (DMN)->t_wakeup = NULL; \
535 thread_add_timer_msec(master, wakeup_down, (DMN), \
536 FUZZY(gs.period), &(DMN)->t_wakeup); \
539 #define SET_WAKEUP_UNRESPONSIVE(DMN) \
541 (DMN)->t_wakeup = NULL; \
542 thread_add_timer_msec(master, wakeup_unresponsive, (DMN), \
543 FUZZY(gs.period), &(DMN)->t_wakeup); \
546 #define SET_WAKEUP_ECHO(DMN) \
548 (DMN)->t_wakeup = NULL; \
549 thread_add_timer_msec(master, wakeup_send_echo, (DMN), \
550 FUZZY(gs.period), &(DMN)->t_wakeup); \
553 static int wakeup_down(struct thread
*t_wakeup
)
555 struct daemon
*dmn
= THREAD_ARG(t_wakeup
);
557 dmn
->t_wakeup
= NULL
;
558 if (try_connect(dmn
) < 0)
559 SET_WAKEUP_DOWN(dmn
);
560 if ((dmn
->connect_tries
> 1) && (dmn
->state
!= DAEMON_UP
))
565 static int wakeup_init(struct thread
*t_wakeup
)
567 struct daemon
*dmn
= THREAD_ARG(t_wakeup
);
569 dmn
->t_wakeup
= NULL
;
570 if (try_connect(dmn
) < 0) {
572 "%s state -> down : initial connection attempt failed",
574 dmn
->state
= DAEMON_DOWN
;
580 static void restart_done(struct daemon
*dmn
)
582 if (dmn
->state
!= DAEMON_DOWN
) {
584 "Daemon: %s: is in %s state but expected it to be in DAEMON_DOWN state",
585 dmn
->name
, state_str
[dmn
->state
]);
588 THREAD_OFF(dmn
->t_wakeup
);
589 if (try_connect(dmn
) < 0)
590 SET_WAKEUP_DOWN(dmn
);
593 static void daemon_down(struct daemon
*dmn
, const char *why
)
595 if (IS_UP(dmn
) || (dmn
->state
== DAEMON_INIT
))
596 flog_err(EC_WATCHFRR_CONNECTION
, "%s state -> down : %s",
598 else if (gs
.loglevel
> LOG_DEBUG
)
599 zlog_debug("%s still down : %s", dmn
->name
, why
);
602 dmn
->state
= DAEMON_DOWN
;
607 THREAD_OFF(dmn
->t_read
);
608 THREAD_OFF(dmn
->t_write
);
609 THREAD_OFF(dmn
->t_wakeup
);
610 if (try_connect(dmn
) < 0)
611 SET_WAKEUP_DOWN(dmn
);
615 static int handle_read(struct thread
*t_read
)
617 struct daemon
*dmn
= THREAD_ARG(t_read
);
618 static const char resp
[sizeof(PING_TOKEN
) + 4] = PING_TOKEN
"\n";
619 char buf
[sizeof(resp
) + 100];
621 struct timeval delay
;
624 if ((rc
= read(dmn
->fd
, buf
, sizeof(buf
))) < 0) {
627 if (ERRNO_IO_RETRY(errno
)) {
628 /* Pretend it never happened. */
629 SET_READ_HANDLER(dmn
);
632 snprintf(why
, sizeof(why
), "unexpected read error: %s",
633 safe_strerror(errno
));
634 daemon_down(dmn
, why
);
638 daemon_down(dmn
, "read returned EOF");
641 if (!dmn
->echo_sent
.tv_sec
) {
642 char why
[sizeof(buf
) + 100];
643 snprintf(why
, sizeof(why
),
644 "unexpected read returns %d bytes: %.*s", (int)rc
,
646 daemon_down(dmn
, why
);
650 /* We are expecting an echo response: is there any chance that the
651 response would not be returned entirely in the first read? That
652 seems inconceivable... */
653 if ((rc
!= sizeof(resp
)) || memcmp(buf
, resp
, sizeof(resp
))) {
654 char why
[100 + sizeof(buf
)];
655 snprintf(why
, sizeof(why
),
656 "read returned bad echo response of %d bytes (expecting %u): %.*s",
657 (int)rc
, (unsigned int)sizeof(resp
), (int)rc
, buf
);
658 daemon_down(dmn
, why
);
662 time_elapsed(&delay
, &dmn
->echo_sent
);
663 dmn
->echo_sent
.tv_sec
= 0;
664 if (dmn
->state
== DAEMON_UNRESPONSIVE
) {
665 if (delay
.tv_sec
< gs
.timeout
) {
666 dmn
->state
= DAEMON_UP
;
668 "%s state -> up : echo response received after %ld.%06ld seconds",
669 dmn
->name
, (long)delay
.tv_sec
,
670 (long)delay
.tv_usec
);
673 "%s: slow echo response finally received after %ld.%06ld seconds",
674 dmn
->name
, (long)delay
.tv_sec
,
675 (long)delay
.tv_usec
);
676 } else if (gs
.loglevel
> LOG_DEBUG
+ 1)
677 zlog_debug("%s: echo response received after %ld.%06ld seconds",
678 dmn
->name
, (long)delay
.tv_sec
, (long)delay
.tv_usec
);
680 SET_READ_HANDLER(dmn
);
682 thread_cancel(dmn
->t_wakeup
);
683 SET_WAKEUP_ECHO(dmn
);
 * Wait until we notice that all daemons are ready before
 * we tell systemd that we are ready
692 static void daemon_send_ready(int exitcode
)
702 zlog_notice("all daemons up, doing startup-complete notify");
703 else if (gs
.numdown
< gs
.numdaemons
)
704 flog_err(EC_WATCHFRR_CONNECTION
,
705 "startup did not complete within timeout (%d/%d daemons running)",
706 gs
.numdaemons
- gs
.numdown
, gs
.numdaemons
);
708 flog_err(EC_WATCHFRR_CONNECTION
,
709 "all configured daemons failed to start -- exiting watchfrr");
716 snprintf(started
, sizeof(started
), "%s/%s", frr_vtydir
,
718 fp
= fopen(started
, "w");
721 #if defined HAVE_SYSTEMD
722 systemd_send_started(master
, 0);
723 systemd_send_status("FRR Operational");
728 static void daemon_up(struct daemon
*dmn
, const char *why
)
730 dmn
->state
= DAEMON_UP
;
732 dmn
->connect_tries
= 0;
733 zlog_notice("%s state -> up : %s", dmn
->name
, why
);
735 daemon_send_ready(0);
736 SET_WAKEUP_ECHO(dmn
);
740 static int check_connect(struct thread
*t_write
)
742 struct daemon
*dmn
= THREAD_ARG(t_write
);
744 socklen_t reslen
= sizeof(sockerr
);
747 if (getsockopt(dmn
->fd
, SOL_SOCKET
, SO_ERROR
, (char *)&sockerr
, &reslen
)
749 zlog_warn("%s: check_connect: getsockopt failed: %s", dmn
->name
,
750 safe_strerror(errno
));
752 "getsockopt failed checking connection success");
755 if ((reslen
== sizeof(sockerr
)) && sockerr
) {
759 "getsockopt reports that connection attempt failed: %s",
760 safe_strerror(sockerr
));
761 daemon_down(dmn
, why
);
765 daemon_up(dmn
, "delayed connect succeeded");
769 static int wakeup_connect_hanging(struct thread
*t_wakeup
)
771 struct daemon
*dmn
= THREAD_ARG(t_wakeup
);
774 dmn
->t_wakeup
= NULL
;
775 snprintf(why
, sizeof(why
),
776 "connection attempt timed out after %ld seconds", gs
.timeout
);
777 daemon_down(dmn
, why
);
781 /* Making connection to protocol daemon. */
782 static int try_connect(struct daemon
*dmn
)
785 struct sockaddr_un addr
;
788 if (gs
.loglevel
> LOG_DEBUG
+ 1)
789 zlog_debug("%s: attempting to connect", dmn
->name
);
790 dmn
->connect_tries
++;
792 memset(&addr
, 0, sizeof(struct sockaddr_un
));
793 addr
.sun_family
= AF_UNIX
;
794 snprintf(addr
.sun_path
, sizeof(addr
.sun_path
), "%s/%s.vty", gs
.vtydir
,
796 #ifdef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN
797 len
= addr
.sun_len
= SUN_LEN(&addr
);
799 len
= sizeof(addr
.sun_family
) + strlen(addr
.sun_path
);
800 #endif /* HAVE_STRUCT_SOCKADDR_UN_SUN_LEN */
802 /* Quick check to see if we might succeed before we go to the trouble
803 of creating a socket. */
804 if (access(addr
.sun_path
, W_OK
) < 0) {
806 flog_err_sys(EC_LIB_SYSTEM_CALL
,
807 "%s: access to socket %s denied: %s",
808 dmn
->name
, addr
.sun_path
,
809 safe_strerror(errno
));
813 if ((sock
= socket(AF_UNIX
, SOCK_STREAM
, 0)) < 0) {
814 flog_err_sys(EC_LIB_SOCKET
, "%s(%s): cannot make socket: %s",
815 __func__
, addr
.sun_path
, safe_strerror(errno
));
819 if (set_nonblocking(sock
) < 0 || set_cloexec(sock
) < 0) {
820 flog_err_sys(EC_LIB_SYSTEM_CALL
,
821 "%s(%s): set_nonblocking/cloexec(%d) failed",
822 __func__
, addr
.sun_path
, sock
);
827 if (connect(sock
, (struct sockaddr
*)&addr
, len
) < 0) {
828 if ((errno
!= EINPROGRESS
) && (errno
!= EWOULDBLOCK
)) {
829 if (gs
.loglevel
> LOG_DEBUG
)
830 zlog_debug("%s(%s): connect failed: %s",
831 __func__
, addr
.sun_path
,
832 safe_strerror(errno
));
836 if (gs
.loglevel
> LOG_DEBUG
)
837 zlog_debug("%s: connection in progress", dmn
->name
);
838 dmn
->state
= DAEMON_CONNECTING
;
841 thread_add_write(master
, check_connect
, dmn
, dmn
->fd
,
843 dmn
->t_wakeup
= NULL
;
844 thread_add_timer(master
, wakeup_connect_hanging
, dmn
,
845 gs
.timeout
, &dmn
->t_wakeup
);
846 SET_READ_HANDLER(dmn
);
851 SET_READ_HANDLER(dmn
);
852 daemon_up(dmn
, "connect succeeded");
856 static int phase_hanging(struct thread
*t_hanging
)
858 gs
.t_phase_hanging
= NULL
;
859 flog_err(EC_WATCHFRR_CONNECTION
,
860 "Phase [%s] hanging for %ld seconds, aborting phased restart",
861 phase_str
[gs
.phase
], PHASE_TIMEOUT
);
862 gs
.phase
= PHASE_NONE
;
866 static void set_phase(restart_phase_t new_phase
)
868 gs
.phase
= new_phase
;
869 if (gs
.t_phase_hanging
)
870 thread_cancel(gs
.t_phase_hanging
);
871 gs
.t_phase_hanging
= NULL
;
872 thread_add_timer(master
, phase_hanging
, NULL
, PHASE_TIMEOUT
,
873 &gs
.t_phase_hanging
);
876 static void phase_check(void)
885 for (dmn
= gs
.daemons
; dmn
; dmn
= dmn
->next
)
886 if (dmn
->state
== DAEMON_INIT
)
889 /* startup complete, everything out of INIT */
890 gs
.phase
= PHASE_NONE
;
891 for (dmn
= gs
.daemons
; dmn
; dmn
= dmn
->next
)
892 if (dmn
->state
== DAEMON_DOWN
) {
893 SET_WAKEUP_DOWN(dmn
);
897 case PHASE_STOPS_PENDING
:
901 "Phased restart: all routing daemon stop jobs have completed.");
902 set_phase(PHASE_WAITING_DOWN
);
905 case PHASE_WAITING_DOWN
:
906 if (gs
.numdown
+ IS_UP(gs
.special
) < gs
.numdaemons
)
908 zlog_info("Phased restart: all routing daemons now down.");
909 run_job(&gs
.special
->restart
, "restart", gs
.restart_command
, 1,
911 set_phase(PHASE_ZEBRA_RESTART_PENDING
);
914 case PHASE_ZEBRA_RESTART_PENDING
:
915 if (gs
.special
->restart
.pid
)
917 zlog_info("Phased restart: %s restart job completed.",
919 set_phase(PHASE_WAITING_ZEBRA_UP
);
922 case PHASE_WAITING_ZEBRA_UP
:
923 if (!IS_UP(gs
.special
))
925 zlog_info("Phased restart: %s is now up.", gs
.special
->name
);
928 for (dmn
= gs
.daemons
; dmn
; dmn
= dmn
->next
) {
929 if (dmn
!= gs
.special
)
930 run_job(&dmn
->restart
, "start",
931 gs
.start_command
, 1, 0);
934 gs
.phase
= PHASE_NONE
;
935 THREAD_OFF(gs
.t_phase_hanging
);
936 zlog_notice("Phased global restart has completed.");
941 static void try_restart(struct daemon
*dmn
)
946 if (dmn
!= gs
.special
) {
947 if ((gs
.special
->state
== DAEMON_UP
)
948 && (gs
.phase
== PHASE_NONE
))
949 run_job(&dmn
->restart
, "restart", gs
.restart_command
, 0,
953 "%s: postponing restart attempt because master %s daemon not up [%s], or phased restart in progress",
954 dmn
->name
, gs
.special
->name
,
955 state_str
[gs
.special
->state
]);
959 if ((gs
.phase
!= PHASE_NONE
) || gs
.numpids
) {
960 if (gs
.loglevel
> LOG_DEBUG
+ 1)
962 "postponing phased global restart: restart already in progress [%s], or outstanding child processes [%d]",
963 phase_str
[gs
.phase
], gs
.numpids
);
966 /* Is it too soon for a restart? */
968 struct timeval delay
;
969 if (time_elapsed(&delay
, &gs
.special
->restart
.time
)->tv_sec
970 < gs
.special
->restart
.interval
) {
971 if (gs
.loglevel
> LOG_DEBUG
+ 1)
973 "postponing phased global restart: elapsed time %ld < retry interval %ld",
975 gs
.special
->restart
.interval
);
979 run_job(&gs
.restart
, "restart", gs
.restart_command
, 0, 1);
982 static int wakeup_unresponsive(struct thread
*t_wakeup
)
984 struct daemon
*dmn
= THREAD_ARG(t_wakeup
);
986 dmn
->t_wakeup
= NULL
;
987 if (dmn
->state
!= DAEMON_UNRESPONSIVE
)
988 flog_err(EC_WATCHFRR_CONNECTION
,
989 "%s: no longer unresponsive (now %s), wakeup should have been cancelled!",
990 dmn
->name
, state_str
[dmn
->state
]);
992 SET_WAKEUP_UNRESPONSIVE(dmn
);
998 static int wakeup_no_answer(struct thread
*t_wakeup
)
1000 struct daemon
*dmn
= THREAD_ARG(t_wakeup
);
1002 dmn
->t_wakeup
= NULL
;
1003 dmn
->state
= DAEMON_UNRESPONSIVE
;
1004 if (dmn
->ignore_timeout
)
1006 flog_err(EC_WATCHFRR_CONNECTION
,
1007 "%s state -> unresponsive : no response yet to ping sent %ld seconds ago",
1008 dmn
->name
, gs
.timeout
);
1009 SET_WAKEUP_UNRESPONSIVE(dmn
);
1014 static int wakeup_send_echo(struct thread
*t_wakeup
)
1016 static const char echocmd
[] = "echo " PING_TOKEN
;
1018 struct daemon
*dmn
= THREAD_ARG(t_wakeup
);
1020 dmn
->t_wakeup
= NULL
;
1021 if (((rc
= write(dmn
->fd
, echocmd
, sizeof(echocmd
))) < 0)
1022 || ((size_t)rc
!= sizeof(echocmd
))) {
1023 char why
[100 + sizeof(echocmd
)];
1024 snprintf(why
, sizeof(why
),
1025 "write '%s' returned %d instead of %u", echocmd
,
1026 (int)rc
, (unsigned int)sizeof(echocmd
));
1027 daemon_down(dmn
, why
);
1029 gettimeofday(&dmn
->echo_sent
, NULL
);
1030 dmn
->t_wakeup
= NULL
;
1031 thread_add_timer(master
, wakeup_no_answer
, dmn
, gs
.timeout
,
1037 bool check_all_up(void)
1041 for (dmn
= gs
.daemons
; dmn
; dmn
= dmn
->next
)
1042 if (dmn
->state
!= DAEMON_UP
)
1047 void watchfrr_status(struct vty
*vty
)
1050 struct timeval delay
;
1052 vty_out(vty
, "watchfrr global phase: %s\n", phase_str
[gs
.phase
]);
1054 vty_out(vty
, " global restart running, pid %ld\n",
1055 (long)gs
.restart
.pid
);
1057 for (dmn
= gs
.daemons
; dmn
; dmn
= dmn
->next
) {
1058 vty_out(vty
, " %-20s %s%s", dmn
->name
, state_str
[dmn
->state
],
1059 dmn
->ignore_timeout
? "/Ignoring Timeout\n" : "\n");
1060 if (dmn
->restart
.pid
)
1061 vty_out(vty
, " restart running, pid %ld\n",
1062 (long)dmn
->restart
.pid
);
1063 else if (dmn
->state
== DAEMON_DOWN
&&
1064 time_elapsed(&delay
, &dmn
->restart
.time
)->tv_sec
1065 < dmn
->restart
.interval
)
1066 vty_out(vty
, " restarting in %jd seconds (%jds backoff interval)\n",
1067 (intmax_t)dmn
->restart
.interval
1068 - (intmax_t)delay
.tv_sec
,
1069 (intmax_t)dmn
->restart
.interval
);
1073 static void sigint(void)
1075 zlog_notice("Terminating on signal");
1076 systemd_send_stopping();
/*
 * Check that a shell command template is usable as an snprintf format
 * taking the daemon name: it must contain exactly one '%' character and
 * that character must be immediately followed by 's'.
 *
 * Returns 1 when the template is acceptable, 0 otherwise.
 */
static int valid_command(const char *cmd)
{
	const char *percent = strchr(cmd, '%');

	if (percent == NULL)
		return 0;

	if (percent[1] != 's')
		return 0;

	/* Any further '%' would be a second conversion (or "%%"). */
	return strchr(percent + 1, '%') == NULL;
}
1088 /* This is an ugly hack to circumvent problems with passing command-line
1089 arguments that contain spaces. The fix is to use a configuration file. */
/*
 * Return a newly allocated copy of cmd in which every occurrence of
 * blankstr has been replaced by a single space.
 *
 * cmd      - command template to translate (not modified)
 * blankstr - marker string standing in for a blank
 *
 * The caller owns the returned buffer.  On allocation failure the
 * process exits with status 1.
 *
 * The search resumes just after each inserted space instead of from the
 * start of the string.  This guarantees termination even when blankstr
 * itself is " ": rescanning from the start would find the space that was
 * just written, forever.  An empty blankstr is treated as "nothing to
 * replace"; strstr() matches an empty needle everywhere, which would
 * otherwise grow the string past its allocation.
 */
static char *translate_blanks(const char *cmd, const char *blankstr)
{
	size_t cmdlen = strlen(cmd);
	size_t bslen = strlen(blankstr);
	char *res;
	char *p;

	res = malloc(cmdlen + 1);
	if (!res) {
		perror("malloc");
		exit(1);
	}
	memcpy(res, cmd, cmdlen + 1);

	if (bslen == 0)
		return res;

	p = res;
	while ((p = strstr(p, blankstr)) != NULL) {
		*p = ' ';
		/* Close the gap left by the rest of the marker. */
		if (bslen != 1)
			memmove(p + 1, p + bslen, strlen(p + bslen) + 1);
		/* Step over the blank we just wrote. */
		p++;
	}
	return res;
}
/*
 * Timer callback scheduled by watchfrr_init() after STARTUP_TIMEOUT:
 * startup took too long, so report readiness with exit code 1.
 */
static int startup_timeout(struct thread *t_wakeup)
{
	(void)t_wakeup; /* timer argument is not needed */

	daemon_send_ready(1);

	return 0;
}
1116 #include <sys/mount.h>
1119 #define NETNS_RUN_DIR "/var/run/netns"
1121 static void netns_create(int dirfd
, const char *nsname
)
1123 /* make /var/run/netns shared between mount namespaces
1124 * just like iproute2 sets it up
1126 if (mount("", NETNS_RUN_DIR
, "none", MS_SHARED
| MS_REC
, NULL
)) {
1127 if (errno
!= EINVAL
) {
1132 if (mount(NETNS_RUN_DIR
, NETNS_RUN_DIR
, "none",
1133 MS_BIND
| MS_REC
, NULL
)) {
1138 if (mount("", NETNS_RUN_DIR
, "none", MS_SHARED
| MS_REC
,
1145 /* need an empty file to mount on top of */
1146 int nsfd
= openat(dirfd
, nsname
, O_CREAT
| O_RDONLY
| O_EXCL
, 0);
1149 fprintf(stderr
, "failed to create \"%s/%s\": %s\n",
1150 NETNS_RUN_DIR
, nsname
, strerror(errno
));
1155 if (unshare(CLONE_NEWNET
)) {
1157 unlinkat(dirfd
, nsname
, 0);
1161 char *dstpath
= asprintfrr(MTYPE_TMP
, "%s/%s", NETNS_RUN_DIR
, nsname
);
1163 /* bind-mount so the namespace has a name and is persistent */
1164 if (mount("/proc/self/ns/net", dstpath
, "none", MS_BIND
, NULL
) < 0) {
1165 fprintf(stderr
, "failed to bind-mount netns to \"%s\": %s\n",
1166 dstpath
, strerror(errno
));
1167 unlinkat(dirfd
, nsname
, 0);
1171 XFREE(MTYPE_TMP
, dstpath
);
1174 static void netns_setup(const char *nsname
)
1178 dirfd
= open(NETNS_RUN_DIR
, O_DIRECTORY
| O_RDONLY
);
1180 if (errno
== ENOTDIR
) {
1181 fprintf(stderr
, "error: \"%s\" is not a directory!\n",
1184 } else if (errno
== ENOENT
) {
1185 if (mkdir(NETNS_RUN_DIR
, 0755)) {
1186 fprintf(stderr
, "error: \"%s\": mkdir: %s\n",
1187 NETNS_RUN_DIR
, strerror(errno
));
1190 dirfd
= open(NETNS_RUN_DIR
, O_DIRECTORY
| O_RDONLY
);
1192 fprintf(stderr
, "error: \"%s\": opendir: %s\n",
1193 NETNS_RUN_DIR
, strerror(errno
));
1197 fprintf(stderr
, "error: \"%s\": %s\n",
1198 NETNS_RUN_DIR
, strerror(errno
));
1203 nsfd
= openat(dirfd
, nsname
, O_RDONLY
);
1204 if (nsfd
< 0 && errno
!= ENOENT
) {
1205 fprintf(stderr
, "error: \"%s/%s\": %s\n",
1206 NETNS_RUN_DIR
, nsname
, strerror(errno
));
1210 netns_create(dirfd
, nsname
);
1212 if (setns(nsfd
, CLONE_NEWNET
)) {
1220 /* make sure loopback is up... weird things happen otherwise.
1221 * ioctl is perfectly fine for this, don't need netlink...
1224 struct ifreq ifr
= { };
1226 strlcpy(ifr
.ifr_name
, "lo", sizeof(ifr
.ifr_name
));
1228 sockfd
= socket(AF_INET
, SOCK_DGRAM
, 0);
1233 if (ioctl(sockfd
, SIOCGIFFLAGS
, &ifr
)) {
1234 perror("ioctl(SIOCGIFFLAGS, \"lo\")");
1237 if (!(ifr
.ifr_flags
& IFF_UP
)) {
1238 ifr
.ifr_flags
|= IFF_UP
;
1239 if (ioctl(sockfd
, SIOCSIFFLAGS
, &ifr
)) {
1240 perror("ioctl(SIOCSIFFLAGS, \"lo\")");
1247 #else /* !GNU_LINUX */
1249 static void netns_setup(const char *nsname
)
1251 fprintf(stderr
, "network namespaces are only available on Linux\n");
1256 static void watchfrr_init(int argc
, char **argv
)
1258 const char *special
= "zebra";
1260 struct daemon
*dmn
, **add
= &gs
.daemons
;
1261 char alldaemons
[512] = "", *p
= alldaemons
;
1263 thread_add_timer_msec(master
, startup_timeout
, NULL
, STARTUP_TIMEOUT
,
1264 &gs
.t_startup_timeout
);
1266 for (i
= optind
; i
< argc
; i
++) {
1267 dmn
= XCALLOC(MTYPE_WATCHFRR_DAEMON
, sizeof(*dmn
));
1269 dmn
->name
= dmn
->restart
.name
= argv
[i
];
1270 dmn
->state
= DAEMON_INIT
;
1274 dmn
->t_wakeup
= NULL
;
1275 thread_add_timer_msec(master
, wakeup_init
, dmn
, 0,
1277 dmn
->restart
.interval
= gs
.min_restart_interval
;
1281 if (!strcmp(dmn
->name
, special
))
1287 "Must specify one or more daemons to monitor.\n\n");
1290 if (!watch_only
&& !gs
.special
) {
1291 fprintf(stderr
, "\"%s\" daemon must be in daemon lists\n\n",
1296 for (dmn
= gs
.daemons
; dmn
; dmn
= dmn
->next
) {
1297 snprintf(p
, alldaemons
+ sizeof(alldaemons
) - p
, "%s%s",
1298 (p
== alldaemons
) ? "" : " ", dmn
->name
);
1301 zlog_notice("%s %s watching [%s]%s", progname
, FRR_VERSION
, alldaemons
,
1302 watch_only
? ", monitor mode" : "");
1305 struct zebra_privs_t watchfrr_privs
= {
1307 .vty_group
= VTY_GROUP
,
1311 static struct quagga_signal_t watchfrr_signals
[] = {
1322 .handler
= sigchild
,
1326 FRR_DAEMON_INFO(watchfrr
, WATCHFRR
,
1327 .flags
= FRR_NO_PRIVSEP
| FRR_NO_TCPVTY
| FRR_LIMITED_CLI
1328 | FRR_NO_CFG_PID_DRY
| FRR_NO_ZCLIENT
1331 .printhelp
= printhelp
,
1332 .copyright
= "Copyright 2004 Andrew J. Schorr",
1334 .signals
= watchfrr_signals
,
1335 .n_signals
= array_size(watchfrr_signals
),
1337 .privs
= &watchfrr_privs
, )
1339 #define DEPRECATED_OPTIONS "aAezR:"
1341 int main(int argc
, char **argv
)
1344 const char *blankstr
= NULL
;
1345 const char *netns
= NULL
;
1346 bool netns_en
= false;
1348 frr_preinit(&watchfrr_di
, argc
, argv
);
1349 progname
= watchfrr_di
.progname
;
1351 frr_opt_add("b:di:k:l:N:p:r:S:s:t:T:" DEPRECATED_OPTIONS
, longopts
, "");
1353 gs
.restart
.name
= "all";
1354 while ((opt
= frr_getopt(argc
, argv
, NULL
)) != EOF
) {
1355 if (opt
&& opt
< 128 && strchr(DEPRECATED_OPTIONS
, opt
)) {
1357 "The -%c option no longer exists.\n"
1358 "Please refer to the watchfrr(8) man page.\n",
1373 if (!valid_command(optarg
)) {
1375 "Invalid kill command, must contain '%%s': %s\n",
1379 gs
.stop_command
= optarg
;
1383 if ((sscanf(optarg
, "%d%1s", &gs
.loglevel
, garbage
)
1385 || (gs
.loglevel
< LOG_EMERG
)) {
1387 "Invalid loglevel argument: %s\n",
1392 case OPTION_MINRESTART
: {
1394 if ((sscanf(optarg
, "%ld%1s", &gs
.min_restart_interval
,
1397 || (gs
.min_restart_interval
< 0)) {
1399 "Invalid min_restart_interval argument: %s\n",
1404 case OPTION_MAXRESTART
: {
1406 if ((sscanf(optarg
, "%ld%1s", &gs
.max_restart_interval
,
1409 || (gs
.max_restart_interval
< 0)) {
1411 "Invalid max_restart_interval argument: %s\n",
1418 if (strchr(optarg
, '/')) {
1420 "invalid network namespace name \"%s\" (may not contain slashes)\n",
1429 if ((sscanf(optarg
, "%d%1s", &period
, garbage
) != 1)
1430 || (gs
.period
< 1)) {
1432 "Invalid interval argument: %s\n",
1436 gs
.period
= 1000 * period
;
1439 watchfrr_di
.pid_file
= optarg
;
1442 if (!valid_command(optarg
)) {
1444 "Invalid restart command, must contain '%%s': %s\n",
1448 gs
.restart_command
= optarg
;
1451 if (!valid_command(optarg
)) {
1453 "Invalid start command, must contain '%%s': %s\n",
1457 gs
.start_command
= optarg
;
1464 if ((sscanf(optarg
, "%ld%1s", &gs
.timeout
, garbage
)
1466 || (gs
.timeout
< 1)) {
1468 "Invalid timeout argument: %s\n",
1475 if ((sscanf(optarg
, "%ld%1s", &gs
.restart_timeout
,
1478 || (gs
.restart_timeout
< 1)) {
1480 "Invalid restart timeout argument: %s\n",
1486 fputs("Invalid option.\n", stderr
);
1492 && (gs
.start_command
|| gs
.stop_command
|| gs
.restart_command
)) {
1493 fputs("Options -r/-s/-k are not used when --dry is active.\n",
1497 && (!gs
.restart_command
|| !gs
.start_command
|| !gs
.stop_command
)) {
1499 "Options -s (start), -k (kill), and -r (restart) are required.\n");
1504 if (gs
.restart_command
)
1505 gs
.restart_command
=
1506 translate_blanks(gs
.restart_command
, blankstr
);
1507 if (gs
.start_command
)
1509 translate_blanks(gs
.start_command
, blankstr
);
1510 if (gs
.stop_command
)
1512 translate_blanks(gs
.stop_command
, blankstr
);
1515 gs
.restart
.interval
= gs
.min_restart_interval
;
1517 /* env variable for the processes that we start */
1518 if (watchfrr_di
.pathspace
)
1519 setenv("FRR_PATHSPACE", watchfrr_di
.pathspace
, 1);
1521 unsetenv("FRR_PATHSPACE");
1523 if (netns_en
&& !netns
)
1524 netns
= watchfrr_di
.pathspace
;
1525 if (netns_en
&& netns
&& netns
[0])
1528 master
= frr_init();
1529 watchfrr_error_init();
1530 watchfrr_init(argc
, argv
);
1531 watchfrr_vty_init();
1535 if (watchfrr_di
.daemon_mode
)
1536 zlog_syslog_set_prio_min(MIN(gs
.loglevel
, LOG_DEBUG
));
1538 zlog_aux_init(NULL
, MIN(gs
.loglevel
, LOG_DEBUG
));
1542 systemd_send_stopping();