+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Monitor status of frr daemons and restart if necessary.
*
* Copyright (C) 2004 Andrew J. Schorr
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; see the file COPYING; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <zebra.h>
/* Initial value of the .loglevel field in the global-state initializer. */
#define DEFAULT_LOGLEVEL LOG_INFO
/*
 * Initial values of .min_restart_interval / .max_restart_interval.  The help
 * text describes the maximum as the seconds to wait between invocations of
 * daemon restart commands.
 */
#define DEFAULT_MIN_RESTART 60
#define DEFAULT_MAX_RESTART 600
/*
 * Initial value of .operational_timeout: the delay handed to the timer that
 * reports "FRR Operational" to systemd once every daemon is back up.
 * NOTE(review): presumably seconds, like the restart intervals -- confirm
 * against thread_add_timer().
 */
+#define DEFAULT_OPERATIONAL_TIMEOUT 60
/*
 * Default command templates.  The single %s is substituted with the daemon
 * name by snprintf() when a job command line is built.
 */
#define DEFAULT_RESTART_CMD WATCHFRR_SH_PATH " restart %s"
#define DEFAULT_START_CMD WATCHFRR_SH_PATH " start %s"
enum restart_phase phase;
struct thread *t_phase_hanging;
struct thread *t_startup_timeout;
+ struct thread *t_operational;
const char *vtydir;
long period;
long timeout;
long restart_timeout;
+ bool reading_configuration;
long min_restart_interval;
long max_restart_interval;
+ long operational_timeout;
struct daemon *daemons;
const char *restart_command;
const char *start_command;
.loglevel = DEFAULT_LOGLEVEL,
.min_restart_interval = DEFAULT_MIN_RESTART,
.max_restart_interval = DEFAULT_MAX_RESTART,
+ .operational_timeout = DEFAULT_OPERATIONAL_TIMEOUT,
.restart_command = DEFAULT_RESTART_CMD,
.start_command = DEFAULT_START_CMD,
.stop_command = DEFAULT_STOP_CMD,
/*
 * Identifiers used as the last member of longopts[] entries for options that
 * have no single-letter form; the option switch in watchfrr_init() matches on
 * them (e.g. OPTION_MAXOPERATIONAL for --operational-timeout).
 */
#define OPTION_MAXRESTART 2001
#define OPTION_DRY 2002
#define OPTION_NETNS 2003
+#define OPTION_MAXOPERATIONAL 2004
static const struct option longopts[] = {
{"daemon", no_argument, NULL, 'd'},
{"dry", no_argument, NULL, OPTION_DRY},
{"min-restart-interval", required_argument, NULL, OPTION_MINRESTART},
{"max-restart-interval", required_argument, NULL, OPTION_MAXRESTART},
+ {"operational-timeout", required_argument, NULL, OPTION_MAXOPERATIONAL},
{"pid-file", required_argument, NULL, 'p'},
{"blank-string", required_argument, NULL, 'b'},
#ifdef GNU_LINUX
--max-restart-interval\n\
Set the maximum seconds to wait between invocations of daemon\n\
restart commands (default is %d).\n\
+ --operational-timeout\n\
+ Set the time before systemd is notified that we are considered\n\
+ operational again after a daemon restart (default is %d).\n\
-i, --interval Set the status polling interval in seconds (default is %d)\n\
-t, --timeout Set the unresponsiveness timeout in seconds (default is %d)\n\
-T, --restart-timeout\n\
-v, --version Print program version\n\
-h, --help Display this help and exit\n",
frr_vtydir, DEFAULT_LOGLEVEL, LOG_EMERG, LOG_DEBUG, LOG_DEBUG,
- DEFAULT_MIN_RESTART, DEFAULT_MAX_RESTART, DEFAULT_PERIOD,
- DEFAULT_TIMEOUT, DEFAULT_RESTART_TIMEOUT,
- DEFAULT_RESTART_CMD, DEFAULT_START_CMD, DEFAULT_STOP_CMD,
- frr_vtydir);
+ DEFAULT_MIN_RESTART, DEFAULT_MAX_RESTART,
+ DEFAULT_OPERATIONAL_TIMEOUT, DEFAULT_PERIOD, DEFAULT_TIMEOUT,
+ DEFAULT_RESTART_TIMEOUT, DEFAULT_RESTART_CMD, DEFAULT_START_CMD,
+ DEFAULT_STOP_CMD, frr_vtydir);
}
static pid_t run_background(char *shell_cmd)
struct timeval delay;
time_elapsed(&delay, &restart->time);
+
+ if (gs.reading_configuration) {
+ zlog_err(
+ "%s %s child process appears to still be reading configuration, delaying for another %lu time",
+ restart->what, restart->name, gs.restart_timeout);
+ thread_add_timer(master, restart_kill, restart,
+ gs.restart_timeout, &restart->t_kill);
+ return;
+ }
+
zlog_warn(
"%s %s child process %d still running after %ld seconds, sending signal %d",
restart->what, restart->name, (int)restart->pid,
restart->kills = 0;
{
char cmd[strlen(command) + strlen(restart->name) + 1];
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wformat-nonliteral"
+ /* user supplied command string has a %s for the daemon name */
snprintf(cmd, sizeof(cmd), command, restart->name);
+#pragma GCC diagnostic pop
if ((restart->pid = run_background(cmd)) > 0) {
thread_add_timer(master, restart_kill, restart,
gs.restart_timeout, &restart->t_kill);
restart->pid = 0;
}
- systemd_send_status("FRR Operational");
-
/* Calculate the new restart interval. */
if (update_interval) {
if (delay.tv_sec > 2 * gs.max_restart_interval)
SET_WAKEUP_DOWN(dmn);
}
/*
 * Timer callback: report the status string "FRR Operational" through
 * systemd_send_status().
 *
 * It is armed on gs.t_operational with a delay of gs.operational_timeout when
 * gs.numdown drops to zero, so the status is only sent after that delay has
 * elapsed rather than immediately.  The timer is registered with a NULL
 * argument and the thread parameter is not used here.
 */
+static void daemon_restarting_operational(struct thread *thread)
+{
+ systemd_send_status("FRR Operational");
+}
+
static void daemon_down(struct daemon *dmn, const char *why)
{
if (IS_UP(dmn) || (dmn->state == DAEMON_INIT))
THREAD_OFF(dmn->t_wakeup);
if (try_connect(dmn) < 0)
SET_WAKEUP_DOWN(dmn);
+
+ systemd_send_status("FRR partially operational");
phase_check();
}
gs.numdown--;
dmn->connect_tries = 0;
zlog_notice("%s state -> up : %s", dmn->name, why);
- if (gs.numdown == 0)
+ if (gs.numdown == 0) {
daemon_send_ready(0);
+
+ THREAD_OFF(gs.t_operational);
+
+ thread_add_timer(master, daemon_restarting_operational, NULL,
+ gs.operational_timeout, &gs.t_operational);
+ }
+
SET_WAKEUP_ECHO(dmn);
phase_check();
}
zlog_debug("%s: attempting to connect", dmn->name);
dmn->connect_tries++;
- memset(&addr, 0, sizeof(struct sockaddr_un));
+ memset(&addr, 0, sizeof(addr));
addr.sun_family = AF_UNIX;
snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/%s.vty", gs.vtydir,
dmn->name);
case PHASE_WAITING_DOWN:
if (gs.numdown + IS_UP(gs.special) < gs.numdaemons)
break;
+ systemd_send_status("Phased Restart");
zlog_info("Phased restart: all routing daemons now down.");
run_job(&gs.special->restart, "restart", gs.restart_command, 1,
1);
case PHASE_ZEBRA_RESTART_PENDING:
if (gs.special->restart.pid)
break;
+ systemd_send_status("Zebra Restarting");
zlog_info("Phased restart: %s restart job completed.",
gs.special->name);
set_phase(PHASE_WAITING_ZEBRA_UP);
if (!IS_UP(gs.special))
break;
zlog_info("Phased restart: %s is now up.", gs.special->name);
- {
- struct daemon *dmn;
- for (dmn = gs.daemons; dmn; dmn = dmn->next) {
- if (dmn != gs.special)
- run_job(&dmn->restart, "start",
- gs.start_command, 1, 0);
- }
+ for (dmn = gs.daemons; dmn; dmn = dmn->next) {
+ if (dmn != gs.special)
+ run_job(&dmn->restart, "start",
+ gs.start_command, 1, 0);
}
gs.phase = PHASE_NONE;
THREAD_OFF(gs.t_phase_hanging);
struct timeval delay;
vty_out(vty, "watchfrr global phase: %s\n", phase_str[gs.phase]);
+ vty_out(vty, " Restart Command: %pSQq\n", gs.restart_command);
+ vty_out(vty, " Start Command: %pSQq\n", gs.start_command);
+ vty_out(vty, " Stop Command: %pSQq\n", gs.stop_command);
+ vty_out(vty, " Min Restart Interval: %ld\n", gs.min_restart_interval);
+ vty_out(vty, " Max Restart Interval: %ld\n", gs.max_restart_interval);
+ vty_out(vty, " Restart Timeout: %ld\n", gs.restart_timeout);
+ vty_out(vty, " Reading Configuration: %s\n",
+ gs.reading_configuration ? "yes" : "no");
if (gs.restart.pid)
vty_out(vty, " global restart running, pid %ld\n",
(long)gs.restart.pid);
}
#endif
/*
 * Mark configuration reading as in progress by setting
 * gs.reading_configuration.
 *
 * Passed as the first callback to cmd_init_config_callbacks() in main().
 * NOTE(review): presumably invoked when config loading begins -- confirm in
 * cmd_init_config_callbacks().  While the flag is set, the restart kill-timer
 * path logs and re-arms restart_kill for another gs.restart_timeout instead of
 * signalling the child process.
 */
+static void watchfrr_start_config(void)
+{
+ gs.reading_configuration = true;
+}
+
/*
 * Clear gs.reading_configuration, ending the period in which the restart
 * kill-timer path defers signalling a still-running child process.
 *
 * Passed as the second callback to cmd_init_config_callbacks() in main().
 * NOTE(review): presumably invoked when config loading finishes -- confirm in
 * cmd_init_config_callbacks().
 */
+static void watchfrr_end_config(void)
+{
+ gs.reading_configuration = false;
+}
+
static void watchfrr_init(int argc, char **argv)
{
const char *special = "zebra";
frr_help_exit(1);
}
} break;
+ case OPTION_MAXOPERATIONAL: {
+ char garbage[3];
+
+ if ((sscanf(optarg, "%ld%1s", &gs.operational_timeout,
+ garbage) != 1) ||
+ (gs.operational_timeout < 0)) {
+ fprintf(stderr,
+ "Invalid Operational_timeout argument: %s\n",
+ optarg);
+ frr_help_exit(1);
+ }
+ } break;
case OPTION_NETNS:
netns_en = true;
if (optarg && strchr(optarg, '/')) {
master = frr_init();
watchfrr_error_init();
watchfrr_init(argc, argv);
+ cmd_init_config_callbacks(watchfrr_start_config, watchfrr_end_config);
watchfrr_vty_init();
frr_config_fork();