#include "command.h"
#include "memory_vty.h"
#include "libfrr.h"
+#include "lib_errors.h"
#include <getopt.h>
#include <sys/un.h>
#include <systemd.h>
#include "watchfrr.h"
+#include "watchfrr_errors.h"
#ifndef MIN
#define MIN(X,Y) (((X) <= (Y)) ? (X) : (Y))
#define FUZZY(X) ((X)+JITTER((X)/20))
#define DEFAULT_PERIOD 5
-#define DEFAULT_TIMEOUT 10
+#define DEFAULT_TIMEOUT 90
#define DEFAULT_RESTART_TIMEOUT 20
#define DEFAULT_LOGLEVEL LOG_INFO
#define DEFAULT_MIN_RESTART 60
#define DEFAULT_MAX_RESTART 600
-#ifdef PATH_WATCHFRR_PID
-#define DEFAULT_PIDFILE PATH_WATCHFRR_PID
-#else
-#define DEFAULT_PIDFILE STATEDIR "/watchfrr.pid"
-#endif
-#ifdef DAEMON_VTY_DIR
-#define VTYDIR DAEMON_VTY_DIR
-#else
-#define VTYDIR STATEDIR
-#endif
#define PING_TOKEN "PING"
/* Needs to be global, referenced somewhere inside libfrr. */
struct thread_master *master;
+static char pidfile_default[256];
static bool watch_only = false;
int numdown; /* # of daemons that are not UP or UNRESPONSIVE */
} gs = {
.phase = PHASE_NONE,
- .vtydir = VTYDIR,
+ .vtydir = frr_vtydir,
.period = 1000 * DEFAULT_PERIOD,
.timeout = DEFAULT_TIMEOUT,
.restart_timeout = DEFAULT_RESTART_TIMEOUT,
daemon_state_t state;
int fd;
struct timeval echo_sent;
- u_int connect_tries;
+ unsigned int connect_tries;
struct thread *t_wakeup;
struct thread *t_read;
struct thread *t_write;
passing command-line arguments with embedded spaces.\n\
-v, --version Print program version\n\
-h, --help Display this help and exit\n",
- VTYDIR, DEFAULT_LOGLEVEL, LOG_EMERG, LOG_DEBUG, LOG_DEBUG,
+ frr_vtydir, DEFAULT_LOGLEVEL, LOG_EMERG, LOG_DEBUG, LOG_DEBUG,
DEFAULT_MIN_RESTART, DEFAULT_MAX_RESTART, DEFAULT_PERIOD,
- DEFAULT_TIMEOUT, DEFAULT_RESTART_TIMEOUT, DEFAULT_PIDFILE);
+ DEFAULT_TIMEOUT, DEFAULT_RESTART_TIMEOUT, pidfile_default);
}
static pid_t run_background(char *shell_cmd)
switch (child = fork()) {
case -1:
- zlog_err("fork failed, cannot run command [%s]: %s", shell_cmd,
- safe_strerror(errno));
+ flog_err_sys(LIB_ERR_SYSTEM_CALL,
+ "fork failed, cannot run command [%s]: %s",
+ shell_cmd, safe_strerror(errno));
return -1;
case 0:
/* Child process. */
char dashc[] = "-c";
char *const argv[4] = {shell, dashc, shell_cmd, NULL};
execv("/bin/sh", argv);
- zlog_err("execv(/bin/sh -c '%s') failed: %s", shell_cmd,
- safe_strerror(errno));
+ flog_err_sys(LIB_ERR_SYSTEM_CALL,
+ "execv(/bin/sh -c '%s') failed: %s",
+ shell_cmd, safe_strerror(errno));
_exit(127);
}
default:
/* Parent process: we will reap the child later. */
- zlog_err("Forked background command [pid %d]: %s", (int)child,
- shell_cmd);
+ flog_err_sys(LIB_ERR_SYSTEM_CALL,
+ "Forked background command [pid %d]: %s",
+ (int)child, shell_cmd);
return child;
}
}
switch (child = waitpid(-1, &status, WNOHANG)) {
case -1:
- zlog_err("waitpid failed: %s", safe_strerror(errno));
+ flog_err_sys(LIB_ERR_SYSTEM_CALL, "waitpid failed: %s",
+ safe_strerror(errno));
return;
case 0:
zlog_warn("SIGCHLD received, but waitpid did not reap a child");
* completed. */
gettimeofday(&restart->time, NULL);
} else {
- zlog_err(
+ flog_err_sys(
+ LIB_ERR_SYSTEM_CALL,
"waitpid returned status for an unknown child process %d",
(int)child);
name = "(unknown)";
zlog_debug("%s %s process %d exited normally", what,
name, (int)child);
} else
- zlog_err("cannot interpret %s %s process %d wait status 0x%x",
- what, name, (int)child, status);
+ flog_err_sys(
+ LIB_ERR_SYSTEM_CALL,
+ "cannot interpret %s %s process %d wait status 0x%x",
+ what, name, (int)child, status);
phase_check();
}
dmn->t_wakeup = NULL;
if (try_connect(dmn) < 0) {
SET_WAKEUP_DOWN(dmn);
- zlog_err("%s state -> down : initial connection attempt failed",
- dmn->name);
+ flog_err(WATCHFRR_ERR_CONNECTION,
+ "%s state -> down : initial connection attempt failed",
+ dmn->name);
dmn->state = DAEMON_DOWN;
}
return 0;
static void daemon_down(struct daemon *dmn, const char *why)
{
if (IS_UP(dmn) || (dmn->state == DAEMON_INIT))
- zlog_err("%s state -> down : %s", dmn->name, why);
+ flog_err(WATCHFRR_ERR_CONNECTION,
+ "%s state -> down : %s", dmn->name, why);
else if (gs.loglevel > LOG_DEBUG)
zlog_debug("%s still down : %s", dmn->name, why);
if (IS_UP(dmn))
snprintf(why, sizeof(why),
"read returned bad echo response of %d bytes "
"(expecting %u): %.*s",
- (int)rc, (u_int)sizeof(resp), (int)rc, buf);
+ (int)rc, (unsigned int)sizeof(resp), (int)rc, buf);
daemon_down(dmn, why);
return 0;
}
FILE *fp;
fp = fopen(DAEMON_VTY_DIR "/watchfrr.started", "w");
- fclose(fp);
+ if (fp)
+ fclose(fp);
+#if defined HAVE_SYSTEMD
zlog_notice(
"Watchfrr: Notifying Systemd we are up and running");
systemd_send_started(master, 0);
+#endif
sent = 1;
}
}
of creating a socket. */
if (access(addr.sun_path, W_OK) < 0) {
if (errno != ENOENT)
- zlog_err("%s: access to socket %s denied: %s",
- dmn->name, addr.sun_path,
- safe_strerror(errno));
+ flog_err_sys(LIB_ERR_SYSTEM_CALL,
+ "%s: access to socket %s denied: %s",
+ dmn->name, addr.sun_path,
+ safe_strerror(errno));
return -1;
}
if ((sock = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
- zlog_err("%s(%s): cannot make socket: %s", __func__,
- addr.sun_path, safe_strerror(errno));
+ flog_err_sys(LIB_ERR_SOCKET, "%s(%s): cannot make socket: %s",
+ __func__, addr.sun_path, safe_strerror(errno));
return -1;
}
if (set_nonblocking(sock) < 0 || set_cloexec(sock) < 0) {
- zlog_err("%s(%s): set_nonblocking/cloexec(%d) failed", __func__,
- addr.sun_path, sock);
+ flog_err_sys(LIB_ERR_SYSTEM_CALL,
+ "%s(%s): set_nonblocking/cloexec(%d) failed",
+ __func__, addr.sun_path, sock);
close(sock);
return -1;
}
/*
 * Handler invoked when a phased restart has been stuck in its current phase.
 * NOTE(review): the struct thread * signature suggests this is scheduled as
 * a timer callback stored in gs.t_phase_hanging -- confirm at the
 * scheduling site.
 *
 * Clears the stored gs.t_phase_hanging handle, logs which phase hung and for
 * how long (PHASE_TIMEOUT seconds), then resets gs.phase to PHASE_NONE so
 * the phased restart is abandoned.  The t_hanging argument is not used in
 * the body.  Always returns 0.
 */
static int phase_hanging(struct thread *t_hanging)
{
gs.t_phase_hanging = NULL;
- zlog_err("Phase [%s] hanging for %ld seconds, aborting phased restart",
- phase_str[gs.phase], PHASE_TIMEOUT);
+ flog_err(WATCHFRR_ERR_CONNECTION,
+ "Phase [%s] hanging for %ld seconds, aborting phased restart",
+ phase_str[gs.phase], PHASE_TIMEOUT);
gs.phase = PHASE_NONE;
return 0;
}
dmn->t_wakeup = NULL;
if (dmn->state != DAEMON_UNRESPONSIVE)
- zlog_err(
- "%s: no longer unresponsive (now %s), "
- "wakeup should have been cancelled!",
- dmn->name, state_str[dmn->state]);
+ flog_err(WATCHFRR_ERR_CONNECTION,
+ "%s: no longer unresponsive (now %s), "
+ "wakeup should have been cancelled!",
+ dmn->name, state_str[dmn->state]);
else {
SET_WAKEUP_UNRESPONSIVE(dmn);
try_restart(dmn);
dmn->t_wakeup = NULL;
dmn->state = DAEMON_UNRESPONSIVE;
- zlog_err(
- "%s state -> unresponsive : no response yet to ping "
- "sent %ld seconds ago",
- dmn->name, gs.timeout);
+ flog_err(WATCHFRR_ERR_CONNECTION,
+ "%s state -> unresponsive : no response yet to ping "
+ "sent %ld seconds ago",
+ dmn->name, gs.timeout);
SET_WAKEUP_UNRESPONSIVE(dmn);
try_restart(dmn);
return 0;
char why[100 + sizeof(echocmd)];
snprintf(why, sizeof(why),
"write '%s' returned %d instead of %u", echocmd,
- (int)rc, (u_int)sizeof(echocmd));
+ (int)rc, (unsigned int)sizeof(echocmd));
daemon_down(dmn, why);
} else {
gettimeofday(&dmn->echo_sent, NULL);
return 0;
}
+/*
+ * Report whether every daemon on the gs.daemons list is in state DAEMON_UP.
+ *
+ * Returns false at the first daemon found in any other state, and true
+ * once the whole list has been walked (an empty list therefore yields true).
+ */
+bool check_all_up(void)
+{
+	struct daemon *cur = gs.daemons;
+
+	while (cur) {
+		if (cur->state != DAEMON_UP)
+			return false;
+		cur = cur->next;
+	}
+
+	return true;
+}
+
static void sigint(void)
{
zlog_notice("Terminating on signal");
.privs = &watchfrr_privs, )
+#define DEPRECATED_OPTIONS "aAezR:"
+
int main(int argc, char **argv)
{
int opt;
- const char *pidfile = DEFAULT_PIDFILE;
+ const char *pidfile = pidfile_default;
const char *special = "zebra";
const char *blankstr = NULL;
+ snprintf(pidfile_default, sizeof(pidfile_default), "%s/watchfrr.pid",
+ frr_vtydir);
+
frr_preinit(&watchfrr_di, argc, argv);
progname = watchfrr_di.progname;
- frr_opt_add("b:dk:l:i:p:r:S:s:t:T:", longopts, "");
+ frr_opt_add("b:dk:l:i:p:r:S:s:t:T:" DEPRECATED_OPTIONS, longopts, "");
gs.restart.name = "all";
while ((opt = frr_getopt(argc, argv, NULL)) != EOF) {
+ if (opt && opt < 128 && strchr(DEPRECATED_OPTIONS, opt)) {
+ fprintf(stderr,
+ "The -%c option no longer exists.\n"
+ "Please refer to the watchfrr(8) man page.\n",
+ opt);
+ exit(1);
+ }
+
switch (opt) {
case 0:
break;
gs.restart.interval = gs.min_restart_interval;
master = frr_init();
+ watchfrr_error_init();
zlog_set_level(ZLOG_DEST_MONITOR, ZLOG_DISABLED);
if (watchfrr_di.daemon_mode) {
if (!(dmn = (struct daemon *)calloc(1, sizeof(*dmn)))) {
fprintf(stderr, "calloc(1,%u) failed: %s\n",
- (u_int)sizeof(*dmn),
+ (unsigned int)sizeof(*dmn),
safe_strerror(errno));
return 1;
}