git.proxmox.com Git - mirror_frr.git/blobdiff - watchfrr/watchfrr.c
Merge pull request #2834 from dslicenc/import-vrf-fixes
[mirror_frr.git] / watchfrr / watchfrr.c
index 6926154552e5380999fc581c4574099ec350d8b9..c6e7505979fbc9546c5e0cd438abacc5f5fe28e0 100644 (file)
@@ -27,6 +27,7 @@
 #include "command.h"
 #include "memory_vty.h"
 #include "libfrr.h"
+#include "lib_errors.h"
 
 #include <getopt.h>
 #include <sys/un.h>
@@ -35,6 +36,7 @@
 #include <systemd.h>
 
 #include "watchfrr.h"
+#include "watchfrr_errors.h"
 
 #ifndef MIN
 #define MIN(X,Y) (((X) <= (Y)) ? (X) : (Y))
 #define FUZZY(X) ((X)+JITTER((X)/20))
 
 #define DEFAULT_PERIOD         5
-#define DEFAULT_TIMEOUT                10
+#define DEFAULT_TIMEOUT                90
 #define DEFAULT_RESTART_TIMEOUT        20
 #define DEFAULT_LOGLEVEL       LOG_INFO
 #define DEFAULT_MIN_RESTART    60
 #define DEFAULT_MAX_RESTART    600
-#ifdef PATH_WATCHFRR_PID
-#define DEFAULT_PIDFILE                PATH_WATCHFRR_PID
-#else
-#define DEFAULT_PIDFILE                STATEDIR "/watchfrr.pid"
-#endif
-#ifdef DAEMON_VTY_DIR
-#define VTYDIR                 DAEMON_VTY_DIR
-#else
-#define VTYDIR                 STATEDIR
-#endif
 
 #define PING_TOKEN     "PING"
 
 /* Needs to be global, referenced somewhere inside libfrr. */
 struct thread_master *master;
+static char pidfile_default[256];
 
 static bool watch_only = false;
 
@@ -118,7 +111,7 @@ static struct global_state {
        int numdown; /* # of daemons that are not UP or UNRESPONSIVE */
 } gs = {
        .phase = PHASE_NONE,
-       .vtydir = VTYDIR,
+       .vtydir = frr_vtydir,
        .period = 1000 * DEFAULT_PERIOD,
        .timeout = DEFAULT_TIMEOUT,
        .restart_timeout = DEFAULT_RESTART_TIMEOUT,
@@ -147,7 +140,7 @@ struct daemon {
        daemon_state_t state;
        int fd;
        struct timeval echo_sent;
-       u_int connect_tries;
+       unsigned int connect_tries;
        struct thread *t_wakeup;
        struct thread *t_read;
        struct thread *t_write;
@@ -245,9 +238,9 @@ Otherwise, the interval is doubled (but capped at the -M value).\n\n",
                passing command-line arguments with embedded spaces.\n\
 -v, --version  Print program version\n\
 -h, --help     Display this help and exit\n",
-               VTYDIR, DEFAULT_LOGLEVEL, LOG_EMERG, LOG_DEBUG, LOG_DEBUG,
+               frr_vtydir, DEFAULT_LOGLEVEL, LOG_EMERG, LOG_DEBUG, LOG_DEBUG,
                DEFAULT_MIN_RESTART, DEFAULT_MAX_RESTART, DEFAULT_PERIOD,
-               DEFAULT_TIMEOUT, DEFAULT_RESTART_TIMEOUT, DEFAULT_PIDFILE);
+               DEFAULT_TIMEOUT, DEFAULT_RESTART_TIMEOUT, pidfile_default);
 }
 
 static pid_t run_background(char *shell_cmd)
@@ -256,8 +249,9 @@ static pid_t run_background(char *shell_cmd)
 
        switch (child = fork()) {
        case -1:
-               zlog_err("fork failed, cannot run command [%s]: %s", shell_cmd,
-                        safe_strerror(errno));
+               flog_err_sys(LIB_ERR_SYSTEM_CALL,
+                            "fork failed, cannot run command [%s]: %s",
+                            shell_cmd, safe_strerror(errno));
                return -1;
        case 0:
                /* Child process. */
@@ -271,14 +265,16 @@ static pid_t run_background(char *shell_cmd)
                        char dashc[] = "-c";
                        char *const argv[4] = {shell, dashc, shell_cmd, NULL};
                        execv("/bin/sh", argv);
-                       zlog_err("execv(/bin/sh -c '%s') failed: %s", shell_cmd,
-                                safe_strerror(errno));
+                       flog_err_sys(LIB_ERR_SYSTEM_CALL,
+                                    "execv(/bin/sh -c '%s') failed: %s",
+                                    shell_cmd, safe_strerror(errno));
                        _exit(127);
                }
        default:
                /* Parent process: we will reap the child later. */
-               zlog_err("Forked background command [pid %d]: %s", (int)child,
-                        shell_cmd);
+               flog_err_sys(LIB_ERR_SYSTEM_CALL,
+                            "Forked background command [pid %d]: %s",
+                            (int)child, shell_cmd);
                return child;
        }
 }
@@ -335,7 +331,8 @@ static void sigchild(void)
 
        switch (child = waitpid(-1, &status, WNOHANG)) {
        case -1:
-               zlog_err("waitpid failed: %s", safe_strerror(errno));
+               flog_err_sys(LIB_ERR_SYSTEM_CALL, "waitpid failed: %s",
+                            safe_strerror(errno));
                return;
        case 0:
                zlog_warn("SIGCHLD received, but waitpid did not reap a child");
@@ -358,7 +355,8 @@ static void sigchild(void)
                 * completed. */
                gettimeofday(&restart->time, NULL);
        } else {
-               zlog_err(
+               flog_err_sys(
+                       LIB_ERR_SYSTEM_CALL,
                        "waitpid returned status for an unknown child process %d",
                        (int)child);
                name = "(unknown)";
@@ -379,8 +377,10 @@ static void sigchild(void)
                        zlog_debug("%s %s process %d exited normally", what,
                                   name, (int)child);
        } else
-               zlog_err("cannot interpret %s %s process %d wait status 0x%x",
-                        what, name, (int)child, status);
+               flog_err_sys(
+                       LIB_ERR_SYSTEM_CALL,
+                       "cannot interpret %s %s process %d wait status 0x%x",
+                       what, name, (int)child, status);
        phase_check();
 }
 
@@ -490,8 +490,9 @@ static int wakeup_init(struct thread *t_wakeup)
        dmn->t_wakeup = NULL;
        if (try_connect(dmn) < 0) {
                SET_WAKEUP_DOWN(dmn);
-               zlog_err("%s state -> down : initial connection attempt failed",
-                        dmn->name);
+               flog_err(WATCHFRR_ERR_CONNECTION,
+                         "%s state -> down : initial connection attempt failed",
+                         dmn->name);
                dmn->state = DAEMON_DOWN;
        }
        return 0;
@@ -500,7 +501,8 @@ static int wakeup_init(struct thread *t_wakeup)
 static void daemon_down(struct daemon *dmn, const char *why)
 {
        if (IS_UP(dmn) || (dmn->state == DAEMON_INIT))
-               zlog_err("%s state -> down : %s", dmn->name, why);
+               flog_err(WATCHFRR_ERR_CONNECTION,
+                         "%s state -> down : %s", dmn->name, why);
        else if (gs.loglevel > LOG_DEBUG)
                zlog_debug("%s still down : %s", dmn->name, why);
        if (IS_UP(dmn))
@@ -561,7 +563,7 @@ static int handle_read(struct thread *t_read)
                snprintf(why, sizeof(why),
                         "read returned bad echo response of %d bytes "
                         "(expecting %u): %.*s",
-                        (int)rc, (u_int)sizeof(resp), (int)rc, buf);
+                        (int)rc, (unsigned int)sizeof(resp), (int)rc, buf);
                daemon_down(dmn, why);
                return 0;
        }
@@ -605,10 +607,13 @@ static void daemon_send_ready(void)
                FILE *fp;
 
                fp = fopen(DAEMON_VTY_DIR "/watchfrr.started", "w");
-               fclose(fp);
+               if (fp)
+                       fclose(fp);
+#if defined HAVE_SYSTEMD
                zlog_notice(
                        "Watchfrr: Notifying Systemd we are up and running");
                systemd_send_started(master, 0);
+#endif
                sent = 1;
        }
 }
@@ -690,21 +695,23 @@ static int try_connect(struct daemon *dmn)
           of creating a socket. */
        if (access(addr.sun_path, W_OK) < 0) {
                if (errno != ENOENT)
-                       zlog_err("%s: access to socket %s denied: %s",
-                                dmn->name, addr.sun_path,
-                                safe_strerror(errno));
+                       flog_err_sys(LIB_ERR_SYSTEM_CALL,
+                                    "%s: access to socket %s denied: %s",
+                                    dmn->name, addr.sun_path,
+                                    safe_strerror(errno));
                return -1;
        }
 
        if ((sock = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
-               zlog_err("%s(%s): cannot make socket: %s", __func__,
-                        addr.sun_path, safe_strerror(errno));
+               flog_err_sys(LIB_ERR_SOCKET, "%s(%s): cannot make socket: %s",
+                            __func__, addr.sun_path, safe_strerror(errno));
                return -1;
        }
 
        if (set_nonblocking(sock) < 0 || set_cloexec(sock) < 0) {
-               zlog_err("%s(%s): set_nonblocking/cloexec(%d) failed", __func__,
-                        addr.sun_path, sock);
+               flog_err_sys(LIB_ERR_SYSTEM_CALL,
+                            "%s(%s): set_nonblocking/cloexec(%d) failed",
+                            __func__, addr.sun_path, sock);
                close(sock);
                return -1;
        }
@@ -741,8 +748,9 @@ static int try_connect(struct daemon *dmn)
 static int phase_hanging(struct thread *t_hanging)
 {
        gs.t_phase_hanging = NULL;
-       zlog_err("Phase [%s] hanging for %ld seconds, aborting phased restart",
-                phase_str[gs.phase], PHASE_TIMEOUT);
+       flog_err(WATCHFRR_ERR_CONNECTION,
+                 "Phase [%s] hanging for %ld seconds, aborting phased restart",
+                 phase_str[gs.phase], PHASE_TIMEOUT);
        gs.phase = PHASE_NONE;
        return 0;
 }
@@ -856,10 +864,10 @@ static int wakeup_unresponsive(struct thread *t_wakeup)
 
        dmn->t_wakeup = NULL;
        if (dmn->state != DAEMON_UNRESPONSIVE)
-               zlog_err(
-                       "%s: no longer unresponsive (now %s), "
-                       "wakeup should have been cancelled!",
-                       dmn->name, state_str[dmn->state]);
+               flog_err(WATCHFRR_ERR_CONNECTION,
+                         "%s: no longer unresponsive (now %s), "
+                         "wakeup should have been cancelled!",
+                         dmn->name, state_str[dmn->state]);
        else {
                SET_WAKEUP_UNRESPONSIVE(dmn);
                try_restart(dmn);
@@ -873,10 +881,10 @@ static int wakeup_no_answer(struct thread *t_wakeup)
 
        dmn->t_wakeup = NULL;
        dmn->state = DAEMON_UNRESPONSIVE;
-       zlog_err(
-               "%s state -> unresponsive : no response yet to ping "
-               "sent %ld seconds ago",
-               dmn->name, gs.timeout);
+       flog_err(WATCHFRR_ERR_CONNECTION,
+                 "%s state -> unresponsive : no response yet to ping "
+                 "sent %ld seconds ago",
+                 dmn->name, gs.timeout);
        SET_WAKEUP_UNRESPONSIVE(dmn);
        try_restart(dmn);
        return 0;
@@ -894,7 +902,7 @@ static int wakeup_send_echo(struct thread *t_wakeup)
                char why[100 + sizeof(echocmd)];
                snprintf(why, sizeof(why),
                         "write '%s' returned %d instead of %u", echocmd,
-                        (int)rc, (u_int)sizeof(echocmd));
+                        (int)rc, (unsigned int)sizeof(echocmd));
                daemon_down(dmn, why);
        } else {
                gettimeofday(&dmn->echo_sent, NULL);
@@ -905,6 +913,16 @@ static int wakeup_send_echo(struct thread *t_wakeup)
        return 0;
 }
 
+/* Report whether every daemon on the gs.daemons list is in DAEMON_UP state. */
+bool check_all_up(void)
+{
+       const struct daemon *cur = gs.daemons;
+
+       while (cur && cur->state == DAEMON_UP)
+               cur = cur->next;
+       return cur == NULL; /* true only if the walk reached the list end */
+}
+
 static void sigint(void)
 {
        zlog_notice("Terminating on signal");
@@ -973,20 +991,33 @@ FRR_DAEMON_INFO(watchfrr, WATCHFRR,
 
                .privs = &watchfrr_privs, )
 
+#define DEPRECATED_OPTIONS "aAezR:"
+
 int main(int argc, char **argv)
 {
        int opt;
-       const char *pidfile = DEFAULT_PIDFILE;
+       const char *pidfile = pidfile_default;
        const char *special = "zebra";
        const char *blankstr = NULL;
 
+       snprintf(pidfile_default, sizeof(pidfile_default), "%s/watchfrr.pid",
+                frr_vtydir);
+
        frr_preinit(&watchfrr_di, argc, argv);
        progname = watchfrr_di.progname;
 
-       frr_opt_add("b:dk:l:i:p:r:S:s:t:T:", longopts, "");
+       frr_opt_add("b:dk:l:i:p:r:S:s:t:T:" DEPRECATED_OPTIONS, longopts, "");
 
        gs.restart.name = "all";
        while ((opt = frr_getopt(argc, argv, NULL)) != EOF) {
+               if (opt && opt < 128 && strchr(DEPRECATED_OPTIONS, opt)) {
+                       fprintf(stderr,
+                               "The -%c option no longer exists.\n"
+                               "Please refer to the watchfrr(8) man page.\n",
+                               opt);
+                       exit(1);
+               }
+
                switch (opt) {
                case 0:
                        break;
@@ -1132,6 +1163,7 @@ int main(int argc, char **argv)
        gs.restart.interval = gs.min_restart_interval;
 
        master = frr_init();
+       watchfrr_error_init();
 
        zlog_set_level(ZLOG_DEST_MONITOR, ZLOG_DISABLED);
        if (watchfrr_di.daemon_mode) {
@@ -1157,7 +1189,7 @@ int main(int argc, char **argv)
 
                        if (!(dmn = (struct daemon *)calloc(1, sizeof(*dmn)))) {
                                fprintf(stderr, "calloc(1,%u) failed: %s\n",
-                                       (u_int)sizeof(*dmn),
+                                       (unsigned int)sizeof(*dmn),
                                        safe_strerror(errno));
                                return 1;
                        }