]> git.proxmox.com Git - mirror_frr.git/blobdiff - watchfrr/watchfrr.c
Merge pull request #2834 from dslicenc/import-vrf-fixes
[mirror_frr.git] / watchfrr / watchfrr.c
index a4af0e36206638afeb960a79ec08eca7a43966ec..c6e7505979fbc9546c5e0cd438abacc5f5fe28e0 100644 (file)
@@ -27,6 +27,7 @@
 #include "command.h"
 #include "memory_vty.h"
 #include "libfrr.h"
+#include "lib_errors.h"
 
 #include <getopt.h>
 #include <sys/un.h>
@@ -35,6 +36,7 @@
 #include <systemd.h>
 
 #include "watchfrr.h"
+#include "watchfrr_errors.h"
 
 #ifndef MIN
 #define MIN(X,Y) (((X) <= (Y)) ? (X) : (Y))
@@ -45,7 +47,7 @@
 #define FUZZY(X) ((X)+JITTER((X)/20))
 
 #define DEFAULT_PERIOD         5
-#define DEFAULT_TIMEOUT                10
+#define DEFAULT_TIMEOUT                90
 #define DEFAULT_RESTART_TIMEOUT        20
 #define DEFAULT_LOGLEVEL       LOG_INFO
 #define DEFAULT_MIN_RESTART    60
@@ -138,7 +140,7 @@ struct daemon {
        daemon_state_t state;
        int fd;
        struct timeval echo_sent;
-       u_int connect_tries;
+       unsigned int connect_tries;
        struct thread *t_wakeup;
        struct thread *t_read;
        struct thread *t_write;
@@ -247,8 +249,9 @@ static pid_t run_background(char *shell_cmd)
 
        switch (child = fork()) {
        case -1:
-               zlog_err("fork failed, cannot run command [%s]: %s", shell_cmd,
-                        safe_strerror(errno));
+               flog_err_sys(LIB_ERR_SYSTEM_CALL,
+                            "fork failed, cannot run command [%s]: %s",
+                            shell_cmd, safe_strerror(errno));
                return -1;
        case 0:
                /* Child process. */
@@ -262,14 +265,16 @@ static pid_t run_background(char *shell_cmd)
                        char dashc[] = "-c";
                        char *const argv[4] = {shell, dashc, shell_cmd, NULL};
                        execv("/bin/sh", argv);
-                       zlog_err("execv(/bin/sh -c '%s') failed: %s", shell_cmd,
-                                safe_strerror(errno));
+                       flog_err_sys(LIB_ERR_SYSTEM_CALL,
+                                    "execv(/bin/sh -c '%s') failed: %s",
+                                    shell_cmd, safe_strerror(errno));
                        _exit(127);
                }
        default:
                /* Parent process: we will reap the child later. */
-               zlog_err("Forked background command [pid %d]: %s", (int)child,
-                        shell_cmd);
+               flog_err_sys(LIB_ERR_SYSTEM_CALL,
+                            "Forked background command [pid %d]: %s",
+                            (int)child, shell_cmd);
                return child;
        }
 }
@@ -326,7 +331,8 @@ static void sigchild(void)
 
        switch (child = waitpid(-1, &status, WNOHANG)) {
        case -1:
-               zlog_err("waitpid failed: %s", safe_strerror(errno));
+               flog_err_sys(LIB_ERR_SYSTEM_CALL, "waitpid failed: %s",
+                            safe_strerror(errno));
                return;
        case 0:
                zlog_warn("SIGCHLD received, but waitpid did not reap a child");
@@ -349,7 +355,8 @@ static void sigchild(void)
                 * completed. */
                gettimeofday(&restart->time, NULL);
        } else {
-               zlog_err(
+               flog_err_sys(
+                       LIB_ERR_SYSTEM_CALL,
                        "waitpid returned status for an unknown child process %d",
                        (int)child);
                name = "(unknown)";
@@ -370,8 +377,10 @@ static void sigchild(void)
                        zlog_debug("%s %s process %d exited normally", what,
                                   name, (int)child);
        } else
-               zlog_err("cannot interpret %s %s process %d wait status 0x%x",
-                        what, name, (int)child, status);
+               flog_err_sys(
+                       LIB_ERR_SYSTEM_CALL,
+                       "cannot interpret %s %s process %d wait status 0x%x",
+                       what, name, (int)child, status);
        phase_check();
 }
 
@@ -481,8 +490,9 @@ static int wakeup_init(struct thread *t_wakeup)
        dmn->t_wakeup = NULL;
        if (try_connect(dmn) < 0) {
                SET_WAKEUP_DOWN(dmn);
-               zlog_err("%s state -> down : initial connection attempt failed",
-                        dmn->name);
+               flog_err(WATCHFRR_ERR_CONNECTION,
+                         "%s state -> down : initial connection attempt failed",
+                         dmn->name);
                dmn->state = DAEMON_DOWN;
        }
        return 0;
@@ -491,7 +501,8 @@ static int wakeup_init(struct thread *t_wakeup)
 static void daemon_down(struct daemon *dmn, const char *why)
 {
        if (IS_UP(dmn) || (dmn->state == DAEMON_INIT))
-               zlog_err("%s state -> down : %s", dmn->name, why);
+               flog_err(WATCHFRR_ERR_CONNECTION,
+                         "%s state -> down : %s", dmn->name, why);
        else if (gs.loglevel > LOG_DEBUG)
                zlog_debug("%s still down : %s", dmn->name, why);
        if (IS_UP(dmn))
@@ -552,7 +563,7 @@ static int handle_read(struct thread *t_read)
                snprintf(why, sizeof(why),
                         "read returned bad echo response of %d bytes "
                         "(expecting %u): %.*s",
-                        (int)rc, (u_int)sizeof(resp), (int)rc, buf);
+                        (int)rc, (unsigned int)sizeof(resp), (int)rc, buf);
                daemon_down(dmn, why);
                return 0;
        }
@@ -684,21 +695,23 @@ static int try_connect(struct daemon *dmn)
           of creating a socket. */
        if (access(addr.sun_path, W_OK) < 0) {
                if (errno != ENOENT)
-                       zlog_err("%s: access to socket %s denied: %s",
-                                dmn->name, addr.sun_path,
-                                safe_strerror(errno));
+                       flog_err_sys(LIB_ERR_SYSTEM_CALL,
+                                    "%s: access to socket %s denied: %s",
+                                    dmn->name, addr.sun_path,
+                                    safe_strerror(errno));
                return -1;
        }
 
        if ((sock = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
-               zlog_err("%s(%s): cannot make socket: %s", __func__,
-                        addr.sun_path, safe_strerror(errno));
+               flog_err_sys(LIB_ERR_SOCKET, "%s(%s): cannot make socket: %s",
+                            __func__, addr.sun_path, safe_strerror(errno));
                return -1;
        }
 
        if (set_nonblocking(sock) < 0 || set_cloexec(sock) < 0) {
-               zlog_err("%s(%s): set_nonblocking/cloexec(%d) failed", __func__,
-                        addr.sun_path, sock);
+               flog_err_sys(LIB_ERR_SYSTEM_CALL,
+                            "%s(%s): set_nonblocking/cloexec(%d) failed",
+                            __func__, addr.sun_path, sock);
                close(sock);
                return -1;
        }
@@ -735,8 +748,9 @@ static int try_connect(struct daemon *dmn)
 static int phase_hanging(struct thread *t_hanging)
 {
        gs.t_phase_hanging = NULL;
-       zlog_err("Phase [%s] hanging for %ld seconds, aborting phased restart",
-                phase_str[gs.phase], PHASE_TIMEOUT);
+       flog_err(WATCHFRR_ERR_CONNECTION,
+                 "Phase [%s] hanging for %ld seconds, aborting phased restart",
+                 phase_str[gs.phase], PHASE_TIMEOUT);
        gs.phase = PHASE_NONE;
        return 0;
 }
@@ -850,10 +864,10 @@ static int wakeup_unresponsive(struct thread *t_wakeup)
 
        dmn->t_wakeup = NULL;
        if (dmn->state != DAEMON_UNRESPONSIVE)
-               zlog_err(
-                       "%s: no longer unresponsive (now %s), "
-                       "wakeup should have been cancelled!",
-                       dmn->name, state_str[dmn->state]);
+               flog_err(WATCHFRR_ERR_CONNECTION,
+                         "%s: no longer unresponsive (now %s), "
+                         "wakeup should have been cancelled!",
+                         dmn->name, state_str[dmn->state]);
        else {
                SET_WAKEUP_UNRESPONSIVE(dmn);
                try_restart(dmn);
@@ -867,10 +881,10 @@ static int wakeup_no_answer(struct thread *t_wakeup)
 
        dmn->t_wakeup = NULL;
        dmn->state = DAEMON_UNRESPONSIVE;
-       zlog_err(
-               "%s state -> unresponsive : no response yet to ping "
-               "sent %ld seconds ago",
-               dmn->name, gs.timeout);
+       flog_err(WATCHFRR_ERR_CONNECTION,
+                 "%s state -> unresponsive : no response yet to ping "
+                 "sent %ld seconds ago",
+                 dmn->name, gs.timeout);
        SET_WAKEUP_UNRESPONSIVE(dmn);
        try_restart(dmn);
        return 0;
@@ -888,7 +902,7 @@ static int wakeup_send_echo(struct thread *t_wakeup)
                char why[100 + sizeof(echocmd)];
                snprintf(why, sizeof(why),
                         "write '%s' returned %d instead of %u", echocmd,
-                        (int)rc, (u_int)sizeof(echocmd));
+                        (int)rc, (unsigned int)sizeof(echocmd));
                daemon_down(dmn, why);
        } else {
                gettimeofday(&dmn->echo_sent, NULL);
@@ -899,6 +913,16 @@ static int wakeup_send_echo(struct thread *t_wakeup)
        return 0;
 }
 
+bool check_all_up(void)
+{
+       struct daemon *dmn;
+
+       for (dmn = gs.daemons; dmn; dmn = dmn->next)
+               if (dmn->state != DAEMON_UP)
+                       return false;
+       return true;
+}
+
 static void sigint(void)
 {
        zlog_notice("Terminating on signal");
@@ -1139,6 +1163,7 @@ int main(int argc, char **argv)
        gs.restart.interval = gs.min_restart_interval;
 
        master = frr_init();
+       watchfrr_error_init();
 
        zlog_set_level(ZLOG_DEST_MONITOR, ZLOG_DISABLED);
        if (watchfrr_di.daemon_mode) {
@@ -1164,7 +1189,7 @@ int main(int argc, char **argv)
 
                        if (!(dmn = (struct daemon *)calloc(1, sizeof(*dmn)))) {
                                fprintf(stderr, "calloc(1,%u) failed: %s\n",
-                                       (u_int)sizeof(*dmn),
+                                       (unsigned int)sizeof(*dmn),
                                        safe_strerror(errno));
                                return 1;
                        }