git.proxmox.com Git - mirror_frr.git/blobdiff - watchfrr/watchfrr.c
Merge pull request #3485 from dslicenc/frr-reload-delete-vrf
[mirror_frr.git] / watchfrr / watchfrr.c
index 9bbe04c338c97888d5c6589f8132cd63a2320e87..e28da6db8c2deb31cddb0b98c57cb007a6cca1ff 100644 (file)
 
 #define PING_TOKEN     "PING"
 
+DEFINE_MGROUP(WATCHFRR, "watchfrr")
+DEFINE_MTYPE_STATIC(WATCHFRR, WATCHFRR_DAEMON, "watchfrr daemon entry")
+
 /* Needs to be global, referenced somewhere inside libfrr. */
 struct thread_master *master;
-static char pidfile_default[256];
 
 static bool watch_only = false;
 
 typedef enum {
        PHASE_NONE = 0,
+       PHASE_INIT,
        PHASE_STOPS_PENDING,
        PHASE_WAITING_DOWN,
        PHASE_ZEBRA_RESTART_PENDING,
@@ -70,7 +73,8 @@ typedef enum {
 } restart_phase_t;
 
 static const char *phase_str[] = {
-       "None",
+       "Idle",
+       "Startup",
        "Stop jobs running",
        "Waiting for other daemons to come down",
        "Zebra restart job running",
@@ -110,7 +114,7 @@ static struct global_state {
        int numpids;
        int numdown; /* # of daemons that are not UP or UNRESPONSIVE */
 } gs = {
-       .phase = PHASE_NONE,
+       .phase = PHASE_INIT,
        .vtydir = frr_vtydir,
        .period = 1000 * DEFAULT_PERIOD,
        .timeout = DEFAULT_TIMEOUT,
@@ -175,6 +179,7 @@ static int try_connect(struct daemon *dmn);
 static int wakeup_send_echo(struct thread *t_wakeup);
 static void try_restart(struct daemon *dmn);
 static void phase_check(void);
+static void restart_done(struct daemon *dmn);
 
 static const char *progname;
 static void printhelp(FILE *target)
@@ -230,7 +235,7 @@ Otherwise, the interval is doubled (but capped at the -M value).\n\n",
                name of the daemon should be substituted.\n\
     --dry      Do not start or restart anything, just log.\n\
 -p, --pid-file Set process identifier file name\n\
-               (default is %s).\n\
+               (default is %s/watchfrr.pid).\n\
 -b, --blank-string\n\
                When the supplied argument string is found in any of the\n\
                various shell command arguments (-r, -s, or -k), replace\n\
@@ -240,7 +245,7 @@ Otherwise, the interval is doubled (but capped at the -M value).\n\n",
 -h, --help     Display this help and exit\n",
                frr_vtydir, DEFAULT_LOGLEVEL, LOG_EMERG, LOG_DEBUG, LOG_DEBUG,
                DEFAULT_MIN_RESTART, DEFAULT_MAX_RESTART, DEFAULT_PERIOD,
-               DEFAULT_TIMEOUT, DEFAULT_RESTART_TIMEOUT, pidfile_default);
+               DEFAULT_TIMEOUT, DEFAULT_RESTART_TIMEOUT, frr_vtydir);
 }
 
 static pid_t run_background(char *shell_cmd)
@@ -331,6 +336,7 @@ static void sigchild(void)
        const char *name;
        const char *what;
        struct restart_info *restart;
+       struct daemon *dmn;
 
        switch (child = waitpid(-1, &status, WNOHANG)) {
        case -1:
@@ -376,9 +382,18 @@ static void sigchild(void)
                        zlog_warn(
                                "%s %s process %d exited with non-zero status %d",
                                what, name, (int)child, WEXITSTATUS(status));
-               else
+               else {
                        zlog_debug("%s %s process %d exited normally", what,
                                   name, (int)child);
+
+                       if (restart && restart != &gs.restart) {
+                               dmn = container_of(restart, struct daemon,
+                                                  restart);
+                               restart_done(dmn);
+                       } else if (restart)
+                               for (dmn = gs.daemons; dmn; dmn = dmn->next)
+                                       restart_done(dmn);
+               }
        } else
                flog_err_sys(
                        EC_LIB_SYSTEM_CALL,
@@ -492,15 +507,27 @@ static int wakeup_init(struct thread *t_wakeup)
 
        dmn->t_wakeup = NULL;
        if (try_connect(dmn) < 0) {
-               SET_WAKEUP_DOWN(dmn);
                flog_err(EC_WATCHFRR_CONNECTION,
                         "%s state -> down : initial connection attempt failed",
                         dmn->name);
                dmn->state = DAEMON_DOWN;
        }
+       phase_check();
        return 0;
 }
 
+/*
+ * Follow-up for a restart job that has finished: try to reconnect to the
+ * daemon right away instead of waiting for a pending wakeup timer.
+ *
+ * Only acts on daemons currently in DAEMON_DOWN; any other state is
+ * logged and left untouched.  If the connection attempt fails, the
+ * "down" wakeup timer is re-armed via SET_WAKEUP_DOWN().
+ */
+static void restart_done(struct daemon *dmn)
+{
+       if (dmn->state != DAEMON_DOWN) {
+               /* name the daemon so the log line is actionable */
+               zlog_warn(
+                       "%s: restart job finished but daemon is not in down state, skipping reconnect",
+                       dmn->name);
+               return;
+       }
+
+       /* drop any pending wakeup before attempting the connection */
+       if (dmn->t_wakeup)
+               THREAD_OFF(dmn->t_wakeup);
+
+       if (try_connect(dmn) < 0)
+               SET_WAKEUP_DOWN(dmn);
+}
+
 static void daemon_down(struct daemon *dmn, const char *why)
 {
        if (IS_UP(dmn) || (dmn->state == DAEMON_INIT))
@@ -609,12 +636,13 @@ static void daemon_send_ready(void)
        if (!sent && gs.numdown == 0) {
                FILE *fp;
 
+               zlog_notice("all daemons up, doing startup-complete notify");
+               frr_detach();
+
                fp = fopen(DAEMON_VTY_DIR "/watchfrr.started", "w");
                if (fp)
                        fclose(fp);
 #if defined HAVE_SYSTEMD
-               zlog_notice(
-                       "Watchfrr: Notifying Systemd we are up and running");
                systemd_send_started(master, 0);
 #endif
                sent = 1;
@@ -770,9 +798,25 @@ static void set_phase(restart_phase_t new_phase)
 
 static void phase_check(void)
 {
+       struct daemon *dmn;
+
        switch (gs.phase) {
        case PHASE_NONE:
                break;
+
+       case PHASE_INIT:
+               for (dmn = gs.daemons; dmn; dmn = dmn->next)
+                       if (dmn->state == DAEMON_INIT)
+                               return;
+
+               /* startup complete, everything out of INIT */
+               gs.phase = PHASE_NONE;
+               for (dmn = gs.daemons; dmn; dmn = dmn->next)
+                       if (dmn->state == DAEMON_DOWN) {
+                               SET_WAKEUP_DOWN(dmn);
+                               try_restart(dmn);
+                       }
+               break;
        case PHASE_STOPS_PENDING:
                if (gs.numpids)
                        break;
@@ -926,6 +970,31 @@ bool check_all_up(void)
        return true;
 }
 
+/*
+ * Write a status summary to the given vty: the global restart phase, the
+ * pid of a running global restart job (if any), and one line per watched
+ * daemon with its state.  For each daemon, additionally report either the
+ * pid of its running restart job or, when it is down and still inside its
+ * backoff interval, how long until the next restart.
+ */
+void watchfrr_status(struct vty *vty)
+{
+       struct daemon *dmn;
+       struct timeval delay;
+
+       vty_out(vty, "watchfrr global phase: %s\n", phase_str[gs.phase]);
+       if (gs.restart.pid)
+               vty_out(vty, "    global restart running, pid %ld\n",
+                       (long)gs.restart.pid);
+
+       for (dmn = gs.daemons; dmn; dmn = dmn->next) {
+               vty_out(vty, "  %-20s %s\n", dmn->name, state_str[dmn->state]);
+
+               if (dmn->restart.pid) {
+                       vty_out(vty, "      restart running, pid %ld\n",
+                               (long)dmn->restart.pid);
+                       continue;
+               }
+               if (dmn->state != DAEMON_DOWN)
+                       continue;
+
+               /*
+                * The comparison uses time_elapsed()'s return value; the
+                * output below relies on it having filled in "delay".
+                */
+               if (time_elapsed(&delay, &dmn->restart.time)->tv_sec
+                   >= dmn->restart.interval)
+                       continue;
+
+               /*
+                * %ld requires a long argument.  Arithmetic involving
+                * tv_sec (time_t) is not guaranteed to yield long on every
+                * platform, so cast explicitly.
+                */
+               vty_out(vty, "      restarting in %ld seconds"
+                       " (%lds backoff interval)\n",
+                       (long)(dmn->restart.interval - delay.tv_sec),
+                       (long)dmn->restart.interval);
+       }
+}
+
 static void sigint(void)
 {
        zlog_notice("Terminating on signal");
@@ -961,6 +1030,52 @@ static char *translate_blanks(const char *cmd, const char *blankstr)
        return res;
 }
 
+/*
+ * Build the list of watched daemons from the non-option command line
+ * arguments (argv[optind..argc-1]).
+ *
+ * Each daemon starts in DAEMON_INIT, is counted as down, and gets a
+ * zero-delay wakeup_init timer.  The entry named "zebra" is remembered in
+ * gs.special.  Calls frr_help_exit(1) if no daemon was given, or if
+ * "zebra" is missing while not in watch-only mode.  Finally logs the list
+ * of watched daemons.
+ */
+static void watchfrr_init(int argc, char **argv)
+{
+       const char *special = "zebra";
+       struct daemon *dmn;
+       struct daemon **tailp = &gs.daemons;
+       char names[512] = "";
+       char *cursor = names;
+       int idx;
+
+       for (idx = optind; idx < argc; idx++) {
+               dmn = XCALLOC(MTYPE_WATCHFRR_DAEMON, sizeof(*dmn));
+
+               dmn->name = argv[idx];
+               dmn->restart.name = argv[idx];
+               dmn->state = DAEMON_INIT;
+               dmn->fd = -1;
+               dmn->t_wakeup = NULL;
+
+               gs.numdaemons++;
+               gs.numdown++;
+
+               thread_add_timer_msec(master, wakeup_init, dmn, 0,
+                                     &dmn->t_wakeup);
+               dmn->restart.interval = gs.min_restart_interval;
+
+               /* append at the tail so command line order is kept */
+               *tailp = dmn;
+               tailp = &dmn->next;
+
+               if (strcmp(dmn->name, special) == 0)
+                       gs.special = dmn;
+       }
+
+       if (gs.daemons == NULL) {
+               fprintf(stderr,
+                       "Must specify one or more daemons to monitor.\n\n");
+               frr_help_exit(1);
+       }
+       if (!watch_only && gs.special == NULL) {
+               fprintf(stderr, "\"%s\" daemon must be in daemon lists\n\n",
+                       special);
+               frr_help_exit(1);
+       }
+
+       /* space-separated daemon names; snprintf truncates if too long */
+       for (dmn = gs.daemons; dmn; dmn = dmn->next) {
+               const char *sep = " ";
+
+               if (cursor == names)
+                       sep = "";
+               snprintf(cursor, names + sizeof(names) - cursor, "%s%s", sep,
+                        dmn->name);
+               cursor += strlen(cursor);
+       }
+
+       zlog_notice("%s %s watching [%s]%s", progname, FRR_VERSION, names,
+                   watch_only ? ", monitor mode" : "");
+}
+
 struct zebra_privs_t watchfrr_privs = {
 #ifdef VTY_GROUP
        .vty_group = VTY_GROUP,
@@ -984,7 +1099,8 @@ static struct quagga_signal_t watchfrr_signals[] = {
 
 FRR_DAEMON_INFO(watchfrr, WATCHFRR,
                .flags = FRR_NO_PRIVSEP | FRR_NO_TCPVTY | FRR_LIMITED_CLI
-                        | FRR_NO_CFG_PID_DRY | FRR_NO_ZCLIENT,
+                        | FRR_NO_CFG_PID_DRY | FRR_NO_ZCLIENT
+                        | FRR_DETACH_LATER,
 
                .printhelp = printhelp,
                .copyright = "Copyright 2004 Andrew J. Schorr",
@@ -999,13 +1115,8 @@ FRR_DAEMON_INFO(watchfrr, WATCHFRR,
 int main(int argc, char **argv)
 {
        int opt;
-       const char *pidfile = pidfile_default;
-       const char *special = "zebra";
        const char *blankstr = NULL;
 
-       snprintf(pidfile_default, sizeof(pidfile_default), "%s/watchfrr.pid",
-                frr_vtydir);
-
        frr_preinit(&watchfrr_di, argc, argv);
        progname = watchfrr_di.progname;
 
@@ -1087,7 +1198,7 @@ int main(int argc, char **argv)
                        gs.period = 1000 * period;
                } break;
                case 'p':
-                       pidfile = optarg;
+                       watchfrr_di.pid_file = optarg;
                        break;
                case 'r':
                        if (!valid_command(optarg)) {
@@ -1167,98 +1278,18 @@ int main(int argc, char **argv)
 
        master = frr_init();
        watchfrr_error_init();
+       watchfrr_init(argc, argv);
+       watchfrr_vty_init();
+
+       frr_config_fork();
 
        zlog_set_level(ZLOG_DEST_MONITOR, ZLOG_DISABLED);
-       if (watchfrr_di.daemon_mode) {
+       if (watchfrr_di.daemon_mode)
                zlog_set_level(ZLOG_DEST_SYSLOG, MIN(gs.loglevel, LOG_DEBUG));
-               if (daemon(0, 0) < 0) {
-                       fprintf(stderr, "Watchfrr daemon failed: %s",
-                               strerror(errno));
-                       exit(1);
-               }
-       } else
+       else
                zlog_set_level(ZLOG_DEST_STDOUT, MIN(gs.loglevel, LOG_DEBUG));
 
-       watchfrr_vty_init();
-
-       frr_vty_serv();
-
-       {
-               int i;
-               struct daemon *tail = NULL;
-
-               for (i = optind; i < argc; i++) {
-                       struct daemon *dmn;
-
-                       if (!(dmn = (struct daemon *)calloc(1, sizeof(*dmn)))) {
-                               fprintf(stderr, "calloc(1,%u) failed: %s\n",
-                                       (unsigned int)sizeof(*dmn),
-                                       safe_strerror(errno));
-                               return 1;
-                       }
-                       dmn->name = dmn->restart.name = argv[i];
-                       dmn->state = DAEMON_INIT;
-                       gs.numdaemons++;
-                       gs.numdown++;
-                       dmn->fd = -1;
-                       dmn->t_wakeup = NULL;
-                       thread_add_timer_msec(master, wakeup_init, dmn,
-                                             100 + (random() % 900),
-                                             &dmn->t_wakeup);
-                       dmn->restart.interval = gs.min_restart_interval;
-                       if (tail)
-                               tail->next = dmn;
-                       else
-                               gs.daemons = dmn;
-                       tail = dmn;
-
-                       if (!strcmp(dmn->name, special))
-                               gs.special = dmn;
-               }
-       }
-       if (!gs.daemons) {
-               fputs("Must specify one or more daemons to monitor.\n", stderr);
-               frr_help_exit(1);
-       }
-       if (!watch_only && !gs.special) {
-               fprintf(stderr, "\"%s\" daemon must be in daemon list\n",
-                       special);
-               frr_help_exit(1);
-       }
-
-       /* Make sure we're not already running. */
-       pid_output(pidfile);
-
-       /* Announce which daemons are being monitored. */
-       {
-               struct daemon *dmn;
-               size_t len = 0;
-
-               for (dmn = gs.daemons; dmn; dmn = dmn->next)
-                       len += strlen(dmn->name) + 1;
-
-               {
-                       char buf[len + 1];
-                       char *p = buf;
-
-                       for (dmn = gs.daemons; dmn; dmn = dmn->next) {
-                               if (p != buf)
-                                       *p++ = ' ';
-                               strcpy(p, dmn->name);
-                               p += strlen(p);
-                       }
-                       zlog_notice("%s %s watching [%s]%s", progname,
-                                   FRR_VERSION, buf,
-                                   watch_only ? ", monitor mode" : "");
-               }
-       }
-
-       {
-               struct thread thread;
-
-               while (thread_fetch(master, &thread))
-                       thread_call(&thread);
-       }
+       frr_run(master);
 
        systemd_send_stopping();
        /* Not reached. */