]> git.proxmox.com Git - mirror_frr.git/blob - watchfrr/watchfrr.c
Merge pull request #6027 from sarav511/vrfloop
[mirror_frr.git] / watchfrr / watchfrr.c
1 /*
2 * Monitor status of frr daemons and restart if necessary.
3 *
4 * Copyright (C) 2004 Andrew J. Schorr
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include <zebra.h>
22 #include <thread.h>
23 #include <log.h>
24 #include <network.h>
25 #include <sigevent.h>
26 #include <lib/version.h>
27 #include "command.h"
28 #include "libfrr.h"
29 #include "lib_errors.h"
30
31 #include <getopt.h>
32 #include <sys/un.h>
33 #include <sys/wait.h>
34 #include <memory.h>
35 #include <systemd.h>
36
37 #include "watchfrr.h"
38 #include "watchfrr_errors.h"
39
/* Smaller of two values. Note: both arguments are evaluated twice. */
#ifndef MIN
#define MIN(X,Y) (((X) <= (Y)) ? (X) : (Y))
#endif

/* Macros to help randomize timers. */
/* JITTER(X): random offset in the range [-(X)/2, (X) - (X)/2]. */
#define JITTER(X) ((random() % ((X)+1))-((X)/2))
/* FUZZY(X): X plus a random offset of JITTER((X)/20). */
#define FUZZY(X) ((X)+JITTER((X)/20))

/*
 * Built-in defaults; the help text in printhelp() reports the interval and
 * timeout values as seconds.
 */
#define DEFAULT_PERIOD 5
#define DEFAULT_TIMEOUT 90
#define DEFAULT_RESTART_TIMEOUT 20
#define DEFAULT_LOGLEVEL LOG_INFO
#define DEFAULT_MIN_RESTART 60
#define DEFAULT_MAX_RESTART 600

/*
 * Default shell command templates; the single "%s" is replaced with the
 * daemon name when run_job() builds the command line.
 */
#define DEFAULT_RESTART_CMD WATCHFRR_SH_PATH " restart %s"
#define DEFAULT_START_CMD WATCHFRR_SH_PATH " start %s"
#define DEFAULT_STOP_CMD WATCHFRR_SH_PATH " stop %s"

/* Token sent in the "echo" command and expected back in the reply. */
#define PING_TOKEN "PING"
/* Memory group/type used when allocating struct daemon entries. */
DEFINE_MGROUP(WATCHFRR, "watchfrr")
DEFINE_MTYPE_STATIC(WATCHFRR, WATCHFRR_DAEMON, "watchfrr daemon entry")

/* Needs to be global, referenced somewhere inside libfrr. */
struct thread_master *master;

/* When true, try_restart() returns immediately and never runs a job. */
static bool watch_only = false;
68
/*
 * Phases of the global ("phased") restart handled by phase_check().
 * PHASE_NONE means no phased restart is in progress; PHASE_INIT is the
 * initial value of gs.phase and lasts until every daemon has left
 * DAEMON_INIT.
 */
typedef enum {
	PHASE_NONE = 0,
	PHASE_INIT,
	PHASE_STOPS_PENDING,
	PHASE_WAITING_DOWN,
	PHASE_ZEBRA_RESTART_PENDING,
	PHASE_WAITING_ZEBRA_UP
} restart_phase_t;

/*
 * Human-readable name of each restart phase, indexed by restart_phase_t.
 * Designated initializers keep every string tied to its enum value, so the
 * table cannot silently drift out of step with the enum.
 */
static const char *const phase_str[] = {
	[PHASE_NONE] = "Idle",
	[PHASE_INIT] = "Startup",
	[PHASE_STOPS_PENDING] = "Stop jobs running",
	[PHASE_WAITING_DOWN] = "Waiting for other daemons to come down",
	[PHASE_ZEBRA_RESTART_PENDING] = "Zebra restart job running",
	[PHASE_WAITING_ZEBRA_UP] = "Waiting for zebra to come up",
};
87
/* Seconds a single restart phase may last before phase_hanging() aborts it. */
#define PHASE_TIMEOUT (3*gs.restart_timeout)
/*
 * Milliseconds before startup_timeout() fires (armed with
 * thread_add_timer_msec).  Parenthesized so the macro expands safely inside
 * larger expressions.
 */
#define STARTUP_TIMEOUT (55 * 1000)
90
/* Bookkeeping for one background (start/stop/restart) shell job. */
struct restart_info {
	const char *name; /* daemon name substituted into the command */
	const char *what; /* job type label, set from cmdtype in run_job() */
	pid_t pid; /* pid of the running job, 0 when none is running */
	struct timeval time; /* when the job was started; sigchild() resets it
				on completion */
	long interval; /* minimum seconds between jobs (back-off) */
	struct thread *t_kill; /* timer that fires restart_kill() */
	int kills; /* signals sent so far: 0 -> SIGTERM next, else SIGKILL */
};
100
/* Process-wide watchfrr state, initialized with the built-in defaults. */
static struct global_state {
	restart_phase_t phase; /* current phase, see phase_check() */
	struct thread *t_phase_hanging; /* timer armed by set_phase() */
	struct thread *t_startup_timeout; /* timer firing startup_timeout() */
	const char *vtydir; /* directory containing the <daemon>.vty sockets */
	long period; /* polling interval in milliseconds (used with
			thread_add_timer_msec) */
	long timeout; /* echo/connect timeout, reported in logs as seconds */
	long restart_timeout; /* delay before restart_kill() signals a job */
	long min_restart_interval; /* lower bound of the restart back-off */
	long max_restart_interval; /* upper bound of the restart back-off */
	struct daemon *daemons; /* head of the singly linked daemon list */
	const char *restart_command; /* command templates containing "%s" */
	const char *start_command;
	const char *stop_command;
	struct restart_info restart; /* job state of the global restart */
	int loglevel;
	struct daemon *special; /* points to zebra when doing phased restart */
	int numdaemons; /* number of entries in the daemons list */
	int numpids; /* background jobs currently outstanding */
	int numdown; /* # of daemons that are not UP or UNRESPONSIVE */
} gs = {
	.phase = PHASE_INIT,
	.vtydir = frr_vtydir,
	.period = 1000 * DEFAULT_PERIOD,
	.timeout = DEFAULT_TIMEOUT,
	.restart_timeout = DEFAULT_RESTART_TIMEOUT,
	.loglevel = DEFAULT_LOGLEVEL,
	.min_restart_interval = DEFAULT_MIN_RESTART,
	.max_restart_interval = DEFAULT_MAX_RESTART,
	.restart_command = DEFAULT_RESTART_CMD,
	.start_command = DEFAULT_START_CMD,
	.stop_command = DEFAULT_STOP_CMD,
};
134
/* Connection state of one monitored daemon. */
typedef enum {
	DAEMON_INIT,
	DAEMON_DOWN,
	DAEMON_CONNECTING,
	DAEMON_UP,
	DAEMON_UNRESPONSIVE
} daemon_state_t;

/* A daemon counts as "up" while it is connected, even if it is slow. */
#define IS_UP(DMN)                                                             \
	(((DMN)->state == DAEMON_UP) || ((DMN)->state == DAEMON_UNRESPONSIVE))

/* Printable name of each daemon_state_t value. */
static const char *const state_str[] = {
	[DAEMON_INIT] = "Init",
	[DAEMON_DOWN] = "Down",
	[DAEMON_CONNECTING] = "Connecting",
	[DAEMON_UP] = "Up",
	[DAEMON_UNRESPONSIVE] = "Unresponsive",
};
149
/* One monitored daemon; entries are chained through `next` from gs.daemons. */
struct daemon {
	const char *name; /* daemon name, also used to build "<name>.vty" */
	daemon_state_t state;
	int fd; /* vty socket descriptor, -1 when not open */
	struct timeval echo_sent; /* when the last echo was written; tv_sec is
				     zeroed once the reply has been read */
	unsigned int connect_tries; /* try_connect() calls since last "up" */
	struct thread *t_wakeup; /* pending timer (echo / down / timeout) */
	struct thread *t_read; /* pending read handler on fd */
	struct thread *t_write; /* pending write handler (connect check) */
	struct daemon *next;
	struct restart_info restart; /* per-daemon background job state */

	/*
	 * For a given daemon, if we've turned on ignore timeouts
	 * ignore the timeout value and assume everything is ok
	 * This is for daemon debugging w/ gdb after we have started
	 * FRR and realize we have something that needs to be looked
	 * at
	 */
	bool ignore_timeout;
};
171
/* getopt_long() return values for long options without a short form. */
#define OPTION_MINRESTART 2000
#define OPTION_MAXRESTART 2001
#define OPTION_DRY 2002

/* Long command-line options; the table ends with an all-zero entry. */
static const struct option longopts[] = {
	{"daemon", no_argument, NULL, 'd'},
	{"statedir", required_argument, NULL, 'S'},
	{"loglevel", required_argument, NULL, 'l'},
	{"interval", required_argument, NULL, 'i'},
	{"timeout", required_argument, NULL, 't'},
	{"restart-timeout", required_argument, NULL, 'T'},
	{"restart", required_argument, NULL, 'r'},
	{"start-command", required_argument, NULL, 's'},
	{"kill-command", required_argument, NULL, 'k'},
	{"dry", no_argument, NULL, OPTION_DRY},
	{"min-restart-interval", required_argument, NULL, OPTION_MINRESTART},
	{"max-restart-interval", required_argument, NULL, OPTION_MAXRESTART},
	{"pid-file", required_argument, NULL, 'p'},
	{"blank-string", required_argument, NULL, 'b'},
	{"help", no_argument, NULL, 'h'},
	{"version", no_argument, NULL, 'v'},
	{NULL, 0, NULL, 0}};
194
/* Forward declarations for functions used before their definition. */
static int try_connect(struct daemon *dmn);
static int wakeup_send_echo(struct thread *t_wakeup);
static void try_restart(struct daemon *dmn);
static void phase_check(void);
static void restart_done(struct daemon *dmn);

/* Program name printed in the usage text. */
static const char *progname;
202
203 void watchfrr_set_ignore_daemon(struct vty *vty, const char *dname, bool ignore)
204 {
205 struct daemon *dmn;
206
207 for (dmn = gs.daemons; dmn; dmn = dmn->next) {
208 if (strncmp(dmn->name, dname, strlen(dmn->name)) == 0)
209 break;
210 }
211
212 if (dmn) {
213 dmn->ignore_timeout = ignore;
214 vty_out(vty, "%s switching to %s\n", dmn->name,
215 ignore ? "ignore" : "watch");
216 } else
217 vty_out(vty, "%s is not configured for running at the moment",
218 dname);
219 }
220
221 static void printhelp(FILE *target)
222 {
223 fprintf(target,
224 "Usage : %s [OPTION...] <daemon name> ...\n\n\
225 Watchdog program to monitor status of frr daemons and try to restart\n\
226 them if they are down or unresponsive. It determines whether a daemon is\n\
227 up based on whether it can connect to the daemon's vty unix stream socket.\n\
228 It then repeatedly sends echo commands over that socket to determine whether\n\
229 the daemon is responsive. If the daemon crashes, we will receive an EOF\n\
230 on the socket connection and know immediately that the daemon is down.\n\n\
231 The daemons to be monitored should be listed on the command line.\n\n\
232 In order to avoid attempting to restart the daemons in a fast loop,\n\
233 the -m and -M options allow you to control the minimum delay between\n\
234 restart commands. The minimum restart delay is recalculated each time\n\
235 a restart is attempted: if the time since the last restart attempt exceeds\n\
236 twice the -M value, then the restart delay is set to the -m value.\n\
237 Otherwise, the interval is doubled (but capped at the -M value).\n\n",
238 progname);
239
240 fprintf(target,
241 "Options:\n\
242 -d, --daemon Run in daemon mode. In this mode, error messages are sent\n\
243 to syslog instead of stdout.\n\
244 -S, --statedir Set the vty socket directory (default is %s)\n\
245 -l, --loglevel Set the logging level (default is %d).\n\
246 The value should range from %d (LOG_EMERG) to %d (LOG_DEBUG),\n\
247 but it can be set higher than %d if extra-verbose debugging\n\
248 messages are desired.\n\
249 --min-restart-interval\n\
250 Set the minimum seconds to wait between invocations of daemon\n\
251 restart commands (default is %d).\n\
252 --max-restart-interval\n\
253 Set the maximum seconds to wait between invocations of daemon\n\
254 restart commands (default is %d).\n\
255 -i, --interval Set the status polling interval in seconds (default is %d)\n\
256 -t, --timeout Set the unresponsiveness timeout in seconds (default is %d)\n\
257 -T, --restart-timeout\n\
258 Set the restart (kill) timeout in seconds (default is %d).\n\
259 If any background jobs are still running after this much\n\
260 time has elapsed, they will be killed.\n\
261 -r, --restart Supply a Bourne shell command to use to restart a single\n\
262 daemon. The command string should include '%%s' where the\n\
263 name of the daemon should be substituted.\n\
264 (default: '%s')\n\
265 -s, --start-command\n\
266 Supply a Bourne shell to command to use to start a single\n\
267 daemon. The command string should include '%%s' where the\n\
268 name of the daemon should be substituted.\n\
269 (default: '%s')\n\
270 -k, --kill-command\n\
271 Supply a Bourne shell to command to use to stop a single\n\
272 daemon. The command string should include '%%s' where the\n\
273 name of the daemon should be substituted.\n\
274 (default: '%s')\n\
275 --dry Do not start or restart anything, just log.\n\
276 -p, --pid-file Set process identifier file name\n\
277 (default is %s/watchfrr.pid).\n\
278 -b, --blank-string\n\
279 When the supplied argument string is found in any of the\n\
280 various shell command arguments (-r, -s, or -k), replace\n\
281 it with a space. This is an ugly hack to circumvent problems\n\
282 passing command-line arguments with embedded spaces.\n\
283 -v, --version Print program version\n\
284 -h, --help Display this help and exit\n",
285 frr_vtydir, DEFAULT_LOGLEVEL, LOG_EMERG, LOG_DEBUG, LOG_DEBUG,
286 DEFAULT_MIN_RESTART, DEFAULT_MAX_RESTART, DEFAULT_PERIOD,
287 DEFAULT_TIMEOUT, DEFAULT_RESTART_TIMEOUT,
288 DEFAULT_RESTART_CMD, DEFAULT_START_CMD, DEFAULT_STOP_CMD,
289 frr_vtydir);
290 }
291
292 static pid_t run_background(char *shell_cmd)
293 {
294 pid_t child;
295
296 switch (child = fork()) {
297 case -1:
298 flog_err_sys(EC_LIB_SYSTEM_CALL,
299 "fork failed, cannot run command [%s]: %s",
300 shell_cmd, safe_strerror(errno));
301 return -1;
302 case 0:
303 /* Child process. */
304 /* Use separate process group so child processes can be killed
305 * easily. */
306 if (setpgid(0, 0) < 0)
307 zlog_warn("warning: setpgid(0,0) failed: %s",
308 safe_strerror(errno));
309 {
310 char shell[] = "sh";
311 char dashc[] = "-c";
312 char *const argv[4] = {shell, dashc, shell_cmd, NULL};
313 execv("/bin/sh", argv);
314 flog_err_sys(EC_LIB_SYSTEM_CALL,
315 "execv(/bin/sh -c '%s') failed: %s",
316 shell_cmd, safe_strerror(errno));
317 _exit(127);
318 }
319 default:
320 /* Parent process: we will reap the child later. */
321 zlog_info("Forked background command [pid %d]: %s", (int)child,
322 shell_cmd);
323 return child;
324 }
325 }
326
/*
 * Store in *result the wall-clock time (gettimeofday) that has passed since
 * *start_time, with tv_usec normalized to be non-negative.
 *
 * Returns `result` so the call can be used inside an expression.
 */
static struct timeval *time_elapsed(struct timeval *result,
				    const struct timeval *start_time)
{
	time_t secs;
	long usecs;

	gettimeofday(result, NULL);
	secs = result->tv_sec - start_time->tv_sec;
	usecs = result->tv_usec - start_time->tv_usec;

	/* Borrow from the seconds until the microseconds are non-negative. */
	for (; usecs < 0; secs--)
		usecs += 1000000L;

	result->tv_sec = secs;
	result->tv_usec = usecs;
	return result;
}
339
340 static int restart_kill(struct thread *t_kill)
341 {
342 struct restart_info *restart = THREAD_ARG(t_kill);
343 struct timeval delay;
344
345 time_elapsed(&delay, &restart->time);
346 zlog_warn(
347 "Warning: %s %s child process %d still running after "
348 "%ld seconds, sending signal %d",
349 restart->what, restart->name, (int)restart->pid,
350 (long)delay.tv_sec, (restart->kills ? SIGKILL : SIGTERM));
351 kill(-restart->pid, (restart->kills ? SIGKILL : SIGTERM));
352 restart->kills++;
353 restart->t_kill = NULL;
354 thread_add_timer(master, restart_kill, restart, gs.restart_timeout,
355 &restart->t_kill);
356 return 0;
357 }
358
359 static struct restart_info *find_child(pid_t child)
360 {
361 struct daemon *dmn;
362 if (gs.restart.pid == child)
363 return &gs.restart;
364
365 for (dmn = gs.daemons; dmn; dmn = dmn->next) {
366 if (dmn->restart.pid == child)
367 return &dmn->restart;
368 }
369 return NULL;
370 }
371
372 static void sigchild(void)
373 {
374 pid_t child;
375 int status;
376 const char *name;
377 const char *what;
378 struct restart_info *restart;
379 struct daemon *dmn;
380
381 switch (child = waitpid(-1, &status, WNOHANG)) {
382 case -1:
383 flog_err_sys(EC_LIB_SYSTEM_CALL, "waitpid failed: %s",
384 safe_strerror(errno));
385 return;
386 case 0:
387 zlog_warn("SIGCHLD received, but waitpid did not reap a child");
388 return;
389 }
390
391 if (child == integrated_write_pid) {
392 integrated_write_sigchld(status);
393 return;
394 }
395
396 if ((restart = find_child(child)) != NULL) {
397 name = restart->name;
398 what = restart->what;
399 restart->pid = 0;
400 gs.numpids--;
401 thread_cancel(restart->t_kill);
402 restart->t_kill = NULL;
403 /* Update restart time to reflect the time the command
404 * completed. */
405 gettimeofday(&restart->time, NULL);
406 } else {
407 flog_err_sys(
408 EC_LIB_SYSTEM_CALL,
409 "waitpid returned status for an unknown child process %d",
410 (int)child);
411 name = "(unknown)";
412 what = "background";
413 }
414 if (WIFSTOPPED(status))
415 zlog_warn("warning: %s %s process %d is stopped", what, name,
416 (int)child);
417 else if (WIFSIGNALED(status))
418 zlog_warn("%s %s process %d terminated due to signal %d", what,
419 name, (int)child, WTERMSIG(status));
420 else if (WIFEXITED(status)) {
421 if (WEXITSTATUS(status) != 0)
422 zlog_warn(
423 "%s %s process %d exited with non-zero status %d",
424 what, name, (int)child, WEXITSTATUS(status));
425 else {
426 zlog_debug("%s %s process %d exited normally", what,
427 name, (int)child);
428
429 if (restart && restart != &gs.restart) {
430 dmn = container_of(restart, struct daemon,
431 restart);
432 restart_done(dmn);
433 } else if (restart)
434 for (dmn = gs.daemons; dmn; dmn = dmn->next)
435 restart_done(dmn);
436 }
437 } else
438 flog_err_sys(
439 EC_LIB_SYSTEM_CALL,
440 "cannot interpret %s %s process %d wait status 0x%x",
441 what, name, (int)child, status);
442 phase_check();
443 }
444
445 static int run_job(struct restart_info *restart, const char *cmdtype,
446 const char *command, int force, int update_interval)
447 {
448 struct timeval delay;
449
450 if (gs.loglevel > LOG_DEBUG + 1)
451 zlog_debug("attempting to %s %s", cmdtype, restart->name);
452
453 if (restart->pid) {
454 if (gs.loglevel > LOG_DEBUG + 1)
455 zlog_debug(
456 "cannot %s %s, previous pid %d still running",
457 cmdtype, restart->name, (int)restart->pid);
458 return -1;
459 }
460
461 #if defined HAVE_SYSTEMD
462 char buffer[512];
463
464 snprintf(buffer, sizeof(buffer), "restarting %s", restart->name);
465 systemd_send_status(buffer);
466 #endif
467
468 /* Note: time_elapsed test must come before the force test, since we
469 need
470 to make sure that delay is initialized for use below in updating the
471 restart interval. */
472 if ((time_elapsed(&delay, &restart->time)->tv_sec < restart->interval)
473 && !force) {
474
475 if (gs.loglevel > LOG_DEBUG + 1)
476 zlog_debug(
477 "postponing %s %s: "
478 "elapsed time %ld < retry interval %ld",
479 cmdtype, restart->name, (long)delay.tv_sec,
480 restart->interval);
481 return -1;
482 }
483
484 gettimeofday(&restart->time, NULL);
485 restart->kills = 0;
486 {
487 char cmd[strlen(command) + strlen(restart->name) + 1];
488 snprintf(cmd, sizeof(cmd), command, restart->name);
489 if ((restart->pid = run_background(cmd)) > 0) {
490 restart->t_kill = NULL;
491 thread_add_timer(master, restart_kill, restart,
492 gs.restart_timeout, &restart->t_kill);
493 restart->what = cmdtype;
494 gs.numpids++;
495 } else
496 restart->pid = 0;
497 }
498
499 #if defined HAVE_SYSTEMD
500 systemd_send_status("FRR Operational");
501 #endif
502 /* Calculate the new restart interval. */
503 if (update_interval) {
504 if (delay.tv_sec > 2 * gs.max_restart_interval)
505 restart->interval = gs.min_restart_interval;
506 else if ((restart->interval *= 2) > gs.max_restart_interval)
507 restart->interval = gs.max_restart_interval;
508 if (gs.loglevel > LOG_DEBUG + 1)
509 zlog_debug("restart %s interval is now %ld",
510 restart->name, restart->interval);
511 }
512 return restart->pid;
513 }
514
/*
 * Helpers that (re)arm one of a daemon's event handlers.  Each one clears the
 * stored thread pointer first and then registers the new event.
 *
 * The macros deliberately do NOT end in a semicolon: the caller supplies it,
 * which keeps "if (x) MACRO(d); else ..." well-formed.
 */

/* Wait for input on the daemon's socket; handled by handle_read(). */
#define SET_READ_HANDLER(DMN)                                                  \
	do {                                                                   \
		(DMN)->t_read = NULL;                                          \
		thread_add_read(master, handle_read, (DMN), (DMN)->fd,         \
				&(DMN)->t_read);                               \
	} while (0)

/* Retry the connection after roughly gs.period ms; see wakeup_down(). */
#define SET_WAKEUP_DOWN(DMN)                                                   \
	do {                                                                   \
		(DMN)->t_wakeup = NULL;                                        \
		thread_add_timer_msec(master, wakeup_down, (DMN),              \
				      FUZZY(gs.period), &(DMN)->t_wakeup);     \
	} while (0)

/* Re-check an unresponsive daemon after roughly gs.period ms. */
#define SET_WAKEUP_UNRESPONSIVE(DMN)                                           \
	do {                                                                   \
		(DMN)->t_wakeup = NULL;                                        \
		thread_add_timer_msec(master, wakeup_unresponsive, (DMN),      \
				      FUZZY(gs.period), &(DMN)->t_wakeup);     \
	} while (0)

/* Send the next echo request after roughly gs.period ms. */
#define SET_WAKEUP_ECHO(DMN)                                                   \
	do {                                                                   \
		(DMN)->t_wakeup = NULL;                                        \
		thread_add_timer_msec(master, wakeup_send_echo, (DMN),         \
				      FUZZY(gs.period), &(DMN)->t_wakeup);     \
	} while (0)
542
543 static int wakeup_down(struct thread *t_wakeup)
544 {
545 struct daemon *dmn = THREAD_ARG(t_wakeup);
546
547 dmn->t_wakeup = NULL;
548 if (try_connect(dmn) < 0)
549 SET_WAKEUP_DOWN(dmn);
550 if ((dmn->connect_tries > 1) && (dmn->state != DAEMON_UP))
551 try_restart(dmn);
552 return 0;
553 }
554
555 static int wakeup_init(struct thread *t_wakeup)
556 {
557 struct daemon *dmn = THREAD_ARG(t_wakeup);
558
559 dmn->t_wakeup = NULL;
560 if (try_connect(dmn) < 0) {
561 zlog_info(
562 "%s state -> down : initial connection attempt failed",
563 dmn->name);
564 dmn->state = DAEMON_DOWN;
565 }
566 phase_check();
567 return 0;
568 }
569
570 static void restart_done(struct daemon *dmn)
571 {
572 if (dmn->state != DAEMON_DOWN) {
573 zlog_warn(
574 "Daemon: %s: is in %s state but expected it to be in DAEMON_DOWN state",
575 dmn->name, state_str[dmn->state]);
576 return;
577 }
578 if (dmn->t_wakeup)
579 THREAD_OFF(dmn->t_wakeup);
580 if (try_connect(dmn) < 0)
581 SET_WAKEUP_DOWN(dmn);
582 }
583
584 static void daemon_down(struct daemon *dmn, const char *why)
585 {
586 if (IS_UP(dmn) || (dmn->state == DAEMON_INIT))
587 flog_err(EC_WATCHFRR_CONNECTION, "%s state -> down : %s",
588 dmn->name, why);
589 else if (gs.loglevel > LOG_DEBUG)
590 zlog_debug("%s still down : %s", dmn->name, why);
591 if (IS_UP(dmn))
592 gs.numdown++;
593 dmn->state = DAEMON_DOWN;
594 if (dmn->fd >= 0) {
595 close(dmn->fd);
596 dmn->fd = -1;
597 }
598 THREAD_OFF(dmn->t_read);
599 THREAD_OFF(dmn->t_write);
600 THREAD_OFF(dmn->t_wakeup);
601 if (try_connect(dmn) < 0)
602 SET_WAKEUP_DOWN(dmn);
603 phase_check();
604 }
605
606 static int handle_read(struct thread *t_read)
607 {
608 struct daemon *dmn = THREAD_ARG(t_read);
609 static const char resp[sizeof(PING_TOKEN) + 4] = PING_TOKEN "\n";
610 char buf[sizeof(resp) + 100];
611 ssize_t rc;
612 struct timeval delay;
613
614 dmn->t_read = NULL;
615 if ((rc = read(dmn->fd, buf, sizeof(buf))) < 0) {
616 char why[100];
617
618 if (ERRNO_IO_RETRY(errno)) {
619 /* Pretend it never happened. */
620 SET_READ_HANDLER(dmn);
621 return 0;
622 }
623 snprintf(why, sizeof(why), "unexpected read error: %s",
624 safe_strerror(errno));
625 daemon_down(dmn, why);
626 return 0;
627 }
628 if (rc == 0) {
629 daemon_down(dmn, "read returned EOF");
630 return 0;
631 }
632 if (!dmn->echo_sent.tv_sec) {
633 char why[sizeof(buf) + 100];
634 snprintf(why, sizeof(why),
635 "unexpected read returns %d bytes: %.*s", (int)rc,
636 (int)rc, buf);
637 daemon_down(dmn, why);
638 return 0;
639 }
640
641 /* We are expecting an echo response: is there any chance that the
642 response would not be returned entirely in the first read? That
643 seems inconceivable... */
644 if ((rc != sizeof(resp)) || memcmp(buf, resp, sizeof(resp))) {
645 char why[100 + sizeof(buf)];
646 snprintf(why, sizeof(why),
647 "read returned bad echo response of %d bytes "
648 "(expecting %u): %.*s",
649 (int)rc, (unsigned int)sizeof(resp), (int)rc, buf);
650 daemon_down(dmn, why);
651 return 0;
652 }
653
654 time_elapsed(&delay, &dmn->echo_sent);
655 dmn->echo_sent.tv_sec = 0;
656 if (dmn->state == DAEMON_UNRESPONSIVE) {
657 if (delay.tv_sec < gs.timeout) {
658 dmn->state = DAEMON_UP;
659 zlog_warn(
660 "%s state -> up : echo response received after %ld.%06ld "
661 "seconds",
662 dmn->name, (long)delay.tv_sec,
663 (long)delay.tv_usec);
664 } else
665 zlog_warn(
666 "%s: slow echo response finally received after %ld.%06ld "
667 "seconds",
668 dmn->name, (long)delay.tv_sec,
669 (long)delay.tv_usec);
670 } else if (gs.loglevel > LOG_DEBUG + 1)
671 zlog_debug("%s: echo response received after %ld.%06ld seconds",
672 dmn->name, (long)delay.tv_sec, (long)delay.tv_usec);
673
674 SET_READ_HANDLER(dmn);
675 if (dmn->t_wakeup)
676 thread_cancel(dmn->t_wakeup);
677 SET_WAKEUP_ECHO(dmn);
678
679 return 0;
680 }
681
682 /*
683 * Wait till we notice that all daemons are ready before
684 * we send we are ready to systemd
685 */
686 static void daemon_send_ready(int exitcode)
687 {
688 FILE *fp;
689 static int sent = 0;
690 char started[1024];
691
692 if (sent)
693 return;
694
695 if (exitcode == 0)
696 zlog_notice("all daemons up, doing startup-complete notify");
697 else if (gs.numdown < gs.numdaemons)
698 flog_err(EC_WATCHFRR_CONNECTION,
699 "startup did not complete within timeout"
700 " (%d/%d daemons running)",
701 gs.numdaemons - gs.numdown, gs.numdaemons);
702 else {
703 flog_err(EC_WATCHFRR_CONNECTION,
704 "all configured daemons failed to start"
705 " -- exiting watchfrr");
706 exit(exitcode);
707
708 }
709
710 frr_detach();
711
712 snprintf(started, sizeof(started), "%s%s", frr_vtydir,
713 "watchfrr.started");
714 fp = fopen(started, "w");
715 if (fp)
716 fclose(fp);
717 #if defined HAVE_SYSTEMD
718 systemd_send_started(master, 0);
719 systemd_send_status("FRR Operational");
720 #endif
721 sent = 1;
722 }
723
724 static void daemon_up(struct daemon *dmn, const char *why)
725 {
726 dmn->state = DAEMON_UP;
727 gs.numdown--;
728 dmn->connect_tries = 0;
729 zlog_notice("%s state -> up : %s", dmn->name, why);
730 if (gs.numdown == 0)
731 daemon_send_ready(0);
732 SET_WAKEUP_ECHO(dmn);
733 phase_check();
734 }
735
736 static int check_connect(struct thread *t_write)
737 {
738 struct daemon *dmn = THREAD_ARG(t_write);
739 int sockerr;
740 socklen_t reslen = sizeof(sockerr);
741
742 dmn->t_write = NULL;
743 if (getsockopt(dmn->fd, SOL_SOCKET, SO_ERROR, (char *)&sockerr, &reslen)
744 < 0) {
745 zlog_warn("%s: check_connect: getsockopt failed: %s", dmn->name,
746 safe_strerror(errno));
747 daemon_down(dmn,
748 "getsockopt failed checking connection success");
749 return 0;
750 }
751 if ((reslen == sizeof(sockerr)) && sockerr) {
752 char why[100];
753 snprintf(
754 why, sizeof(why),
755 "getsockopt reports that connection attempt failed: %s",
756 safe_strerror(sockerr));
757 daemon_down(dmn, why);
758 return 0;
759 }
760
761 daemon_up(dmn, "delayed connect succeeded");
762 return 0;
763 }
764
765 static int wakeup_connect_hanging(struct thread *t_wakeup)
766 {
767 struct daemon *dmn = THREAD_ARG(t_wakeup);
768 char why[100];
769
770 dmn->t_wakeup = NULL;
771 snprintf(why, sizeof(why),
772 "connection attempt timed out after %ld seconds", gs.timeout);
773 daemon_down(dmn, why);
774 return 0;
775 }
776
777 /* Making connection to protocol daemon. */
778 static int try_connect(struct daemon *dmn)
779 {
780 int sock;
781 struct sockaddr_un addr;
782 socklen_t len;
783
784 if (gs.loglevel > LOG_DEBUG + 1)
785 zlog_debug("%s: attempting to connect", dmn->name);
786 dmn->connect_tries++;
787
788 memset(&addr, 0, sizeof(struct sockaddr_un));
789 addr.sun_family = AF_UNIX;
790 snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/%s.vty", gs.vtydir,
791 dmn->name);
792 #ifdef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN
793 len = addr.sun_len = SUN_LEN(&addr);
794 #else
795 len = sizeof(addr.sun_family) + strlen(addr.sun_path);
796 #endif /* HAVE_STRUCT_SOCKADDR_UN_SUN_LEN */
797
798 /* Quick check to see if we might succeed before we go to the trouble
799 of creating a socket. */
800 if (access(addr.sun_path, W_OK) < 0) {
801 if (errno != ENOENT)
802 flog_err_sys(EC_LIB_SYSTEM_CALL,
803 "%s: access to socket %s denied: %s",
804 dmn->name, addr.sun_path,
805 safe_strerror(errno));
806 return -1;
807 }
808
809 if ((sock = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
810 flog_err_sys(EC_LIB_SOCKET, "%s(%s): cannot make socket: %s",
811 __func__, addr.sun_path, safe_strerror(errno));
812 return -1;
813 }
814
815 if (set_nonblocking(sock) < 0 || set_cloexec(sock) < 0) {
816 flog_err_sys(EC_LIB_SYSTEM_CALL,
817 "%s(%s): set_nonblocking/cloexec(%d) failed",
818 __func__, addr.sun_path, sock);
819 close(sock);
820 return -1;
821 }
822
823 if (connect(sock, (struct sockaddr *)&addr, len) < 0) {
824 if ((errno != EINPROGRESS) && (errno != EWOULDBLOCK)) {
825 if (gs.loglevel > LOG_DEBUG)
826 zlog_debug("%s(%s): connect failed: %s",
827 __func__, addr.sun_path,
828 safe_strerror(errno));
829 close(sock);
830 return -1;
831 }
832 if (gs.loglevel > LOG_DEBUG)
833 zlog_debug("%s: connection in progress", dmn->name);
834 dmn->state = DAEMON_CONNECTING;
835 dmn->fd = sock;
836 dmn->t_write = NULL;
837 thread_add_write(master, check_connect, dmn, dmn->fd,
838 &dmn->t_write);
839 dmn->t_wakeup = NULL;
840 thread_add_timer(master, wakeup_connect_hanging, dmn,
841 gs.timeout, &dmn->t_wakeup);
842 SET_READ_HANDLER(dmn);
843 return 0;
844 }
845
846 dmn->fd = sock;
847 SET_READ_HANDLER(dmn);
848 daemon_up(dmn, "connect succeeded");
849 return 1;
850 }
851
852 static int phase_hanging(struct thread *t_hanging)
853 {
854 gs.t_phase_hanging = NULL;
855 flog_err(EC_WATCHFRR_CONNECTION,
856 "Phase [%s] hanging for %ld seconds, aborting phased restart",
857 phase_str[gs.phase], PHASE_TIMEOUT);
858 gs.phase = PHASE_NONE;
859 return 0;
860 }
861
862 static void set_phase(restart_phase_t new_phase)
863 {
864 gs.phase = new_phase;
865 if (gs.t_phase_hanging)
866 thread_cancel(gs.t_phase_hanging);
867 gs.t_phase_hanging = NULL;
868 thread_add_timer(master, phase_hanging, NULL, PHASE_TIMEOUT,
869 &gs.t_phase_hanging);
870 }
871
872 static void phase_check(void)
873 {
874 struct daemon *dmn;
875
876 switch (gs.phase) {
877 case PHASE_NONE:
878 break;
879
880 case PHASE_INIT:
881 for (dmn = gs.daemons; dmn; dmn = dmn->next)
882 if (dmn->state == DAEMON_INIT)
883 return;
884
885 /* startup complete, everything out of INIT */
886 gs.phase = PHASE_NONE;
887 for (dmn = gs.daemons; dmn; dmn = dmn->next)
888 if (dmn->state == DAEMON_DOWN) {
889 SET_WAKEUP_DOWN(dmn);
890 try_restart(dmn);
891 }
892 break;
893 case PHASE_STOPS_PENDING:
894 if (gs.numpids)
895 break;
896 zlog_info(
897 "Phased restart: all routing daemon stop jobs have completed.");
898 set_phase(PHASE_WAITING_DOWN);
899
900 /*FALLTHRU*/
901 case PHASE_WAITING_DOWN:
902 if (gs.numdown + IS_UP(gs.special) < gs.numdaemons)
903 break;
904 zlog_info("Phased restart: all routing daemons now down.");
905 run_job(&gs.special->restart, "restart", gs.restart_command, 1,
906 1);
907 set_phase(PHASE_ZEBRA_RESTART_PENDING);
908
909 /*FALLTHRU*/
910 case PHASE_ZEBRA_RESTART_PENDING:
911 if (gs.special->restart.pid)
912 break;
913 zlog_info("Phased restart: %s restart job completed.",
914 gs.special->name);
915 set_phase(PHASE_WAITING_ZEBRA_UP);
916
917 /*FALLTHRU*/
918 case PHASE_WAITING_ZEBRA_UP:
919 if (!IS_UP(gs.special))
920 break;
921 zlog_info("Phased restart: %s is now up.", gs.special->name);
922 {
923 struct daemon *dmn;
924 for (dmn = gs.daemons; dmn; dmn = dmn->next) {
925 if (dmn != gs.special)
926 run_job(&dmn->restart, "start",
927 gs.start_command, 1, 0);
928 }
929 }
930 gs.phase = PHASE_NONE;
931 THREAD_OFF(gs.t_phase_hanging);
932 zlog_notice("Phased global restart has completed.");
933 break;
934 }
935 }
936
937 static void try_restart(struct daemon *dmn)
938 {
939 if (watch_only)
940 return;
941
942 if (dmn != gs.special) {
943 if ((gs.special->state == DAEMON_UP)
944 && (gs.phase == PHASE_NONE))
945 run_job(&dmn->restart, "restart", gs.restart_command, 0,
946 1);
947 else
948 zlog_debug(
949 "%s: postponing restart attempt because master %s daemon "
950 "not up [%s], or phased restart in progress",
951 dmn->name, gs.special->name,
952 state_str[gs.special->state]);
953 return;
954 }
955
956 if ((gs.phase != PHASE_NONE) || gs.numpids) {
957 if (gs.loglevel > LOG_DEBUG + 1)
958 zlog_debug(
959 "postponing phased global restart: restart already in "
960 "progress [%s], or outstanding child processes [%d]",
961 phase_str[gs.phase], gs.numpids);
962 return;
963 }
964 /* Is it too soon for a restart? */
965 {
966 struct timeval delay;
967 if (time_elapsed(&delay, &gs.special->restart.time)->tv_sec
968 < gs.special->restart.interval) {
969 if (gs.loglevel > LOG_DEBUG + 1)
970 zlog_debug(
971 "postponing phased global restart: "
972 "elapsed time %ld < retry interval %ld",
973 (long)delay.tv_sec,
974 gs.special->restart.interval);
975 return;
976 }
977 }
978 run_job(&gs.restart, "restart", gs.restart_command, 0, 1);
979 }
980
981 static int wakeup_unresponsive(struct thread *t_wakeup)
982 {
983 struct daemon *dmn = THREAD_ARG(t_wakeup);
984
985 dmn->t_wakeup = NULL;
986 if (dmn->state != DAEMON_UNRESPONSIVE)
987 flog_err(EC_WATCHFRR_CONNECTION,
988 "%s: no longer unresponsive (now %s), "
989 "wakeup should have been cancelled!",
990 dmn->name, state_str[dmn->state]);
991 else {
992 SET_WAKEUP_UNRESPONSIVE(dmn);
993 try_restart(dmn);
994 }
995 return 0;
996 }
997
998 static int wakeup_no_answer(struct thread *t_wakeup)
999 {
1000 struct daemon *dmn = THREAD_ARG(t_wakeup);
1001
1002 dmn->t_wakeup = NULL;
1003 dmn->state = DAEMON_UNRESPONSIVE;
1004 if (dmn->ignore_timeout)
1005 return 0;
1006 flog_err(EC_WATCHFRR_CONNECTION,
1007 "%s state -> unresponsive : no response yet to ping "
1008 "sent %ld seconds ago",
1009 dmn->name, gs.timeout);
1010 SET_WAKEUP_UNRESPONSIVE(dmn);
1011 try_restart(dmn);
1012 return 0;
1013 }
1014
1015 static int wakeup_send_echo(struct thread *t_wakeup)
1016 {
1017 static const char echocmd[] = "echo " PING_TOKEN;
1018 ssize_t rc;
1019 struct daemon *dmn = THREAD_ARG(t_wakeup);
1020
1021 dmn->t_wakeup = NULL;
1022 if (((rc = write(dmn->fd, echocmd, sizeof(echocmd))) < 0)
1023 || ((size_t)rc != sizeof(echocmd))) {
1024 char why[100 + sizeof(echocmd)];
1025 snprintf(why, sizeof(why),
1026 "write '%s' returned %d instead of %u", echocmd,
1027 (int)rc, (unsigned int)sizeof(echocmd));
1028 daemon_down(dmn, why);
1029 } else {
1030 gettimeofday(&dmn->echo_sent, NULL);
1031 dmn->t_wakeup = NULL;
1032 thread_add_timer(master, wakeup_no_answer, dmn, gs.timeout,
1033 &dmn->t_wakeup);
1034 }
1035 return 0;
1036 }
1037
1038 bool check_all_up(void)
1039 {
1040 struct daemon *dmn;
1041
1042 for (dmn = gs.daemons; dmn; dmn = dmn->next)
1043 if (dmn->state != DAEMON_UP)
1044 return false;
1045 return true;
1046 }
1047
1048 void watchfrr_status(struct vty *vty)
1049 {
1050 struct daemon *dmn;
1051 struct timeval delay;
1052
1053 vty_out(vty, "watchfrr global phase: %s\n", phase_str[gs.phase]);
1054 if (gs.restart.pid)
1055 vty_out(vty, " global restart running, pid %ld\n",
1056 (long)gs.restart.pid);
1057
1058 for (dmn = gs.daemons; dmn; dmn = dmn->next) {
1059 vty_out(vty, " %-20s %s%s", dmn->name, state_str[dmn->state],
1060 dmn->ignore_timeout ? "/Ignoring Timeout\n" : "\n");
1061 if (dmn->restart.pid)
1062 vty_out(vty, " restart running, pid %ld\n",
1063 (long)dmn->restart.pid);
1064 else if (dmn->state == DAEMON_DOWN &&
1065 time_elapsed(&delay, &dmn->restart.time)->tv_sec
1066 < dmn->restart.interval)
1067 vty_out(vty, " restarting in %jd seconds"
1068 " (%jds backoff interval)\n",
1069 (intmax_t)dmn->restart.interval
1070 - (intmax_t)delay.tv_sec,
1071 (intmax_t)dmn->restart.interval);
1072 }
1073 }
1074
/*
 * Termination handler, registered for both SIGINT and SIGTERM in
 * watchfrr_signals: log a notice, call systemd_send_stopping() and exit
 * with status 0.  Does not return.
 */
static void sigint(void)
{
	zlog_notice("Terminating on signal");

	systemd_send_stopping();

	exit(0);
}
1081
/*
 * Check that a command template contains exactly one '%' character and
 * that it is immediately followed by 's' (i.e. a single "%s"
 * placeholder and no other conversion or escaped percent sign).
 *
 * Returns 1 when the template is acceptable, 0 otherwise.
 */
static int valid_command(const char *cmd)
{
	const char *pct = strchr(cmd, '%');

	/* No placeholder at all. */
	if (pct == NULL)
		return 0;

	/* The first '%' must introduce "%s". */
	if (pct[1] != 's')
		return 0;

	/* Any further '%' makes the template invalid. */
	return strchr(pct + 1, '%') == NULL;
}
1089
/*
 * This is an ugly hack to circumvent problems with passing command-line
 * arguments that contain spaces.  The fix is to use a configuration file.
 *
 * Returns a newly allocated copy of 'cmd' in which every occurrence of
 * the placeholder 'blankstr' has been replaced by a single space.  The
 * caller owns the returned string.  If the copy cannot be allocated the
 * program prints an error and exits with status 1.
 *
 * An empty 'blankstr' leaves the command unchanged.  The scan resumes
 * after each replacement, so a placeholder that itself contains a space
 * cannot be matched again on the blank that was just written.
 */
static char *translate_blanks(const char *cmd, const char *blankstr)
{
	char *res;
	char *p;
	size_t bslen = strlen(blankstr);

	res = strdup(cmd);
	if (!res) {
		perror("strdup");
		exit(1);
	}

	/*
	 * strstr() matches an empty needle at every position; without
	 * this guard the loop below would never terminate and would grow
	 * the string past its allocation.
	 */
	if (bslen == 0)
		return res;

	p = res;
	while ((p = strstr(p, blankstr)) != NULL) {
		*p = ' ';
		/* Close the gap left by the rest of the placeholder. */
		if (bslen != 1)
			memmove(p + 1, p + bslen, strlen(p + bslen) + 1);
		/* Continue after the blank we just wrote. */
		p++;
	}
	return res;
}
1109
/*
 * Timer callback scheduled by watchfrr_init() with a delay of
 * STARTUP_TIMEOUT milliseconds; it calls daemon_send_ready(1).
 *
 * Always returns 0.
 */
static int startup_timeout(struct thread *t_wakeup)
{
	/* The thread argument carries no data for this callback. */
	(void)t_wakeup;

	daemon_send_ready(1);

	return 0;
}
1115
1116 static void watchfrr_init(int argc, char **argv)
1117 {
1118 const char *special = "zebra";
1119 int i;
1120 struct daemon *dmn, **add = &gs.daemons;
1121 char alldaemons[512] = "", *p = alldaemons;
1122
1123 thread_add_timer_msec(master, startup_timeout, NULL, STARTUP_TIMEOUT,
1124 &gs.t_startup_timeout);
1125
1126 for (i = optind; i < argc; i++) {
1127 dmn = XCALLOC(MTYPE_WATCHFRR_DAEMON, sizeof(*dmn));
1128
1129 dmn->name = dmn->restart.name = argv[i];
1130 dmn->state = DAEMON_INIT;
1131 gs.numdaemons++;
1132 gs.numdown++;
1133 dmn->fd = -1;
1134 dmn->t_wakeup = NULL;
1135 thread_add_timer_msec(master, wakeup_init, dmn, 0,
1136 &dmn->t_wakeup);
1137 dmn->restart.interval = gs.min_restart_interval;
1138 *add = dmn;
1139 add = &dmn->next;
1140
1141 if (!strcmp(dmn->name, special))
1142 gs.special = dmn;
1143 }
1144
1145 if (!gs.daemons) {
1146 fprintf(stderr,
1147 "Must specify one or more daemons to monitor.\n\n");
1148 frr_help_exit(1);
1149 }
1150 if (!watch_only && !gs.special) {
1151 fprintf(stderr, "\"%s\" daemon must be in daemon lists\n\n",
1152 special);
1153 frr_help_exit(1);
1154 }
1155
1156 for (dmn = gs.daemons; dmn; dmn = dmn->next) {
1157 snprintf(p, alldaemons + sizeof(alldaemons) - p, "%s%s",
1158 (p == alldaemons) ? "" : " ", dmn->name);
1159 p += strlen(p);
1160 }
1161 zlog_notice("%s %s watching [%s]%s", progname, FRR_VERSION, alldaemons,
1162 watch_only ? ", monitor mode" : "");
1163 }
1164
1165 struct zebra_privs_t watchfrr_privs = {
1166 #ifdef VTY_GROUP
1167 .vty_group = VTY_GROUP,
1168 #endif
1169 };
1170
1171 static struct quagga_signal_t watchfrr_signals[] = {
1172 {
1173 .signal = SIGINT,
1174 .handler = sigint,
1175 },
1176 {
1177 .signal = SIGTERM,
1178 .handler = sigint,
1179 },
1180 {
1181 .signal = SIGCHLD,
1182 .handler = sigchild,
1183 },
1184 };
1185
1186 FRR_DAEMON_INFO(watchfrr, WATCHFRR,
1187 .flags = FRR_NO_PRIVSEP | FRR_NO_TCPVTY | FRR_LIMITED_CLI
1188 | FRR_NO_CFG_PID_DRY | FRR_NO_ZCLIENT
1189 | FRR_DETACH_LATER,
1190
1191 .printhelp = printhelp,
1192 .copyright = "Copyright 2004 Andrew J. Schorr",
1193
1194 .signals = watchfrr_signals,
1195 .n_signals = array_size(watchfrr_signals),
1196
1197 .privs = &watchfrr_privs, )
1198
1199 #define DEPRECATED_OPTIONS "aAezR:"
1200
1201 int main(int argc, char **argv)
1202 {
1203 int opt;
1204 const char *blankstr = NULL;
1205
1206 frr_preinit(&watchfrr_di, argc, argv);
1207 progname = watchfrr_di.progname;
1208
1209 frr_opt_add("b:dk:l:i:p:r:S:s:t:T:" DEPRECATED_OPTIONS, longopts, "");
1210
1211 gs.restart.name = "all";
1212 while ((opt = frr_getopt(argc, argv, NULL)) != EOF) {
1213 if (opt && opt < 128 && strchr(DEPRECATED_OPTIONS, opt)) {
1214 fprintf(stderr,
1215 "The -%c option no longer exists.\n"
1216 "Please refer to the watchfrr(8) man page.\n",
1217 opt);
1218 exit(1);
1219 }
1220
1221 switch (opt) {
1222 case 0:
1223 break;
1224 case 'b':
1225 blankstr = optarg;
1226 break;
1227 case OPTION_DRY:
1228 watch_only = true;
1229 break;
1230 case 'k':
1231 if (!valid_command(optarg)) {
1232 fprintf(stderr,
1233 "Invalid kill command, must contain '%%s': %s\n",
1234 optarg);
1235 frr_help_exit(1);
1236 }
1237 gs.stop_command = optarg;
1238 break;
1239 case 'l': {
1240 char garbage[3];
1241 if ((sscanf(optarg, "%d%1s", &gs.loglevel, garbage)
1242 != 1)
1243 || (gs.loglevel < LOG_EMERG)) {
1244 fprintf(stderr,
1245 "Invalid loglevel argument: %s\n",
1246 optarg);
1247 frr_help_exit(1);
1248 }
1249 } break;
1250 case OPTION_MINRESTART: {
1251 char garbage[3];
1252 if ((sscanf(optarg, "%ld%1s", &gs.min_restart_interval,
1253 garbage)
1254 != 1)
1255 || (gs.min_restart_interval < 0)) {
1256 fprintf(stderr,
1257 "Invalid min_restart_interval argument: %s\n",
1258 optarg);
1259 frr_help_exit(1);
1260 }
1261 } break;
1262 case OPTION_MAXRESTART: {
1263 char garbage[3];
1264 if ((sscanf(optarg, "%ld%1s", &gs.max_restart_interval,
1265 garbage)
1266 != 1)
1267 || (gs.max_restart_interval < 0)) {
1268 fprintf(stderr,
1269 "Invalid max_restart_interval argument: %s\n",
1270 optarg);
1271 frr_help_exit(1);
1272 }
1273 } break;
1274 case 'i': {
1275 char garbage[3];
1276 int period;
1277 if ((sscanf(optarg, "%d%1s", &period, garbage) != 1)
1278 || (gs.period < 1)) {
1279 fprintf(stderr,
1280 "Invalid interval argument: %s\n",
1281 optarg);
1282 frr_help_exit(1);
1283 }
1284 gs.period = 1000 * period;
1285 } break;
1286 case 'p':
1287 watchfrr_di.pid_file = optarg;
1288 break;
1289 case 'r':
1290 if (!valid_command(optarg)) {
1291 fprintf(stderr,
1292 "Invalid restart command, must contain '%%s': %s\n",
1293 optarg);
1294 frr_help_exit(1);
1295 }
1296 gs.restart_command = optarg;
1297 break;
1298 case 's':
1299 if (!valid_command(optarg)) {
1300 fprintf(stderr,
1301 "Invalid start command, must contain '%%s': %s\n",
1302 optarg);
1303 frr_help_exit(1);
1304 }
1305 gs.start_command = optarg;
1306 break;
1307 case 'S':
1308 gs.vtydir = optarg;
1309 break;
1310 case 't': {
1311 char garbage[3];
1312 if ((sscanf(optarg, "%ld%1s", &gs.timeout, garbage)
1313 != 1)
1314 || (gs.timeout < 1)) {
1315 fprintf(stderr,
1316 "Invalid timeout argument: %s\n",
1317 optarg);
1318 frr_help_exit(1);
1319 }
1320 } break;
1321 case 'T': {
1322 char garbage[3];
1323 if ((sscanf(optarg, "%ld%1s", &gs.restart_timeout,
1324 garbage)
1325 != 1)
1326 || (gs.restart_timeout < 1)) {
1327 fprintf(stderr,
1328 "Invalid restart timeout argument: %s\n",
1329 optarg);
1330 frr_help_exit(1);
1331 }
1332 } break;
1333 default:
1334 fputs("Invalid option.\n", stderr);
1335 frr_help_exit(1);
1336 }
1337 }
1338
1339 if (watch_only
1340 && (gs.start_command || gs.stop_command || gs.restart_command)) {
1341 fputs("Options -r/-s/-k are not used when --dry is active.\n",
1342 stderr);
1343 }
1344 if (!watch_only
1345 && (!gs.restart_command || !gs.start_command || !gs.stop_command)) {
1346 fprintf(stderr,
1347 "Options -s (start), -k (kill), and -r (restart) are required.\n");
1348 frr_help_exit(1);
1349 }
1350
1351 if (blankstr) {
1352 if (gs.restart_command)
1353 gs.restart_command =
1354 translate_blanks(gs.restart_command, blankstr);
1355 if (gs.start_command)
1356 gs.start_command =
1357 translate_blanks(gs.start_command, blankstr);
1358 if (gs.stop_command)
1359 gs.stop_command =
1360 translate_blanks(gs.stop_command, blankstr);
1361 }
1362
1363 gs.restart.interval = gs.min_restart_interval;
1364
1365 master = frr_init();
1366 watchfrr_error_init();
1367 watchfrr_init(argc, argv);
1368 watchfrr_vty_init();
1369
1370 frr_config_fork();
1371
1372 zlog_set_level(ZLOG_DEST_MONITOR, ZLOG_DISABLED);
1373 if (watchfrr_di.daemon_mode)
1374 zlog_set_level(ZLOG_DEST_SYSLOG, MIN(gs.loglevel, LOG_DEBUG));
1375 else
1376 zlog_set_level(ZLOG_DEST_STDOUT, MIN(gs.loglevel, LOG_DEBUG));
1377
1378 frr_run(master);
1379
1380 systemd_send_stopping();
1381 /* Not reached. */
1382 return 0;
1383 }