]> git.proxmox.com Git - mirror_frr.git/blob - watchfrr/watchfrr.c
Merge pull request #5528 from opensourcerouting/bmp-dns-fixing
[mirror_frr.git] / watchfrr / watchfrr.c
1 /*
2 * Monitor status of frr daemons and restart if necessary.
3 *
4 * Copyright (C) 2004 Andrew J. Schorr
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include <zebra.h>
22 #include <thread.h>
23 #include <log.h>
24 #include <network.h>
25 #include <sigevent.h>
26 #include <lib/version.h>
27 #include "command.h"
28 #include "libfrr.h"
29 #include "lib_errors.h"
30
31 #include <getopt.h>
32 #include <sys/un.h>
33 #include <sys/wait.h>
34 #include <memory.h>
35 #include <systemd.h>
36
37 #include "watchfrr.h"
38 #include "watchfrr_errors.h"
39
/* Smaller of two values. Either argument may be evaluated twice. */
#ifndef MIN
#define MIN(X, Y) (((X) <= (Y)) ? (X) : (Y))
#endif

/*
 * Timer randomization helpers.
 * JITTER(X) yields a value in [-(X)/2, (X) - (X)/2]; FUZZY(X) offsets X by
 * JITTER of one twentieth of X.
 */
#define JITTER(X) ((random() % ((X) + 1)) - ((X) / 2))
#define FUZZY(X) ((X) + JITTER((X) / 20))
47
/*
 * Built-in defaults, as reported by printhelp(). The period, the timeouts
 * and the restart intervals are expressed in seconds.
 */
#define DEFAULT_PERIOD 5
#define DEFAULT_TIMEOUT 90
#define DEFAULT_RESTART_TIMEOUT 20
#define DEFAULT_LOGLEVEL LOG_INFO
#define DEFAULT_MIN_RESTART 60
#define DEFAULT_MAX_RESTART 600

/* Default shell commands; "%s" is replaced by the daemon name. */
#define DEFAULT_RESTART_CMD WATCHFRR_SH_PATH " restart %s"
#define DEFAULT_START_CMD WATCHFRR_SH_PATH " start %s"
#define DEFAULT_STOP_CMD WATCHFRR_SH_PATH " stop %s"

/* Word sent with the "echo" command and expected back in the reply. */
#define PING_TOKEN "PING"
60
61 DEFINE_MGROUP(WATCHFRR, "watchfrr")
62 DEFINE_MTYPE_STATIC(WATCHFRR, WATCHFRR_DAEMON, "watchfrr daemon entry")
63
64 /* Needs to be global, referenced somewhere inside libfrr. */
65 struct thread_master *master;
66
67 static bool watch_only = false;
68
/*
 * Steps of the restart state machine driven by phase_check().
 * PHASE_INIT is the state at program start; PHASE_NONE means no phased
 * restart is in progress.
 */
typedef enum {
	PHASE_NONE = 0,
	PHASE_INIT,
	PHASE_STOPS_PENDING,
	PHASE_WAITING_DOWN,
	PHASE_ZEBRA_RESTART_PENDING,
	PHASE_WAITING_ZEBRA_UP
} restart_phase_t;

/*
 * Human-readable phase names, indexed by restart_phase_t.
 * Designated initializers keep every name attached to its phase even if the
 * enum is reordered, and the table no longer carries a name for a phase that
 * does not exist.
 */
static const char *const phase_str[] = {
	[PHASE_NONE] = "Idle",
	[PHASE_INIT] = "Startup",
	[PHASE_STOPS_PENDING] = "Stop jobs running",
	[PHASE_WAITING_DOWN] = "Waiting for other daemons to come down",
	[PHASE_ZEBRA_RESTART_PENDING] = "Zebra restart job running",
	[PHASE_WAITING_ZEBRA_UP] = "Waiting for zebra to come up",
};
87
/* Time one restart phase may last before phase_hanging() aborts it. */
#define PHASE_TIMEOUT (3 * gs.restart_timeout)
/*
 * Delay, in milliseconds, before startup_timeout() fires. Parenthesized so
 * the product survives being used inside a larger expression.
 */
#define STARTUP_TIMEOUT (55 * 1000)
90
/* Bookkeeping for one background start/stop/restart job. */
struct restart_info {
	/* Name substituted for "%s" in the shell command. */
	const char *name;
	/* Kind of job last launched ("restart", "start", ...), for logging. */
	const char *what;
	/* Pid of the running job, 0 when none is running. */
	pid_t pid;
	/* When the job was last launched or last completed. */
	struct timeval time;
	/* Minimum number of seconds between two non-forced launches. */
	long interval;
	/* Timer that signals the job when it runs for too long. */
	struct thread *t_kill;
	/* Signals sent so far: SIGTERM is used first, SIGKILL afterwards. */
	int kills;
};
100
101 static struct global_state {
102 restart_phase_t phase;
103 struct thread *t_phase_hanging;
104 struct thread *t_startup_timeout;
105 const char *vtydir;
106 long period;
107 long timeout;
108 long restart_timeout;
109 long min_restart_interval;
110 long max_restart_interval;
111 struct daemon *daemons;
112 const char *restart_command;
113 const char *start_command;
114 const char *stop_command;
115 struct restart_info restart;
116 int loglevel;
117 struct daemon *special; /* points to zebra when doing phased restart */
118 int numdaemons;
119 int numpids;
120 int numdown; /* # of daemons that are not UP or UNRESPONSIVE */
121 } gs = {
122 .phase = PHASE_INIT,
123 .vtydir = frr_vtydir,
124 .period = 1000 * DEFAULT_PERIOD,
125 .timeout = DEFAULT_TIMEOUT,
126 .restart_timeout = DEFAULT_RESTART_TIMEOUT,
127 .loglevel = DEFAULT_LOGLEVEL,
128 .min_restart_interval = DEFAULT_MIN_RESTART,
129 .max_restart_interval = DEFAULT_MAX_RESTART,
130 .restart_command = DEFAULT_RESTART_CMD,
131 .start_command = DEFAULT_START_CMD,
132 .stop_command = DEFAULT_STOP_CMD,
133 };
134
/* Connection state of one monitored daemon. */
typedef enum {
	DAEMON_INIT,
	DAEMON_DOWN,
	DAEMON_CONNECTING,
	DAEMON_UP,
	DAEMON_UNRESPONSIVE
} daemon_state_t;

/* True when the daemon is connected, whether or not it answers pings. */
#define IS_UP(DMN) \
	(((DMN)->state == DAEMON_UP) || ((DMN)->state == DAEMON_UNRESPONSIVE))

/* Printable state names, indexed by daemon_state_t. */
static const char *const state_str[] = {
	[DAEMON_INIT] = "Init",
	[DAEMON_DOWN] = "Down",
	[DAEMON_CONNECTING] = "Connecting",
	[DAEMON_UP] = "Up",
	[DAEMON_UNRESPONSIVE] = "Unresponsive",
};
149
150 struct daemon {
151 const char *name;
152 daemon_state_t state;
153 int fd;
154 struct timeval echo_sent;
155 unsigned int connect_tries;
156 struct thread *t_wakeup;
157 struct thread *t_read;
158 struct thread *t_write;
159 struct daemon *next;
160 struct restart_info restart;
161
162 /*
163 * For a given daemon, if we've turned on ignore timeouts
164 * ignore the timeout value and assume everything is ok
165 * This is for daemon debugging w/ gdb after we have started
166 * FRR and realize we have something that needs to be looked
167 * at
168 */
169 bool ignore_timeout;
170 };
171
/* Return values for the long options that have no short form. */
#define OPTION_MINRESTART 2000
#define OPTION_MAXRESTART 2001
#define OPTION_DRY 2002

/* Long command-line options; the all-zero entry terminates the table. */
static const struct option longopts[] = {
	{"daemon", no_argument, NULL, 'd'},
	{"statedir", required_argument, NULL, 'S'},
	{"loglevel", required_argument, NULL, 'l'},
	{"interval", required_argument, NULL, 'i'},
	{"timeout", required_argument, NULL, 't'},
	{"restart-timeout", required_argument, NULL, 'T'},
	{"restart", required_argument, NULL, 'r'},
	{"start-command", required_argument, NULL, 's'},
	{"kill-command", required_argument, NULL, 'k'},
	{"dry", no_argument, NULL, OPTION_DRY},
	{"min-restart-interval", required_argument, NULL, OPTION_MINRESTART},
	{"max-restart-interval", required_argument, NULL, OPTION_MAXRESTART},
	{"pid-file", required_argument, NULL, 'p'},
	{"blank-string", required_argument, NULL, 'b'},
	{"help", no_argument, NULL, 'h'},
	{"version", no_argument, NULL, 'v'},
	{NULL, 0, NULL, 0},
};
194
/* Functions used before the point where they are defined. */
static int try_connect(struct daemon *dmn);
static int wakeup_send_echo(struct thread *t_wakeup);
static void try_restart(struct daemon *dmn);
static void phase_check(void);
static void restart_done(struct daemon *dmn);

/* Program name shown in the usage text. */
static const char *progname;
202
203 void watchfrr_set_ignore_daemon(struct vty *vty, const char *dname, bool ignore)
204 {
205 struct daemon *dmn;
206
207 for (dmn = gs.daemons; dmn; dmn = dmn->next) {
208 if (strncmp(dmn->name, dname, strlen(dmn->name)) == 0)
209 break;
210 }
211
212 if (dmn) {
213 dmn->ignore_timeout = ignore;
214 vty_out(vty, "%s switching to %s\n", dmn->name,
215 ignore ? "ignore" : "watch");
216 } else
217 vty_out(vty, "%s is not configured for running at the moment",
218 dname);
219 }
220
221 static void printhelp(FILE *target)
222 {
223 fprintf(target,
224 "Usage : %s [OPTION...] <daemon name> ...\n\n\
225 Watchdog program to monitor status of frr daemons and try to restart\n\
226 them if they are down or unresponsive. It determines whether a daemon is\n\
227 up based on whether it can connect to the daemon's vty unix stream socket.\n\
228 It then repeatedly sends echo commands over that socket to determine whether\n\
229 the daemon is responsive. If the daemon crashes, we will receive an EOF\n\
230 on the socket connection and know immediately that the daemon is down.\n\n\
231 The daemons to be monitored should be listed on the command line.\n\n\
232 In order to avoid attempting to restart the daemons in a fast loop,\n\
233 the -m and -M options allow you to control the minimum delay between\n\
234 restart commands. The minimum restart delay is recalculated each time\n\
235 a restart is attempted: if the time since the last restart attempt exceeds\n\
236 twice the -M value, then the restart delay is set to the -m value.\n\
237 Otherwise, the interval is doubled (but capped at the -M value).\n\n",
238 progname);
239
240 fprintf(target,
241 "Options:\n\
242 -d, --daemon Run in daemon mode. In this mode, error messages are sent\n\
243 to syslog instead of stdout.\n\
244 -S, --statedir Set the vty socket directory (default is %s)\n\
245 -l, --loglevel Set the logging level (default is %d).\n\
246 The value should range from %d (LOG_EMERG) to %d (LOG_DEBUG),\n\
247 but it can be set higher than %d if extra-verbose debugging\n\
248 messages are desired.\n\
249 --min-restart-interval\n\
250 Set the minimum seconds to wait between invocations of daemon\n\
251 restart commands (default is %d).\n\
252 --max-restart-interval\n\
253 Set the maximum seconds to wait between invocations of daemon\n\
254 restart commands (default is %d).\n\
255 -i, --interval Set the status polling interval in seconds (default is %d)\n\
256 -t, --timeout Set the unresponsiveness timeout in seconds (default is %d)\n\
257 -T, --restart-timeout\n\
258 Set the restart (kill) timeout in seconds (default is %d).\n\
259 If any background jobs are still running after this much\n\
260 time has elapsed, they will be killed.\n\
261 -r, --restart Supply a Bourne shell command to use to restart a single\n\
262 daemon. The command string should include '%%s' where the\n\
263 name of the daemon should be substituted.\n\
264 (default: '%s')\n\
265 -s, --start-command\n\
266 Supply a Bourne shell to command to use to start a single\n\
267 daemon. The command string should include '%%s' where the\n\
268 name of the daemon should be substituted.\n\
269 (default: '%s')\n\
270 -k, --kill-command\n\
271 Supply a Bourne shell to command to use to stop a single\n\
272 daemon. The command string should include '%%s' where the\n\
273 name of the daemon should be substituted.\n\
274 (default: '%s')\n\
275 --dry Do not start or restart anything, just log.\n\
276 -p, --pid-file Set process identifier file name\n\
277 (default is %s/watchfrr.pid).\n\
278 -b, --blank-string\n\
279 When the supplied argument string is found in any of the\n\
280 various shell command arguments (-r, -s, or -k), replace\n\
281 it with a space. This is an ugly hack to circumvent problems\n\
282 passing command-line arguments with embedded spaces.\n\
283 -v, --version Print program version\n\
284 -h, --help Display this help and exit\n",
285 frr_vtydir, DEFAULT_LOGLEVEL, LOG_EMERG, LOG_DEBUG, LOG_DEBUG,
286 DEFAULT_MIN_RESTART, DEFAULT_MAX_RESTART, DEFAULT_PERIOD,
287 DEFAULT_TIMEOUT, DEFAULT_RESTART_TIMEOUT,
288 DEFAULT_RESTART_CMD, DEFAULT_START_CMD, DEFAULT_STOP_CMD,
289 frr_vtydir);
290 }
291
292 static pid_t run_background(char *shell_cmd)
293 {
294 pid_t child;
295
296 switch (child = fork()) {
297 case -1:
298 flog_err_sys(EC_LIB_SYSTEM_CALL,
299 "fork failed, cannot run command [%s]: %s",
300 shell_cmd, safe_strerror(errno));
301 return -1;
302 case 0:
303 /* Child process. */
304 /* Use separate process group so child processes can be killed
305 * easily. */
306 if (setpgid(0, 0) < 0)
307 zlog_warn("warning: setpgid(0,0) failed: %s",
308 safe_strerror(errno));
309 {
310 char shell[] = "sh";
311 char dashc[] = "-c";
312 char *const argv[4] = {shell, dashc, shell_cmd, NULL};
313 execv("/bin/sh", argv);
314 flog_err_sys(EC_LIB_SYSTEM_CALL,
315 "execv(/bin/sh -c '%s') failed: %s",
316 shell_cmd, safe_strerror(errno));
317 _exit(127);
318 }
319 default:
320 /* Parent process: we will reap the child later. */
321 flog_err_sys(EC_LIB_SYSTEM_CALL,
322 "Forked background command [pid %d]: %s",
323 (int)child, shell_cmd);
324 return child;
325 }
326 }
327
/*
 * Store in 'result' the time that has passed since 'start_time'.
 *
 * The microsecond part is normalized to be non-negative. Returns 'result'
 * so the call can be used inside an expression.
 */
static struct timeval *time_elapsed(struct timeval *result,
				    const struct timeval *start_time)
{
	gettimeofday(result, NULL);
	result->tv_sec = result->tv_sec - start_time->tv_sec;
	result->tv_usec = result->tv_usec - start_time->tv_usec;

	/* Borrow whole seconds until the microseconds are non-negative. */
	for (; result->tv_usec < 0; result->tv_sec--)
		result->tv_usec += 1000000L;

	return result;
}
340
341 static int restart_kill(struct thread *t_kill)
342 {
343 struct restart_info *restart = THREAD_ARG(t_kill);
344 struct timeval delay;
345
346 time_elapsed(&delay, &restart->time);
347 zlog_warn(
348 "Warning: %s %s child process %d still running after "
349 "%ld seconds, sending signal %d",
350 restart->what, restart->name, (int)restart->pid,
351 (long)delay.tv_sec, (restart->kills ? SIGKILL : SIGTERM));
352 kill(-restart->pid, (restart->kills ? SIGKILL : SIGTERM));
353 restart->kills++;
354 restart->t_kill = NULL;
355 thread_add_timer(master, restart_kill, restart, gs.restart_timeout,
356 &restart->t_kill);
357 return 0;
358 }
359
360 static struct restart_info *find_child(pid_t child)
361 {
362 struct daemon *dmn;
363 if (gs.restart.pid == child)
364 return &gs.restart;
365
366 for (dmn = gs.daemons; dmn; dmn = dmn->next) {
367 if (dmn->restart.pid == child)
368 return &dmn->restart;
369 }
370 return NULL;
371 }
372
373 static void sigchild(void)
374 {
375 pid_t child;
376 int status;
377 const char *name;
378 const char *what;
379 struct restart_info *restart;
380 struct daemon *dmn;
381
382 switch (child = waitpid(-1, &status, WNOHANG)) {
383 case -1:
384 flog_err_sys(EC_LIB_SYSTEM_CALL, "waitpid failed: %s",
385 safe_strerror(errno));
386 return;
387 case 0:
388 zlog_warn("SIGCHLD received, but waitpid did not reap a child");
389 return;
390 }
391
392 if (child == integrated_write_pid) {
393 integrated_write_sigchld(status);
394 return;
395 }
396
397 if ((restart = find_child(child)) != NULL) {
398 name = restart->name;
399 what = restart->what;
400 restart->pid = 0;
401 gs.numpids--;
402 thread_cancel(restart->t_kill);
403 restart->t_kill = NULL;
404 /* Update restart time to reflect the time the command
405 * completed. */
406 gettimeofday(&restart->time, NULL);
407 } else {
408 flog_err_sys(
409 EC_LIB_SYSTEM_CALL,
410 "waitpid returned status for an unknown child process %d",
411 (int)child);
412 name = "(unknown)";
413 what = "background";
414 }
415 if (WIFSTOPPED(status))
416 zlog_warn("warning: %s %s process %d is stopped", what, name,
417 (int)child);
418 else if (WIFSIGNALED(status))
419 zlog_warn("%s %s process %d terminated due to signal %d", what,
420 name, (int)child, WTERMSIG(status));
421 else if (WIFEXITED(status)) {
422 if (WEXITSTATUS(status) != 0)
423 zlog_warn(
424 "%s %s process %d exited with non-zero status %d",
425 what, name, (int)child, WEXITSTATUS(status));
426 else {
427 zlog_debug("%s %s process %d exited normally", what,
428 name, (int)child);
429
430 if (restart && restart != &gs.restart) {
431 dmn = container_of(restart, struct daemon,
432 restart);
433 restart_done(dmn);
434 } else if (restart)
435 for (dmn = gs.daemons; dmn; dmn = dmn->next)
436 restart_done(dmn);
437 }
438 } else
439 flog_err_sys(
440 EC_LIB_SYSTEM_CALL,
441 "cannot interpret %s %s process %d wait status 0x%x",
442 what, name, (int)child, status);
443 phase_check();
444 }
445
446 static int run_job(struct restart_info *restart, const char *cmdtype,
447 const char *command, int force, int update_interval)
448 {
449 struct timeval delay;
450
451 if (gs.loglevel > LOG_DEBUG + 1)
452 zlog_debug("attempting to %s %s", cmdtype, restart->name);
453
454 if (restart->pid) {
455 if (gs.loglevel > LOG_DEBUG + 1)
456 zlog_debug(
457 "cannot %s %s, previous pid %d still running",
458 cmdtype, restart->name, (int)restart->pid);
459 return -1;
460 }
461
462 #if defined HAVE_SYSTEMD
463 char buffer[512];
464
465 snprintf(buffer, sizeof(buffer), "restarting %s", restart->name);
466 systemd_send_status(buffer);
467 #endif
468
469 /* Note: time_elapsed test must come before the force test, since we
470 need
471 to make sure that delay is initialized for use below in updating the
472 restart interval. */
473 if ((time_elapsed(&delay, &restart->time)->tv_sec < restart->interval)
474 && !force) {
475
476 if (gs.loglevel > LOG_DEBUG + 1)
477 zlog_debug(
478 "postponing %s %s: "
479 "elapsed time %ld < retry interval %ld",
480 cmdtype, restart->name, (long)delay.tv_sec,
481 restart->interval);
482 return -1;
483 }
484
485 gettimeofday(&restart->time, NULL);
486 restart->kills = 0;
487 {
488 char cmd[strlen(command) + strlen(restart->name) + 1];
489 snprintf(cmd, sizeof(cmd), command, restart->name);
490 if ((restart->pid = run_background(cmd)) > 0) {
491 restart->t_kill = NULL;
492 thread_add_timer(master, restart_kill, restart,
493 gs.restart_timeout, &restart->t_kill);
494 restart->what = cmdtype;
495 gs.numpids++;
496 } else
497 restart->pid = 0;
498 }
499
500 #if defined HAVE_SYSTEMD
501 systemd_send_status("FRR Operational");
502 #endif
503 /* Calculate the new restart interval. */
504 if (update_interval) {
505 if (delay.tv_sec > 2 * gs.max_restart_interval)
506 restart->interval = gs.min_restart_interval;
507 else if ((restart->interval *= 2) > gs.max_restart_interval)
508 restart->interval = gs.max_restart_interval;
509 if (gs.loglevel > LOG_DEBUG + 1)
510 zlog_debug("restart %s interval is now %ld",
511 restart->name, restart->interval);
512 }
513 return restart->pid;
514 }
515
/*
 * Helpers that arm a daemon's read handler or one of its wakeup timers.
 * Each clears the stored thread pointer first and then schedules the event.
 * Every macro expands to a single statement WITHOUT a trailing semicolon,
 * so "MACRO(dmn);" is safe in an unbraced if/else.
 */
#define SET_READ_HANDLER(DMN) \
	do { \
		(DMN)->t_read = NULL; \
		thread_add_read(master, handle_read, (DMN), (DMN)->fd, \
				&(DMN)->t_read); \
	} while (0)

/* Retry the connection after a randomized polling period. */
#define SET_WAKEUP_DOWN(DMN) \
	do { \
		(DMN)->t_wakeup = NULL; \
		thread_add_timer_msec(master, wakeup_down, (DMN), \
				      FUZZY(gs.period), &(DMN)->t_wakeup); \
	} while (0)

/* Re-check an unresponsive daemon after a randomized polling period. */
#define SET_WAKEUP_UNRESPONSIVE(DMN) \
	do { \
		(DMN)->t_wakeup = NULL; \
		thread_add_timer_msec(master, wakeup_unresponsive, (DMN), \
				      FUZZY(gs.period), &(DMN)->t_wakeup); \
	} while (0)

/* Send the next echo request after a randomized polling period. */
#define SET_WAKEUP_ECHO(DMN) \
	do { \
		(DMN)->t_wakeup = NULL; \
		thread_add_timer_msec(master, wakeup_send_echo, (DMN), \
				      FUZZY(gs.period), &(DMN)->t_wakeup); \
	} while (0)
543
544 static int wakeup_down(struct thread *t_wakeup)
545 {
546 struct daemon *dmn = THREAD_ARG(t_wakeup);
547
548 dmn->t_wakeup = NULL;
549 if (try_connect(dmn) < 0)
550 SET_WAKEUP_DOWN(dmn);
551 if ((dmn->connect_tries > 1) && (dmn->state != DAEMON_UP))
552 try_restart(dmn);
553 return 0;
554 }
555
556 static int wakeup_init(struct thread *t_wakeup)
557 {
558 struct daemon *dmn = THREAD_ARG(t_wakeup);
559
560 dmn->t_wakeup = NULL;
561 if (try_connect(dmn) < 0) {
562 flog_err(EC_WATCHFRR_CONNECTION,
563 "%s state -> down : initial connection attempt failed",
564 dmn->name);
565 dmn->state = DAEMON_DOWN;
566 }
567 phase_check();
568 return 0;
569 }
570
571 static void restart_done(struct daemon *dmn)
572 {
573 if (dmn->state != DAEMON_DOWN) {
574 zlog_warn(
575 "Daemon: %s: is in %s state but expected it to be in DAEMON_DOWN state",
576 dmn->name, state_str[dmn->state]);
577 return;
578 }
579 if (dmn->t_wakeup)
580 THREAD_OFF(dmn->t_wakeup);
581 if (try_connect(dmn) < 0)
582 SET_WAKEUP_DOWN(dmn);
583 }
584
585 static void daemon_down(struct daemon *dmn, const char *why)
586 {
587 if (IS_UP(dmn) || (dmn->state == DAEMON_INIT))
588 flog_err(EC_WATCHFRR_CONNECTION, "%s state -> down : %s",
589 dmn->name, why);
590 else if (gs.loglevel > LOG_DEBUG)
591 zlog_debug("%s still down : %s", dmn->name, why);
592 if (IS_UP(dmn))
593 gs.numdown++;
594 dmn->state = DAEMON_DOWN;
595 if (dmn->fd >= 0) {
596 close(dmn->fd);
597 dmn->fd = -1;
598 }
599 THREAD_OFF(dmn->t_read);
600 THREAD_OFF(dmn->t_write);
601 THREAD_OFF(dmn->t_wakeup);
602 if (try_connect(dmn) < 0)
603 SET_WAKEUP_DOWN(dmn);
604 phase_check();
605 }
606
607 static int handle_read(struct thread *t_read)
608 {
609 struct daemon *dmn = THREAD_ARG(t_read);
610 static const char resp[sizeof(PING_TOKEN) + 4] = PING_TOKEN "\n";
611 char buf[sizeof(resp) + 100];
612 ssize_t rc;
613 struct timeval delay;
614
615 dmn->t_read = NULL;
616 if ((rc = read(dmn->fd, buf, sizeof(buf))) < 0) {
617 char why[100];
618
619 if (ERRNO_IO_RETRY(errno)) {
620 /* Pretend it never happened. */
621 SET_READ_HANDLER(dmn);
622 return 0;
623 }
624 snprintf(why, sizeof(why), "unexpected read error: %s",
625 safe_strerror(errno));
626 daemon_down(dmn, why);
627 return 0;
628 }
629 if (rc == 0) {
630 daemon_down(dmn, "read returned EOF");
631 return 0;
632 }
633 if (!dmn->echo_sent.tv_sec) {
634 char why[sizeof(buf) + 100];
635 snprintf(why, sizeof(why),
636 "unexpected read returns %d bytes: %.*s", (int)rc,
637 (int)rc, buf);
638 daemon_down(dmn, why);
639 return 0;
640 }
641
642 /* We are expecting an echo response: is there any chance that the
643 response would not be returned entirely in the first read? That
644 seems inconceivable... */
645 if ((rc != sizeof(resp)) || memcmp(buf, resp, sizeof(resp))) {
646 char why[100 + sizeof(buf)];
647 snprintf(why, sizeof(why),
648 "read returned bad echo response of %d bytes "
649 "(expecting %u): %.*s",
650 (int)rc, (unsigned int)sizeof(resp), (int)rc, buf);
651 daemon_down(dmn, why);
652 return 0;
653 }
654
655 time_elapsed(&delay, &dmn->echo_sent);
656 dmn->echo_sent.tv_sec = 0;
657 if (dmn->state == DAEMON_UNRESPONSIVE) {
658 if (delay.tv_sec < gs.timeout) {
659 dmn->state = DAEMON_UP;
660 zlog_warn(
661 "%s state -> up : echo response received after %ld.%06ld "
662 "seconds",
663 dmn->name, (long)delay.tv_sec,
664 (long)delay.tv_usec);
665 } else
666 zlog_warn(
667 "%s: slow echo response finally received after %ld.%06ld "
668 "seconds",
669 dmn->name, (long)delay.tv_sec,
670 (long)delay.tv_usec);
671 } else if (gs.loglevel > LOG_DEBUG + 1)
672 zlog_debug("%s: echo response received after %ld.%06ld seconds",
673 dmn->name, (long)delay.tv_sec, (long)delay.tv_usec);
674
675 SET_READ_HANDLER(dmn);
676 if (dmn->t_wakeup)
677 thread_cancel(dmn->t_wakeup);
678 SET_WAKEUP_ECHO(dmn);
679
680 return 0;
681 }
682
683 /*
684 * Wait till we notice that all daemons are ready before
685 * we send we are ready to systemd
686 */
687 static void daemon_send_ready(int exitcode)
688 {
689 FILE *fp;
690 static int sent = 0;
691 char started[1024];
692
693 if (sent)
694 return;
695
696 if (exitcode == 0)
697 zlog_notice("all daemons up, doing startup-complete notify");
698 else if (gs.numdown < gs.numdaemons)
699 flog_err(EC_WATCHFRR_CONNECTION,
700 "startup did not complete within timeout"
701 " (%d/%d daemons running)",
702 gs.numdaemons - gs.numdown, gs.numdaemons);
703 else {
704 flog_err(EC_WATCHFRR_CONNECTION,
705 "all configured daemons failed to start"
706 " -- exiting watchfrr");
707 exit(exitcode);
708
709 }
710
711 frr_detach();
712
713 snprintf(started, sizeof(started), "%s%s", frr_vtydir,
714 "watchfrr.started");
715 fp = fopen(started, "w");
716 if (fp)
717 fclose(fp);
718 #if defined HAVE_SYSTEMD
719 systemd_send_started(master, 0);
720 systemd_send_status("FRR Operational");
721 #endif
722 sent = 1;
723 }
724
725 static void daemon_up(struct daemon *dmn, const char *why)
726 {
727 dmn->state = DAEMON_UP;
728 gs.numdown--;
729 dmn->connect_tries = 0;
730 zlog_notice("%s state -> up : %s", dmn->name, why);
731 if (gs.numdown == 0)
732 daemon_send_ready(0);
733 SET_WAKEUP_ECHO(dmn);
734 phase_check();
735 }
736
737 static int check_connect(struct thread *t_write)
738 {
739 struct daemon *dmn = THREAD_ARG(t_write);
740 int sockerr;
741 socklen_t reslen = sizeof(sockerr);
742
743 dmn->t_write = NULL;
744 if (getsockopt(dmn->fd, SOL_SOCKET, SO_ERROR, (char *)&sockerr, &reslen)
745 < 0) {
746 zlog_warn("%s: check_connect: getsockopt failed: %s", dmn->name,
747 safe_strerror(errno));
748 daemon_down(dmn,
749 "getsockopt failed checking connection success");
750 return 0;
751 }
752 if ((reslen == sizeof(sockerr)) && sockerr) {
753 char why[100];
754 snprintf(
755 why, sizeof(why),
756 "getsockopt reports that connection attempt failed: %s",
757 safe_strerror(sockerr));
758 daemon_down(dmn, why);
759 return 0;
760 }
761
762 daemon_up(dmn, "delayed connect succeeded");
763 return 0;
764 }
765
766 static int wakeup_connect_hanging(struct thread *t_wakeup)
767 {
768 struct daemon *dmn = THREAD_ARG(t_wakeup);
769 char why[100];
770
771 dmn->t_wakeup = NULL;
772 snprintf(why, sizeof(why),
773 "connection attempt timed out after %ld seconds", gs.timeout);
774 daemon_down(dmn, why);
775 return 0;
776 }
777
778 /* Making connection to protocol daemon. */
779 static int try_connect(struct daemon *dmn)
780 {
781 int sock;
782 struct sockaddr_un addr;
783 socklen_t len;
784
785 if (gs.loglevel > LOG_DEBUG + 1)
786 zlog_debug("%s: attempting to connect", dmn->name);
787 dmn->connect_tries++;
788
789 memset(&addr, 0, sizeof(struct sockaddr_un));
790 addr.sun_family = AF_UNIX;
791 snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/%s.vty", gs.vtydir,
792 dmn->name);
793 #ifdef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN
794 len = addr.sun_len = SUN_LEN(&addr);
795 #else
796 len = sizeof(addr.sun_family) + strlen(addr.sun_path);
797 #endif /* HAVE_STRUCT_SOCKADDR_UN_SUN_LEN */
798
799 /* Quick check to see if we might succeed before we go to the trouble
800 of creating a socket. */
801 if (access(addr.sun_path, W_OK) < 0) {
802 if (errno != ENOENT)
803 flog_err_sys(EC_LIB_SYSTEM_CALL,
804 "%s: access to socket %s denied: %s",
805 dmn->name, addr.sun_path,
806 safe_strerror(errno));
807 return -1;
808 }
809
810 if ((sock = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
811 flog_err_sys(EC_LIB_SOCKET, "%s(%s): cannot make socket: %s",
812 __func__, addr.sun_path, safe_strerror(errno));
813 return -1;
814 }
815
816 if (set_nonblocking(sock) < 0 || set_cloexec(sock) < 0) {
817 flog_err_sys(EC_LIB_SYSTEM_CALL,
818 "%s(%s): set_nonblocking/cloexec(%d) failed",
819 __func__, addr.sun_path, sock);
820 close(sock);
821 return -1;
822 }
823
824 if (connect(sock, (struct sockaddr *)&addr, len) < 0) {
825 if ((errno != EINPROGRESS) && (errno != EWOULDBLOCK)) {
826 if (gs.loglevel > LOG_DEBUG)
827 zlog_debug("%s(%s): connect failed: %s",
828 __func__, addr.sun_path,
829 safe_strerror(errno));
830 close(sock);
831 return -1;
832 }
833 if (gs.loglevel > LOG_DEBUG)
834 zlog_debug("%s: connection in progress", dmn->name);
835 dmn->state = DAEMON_CONNECTING;
836 dmn->fd = sock;
837 dmn->t_write = NULL;
838 thread_add_write(master, check_connect, dmn, dmn->fd,
839 &dmn->t_write);
840 dmn->t_wakeup = NULL;
841 thread_add_timer(master, wakeup_connect_hanging, dmn,
842 gs.timeout, &dmn->t_wakeup);
843 SET_READ_HANDLER(dmn);
844 return 0;
845 }
846
847 dmn->fd = sock;
848 SET_READ_HANDLER(dmn);
849 daemon_up(dmn, "connect succeeded");
850 return 1;
851 }
852
853 static int phase_hanging(struct thread *t_hanging)
854 {
855 gs.t_phase_hanging = NULL;
856 flog_err(EC_WATCHFRR_CONNECTION,
857 "Phase [%s] hanging for %ld seconds, aborting phased restart",
858 phase_str[gs.phase], PHASE_TIMEOUT);
859 gs.phase = PHASE_NONE;
860 return 0;
861 }
862
863 static void set_phase(restart_phase_t new_phase)
864 {
865 gs.phase = new_phase;
866 if (gs.t_phase_hanging)
867 thread_cancel(gs.t_phase_hanging);
868 gs.t_phase_hanging = NULL;
869 thread_add_timer(master, phase_hanging, NULL, PHASE_TIMEOUT,
870 &gs.t_phase_hanging);
871 }
872
873 static void phase_check(void)
874 {
875 struct daemon *dmn;
876
877 switch (gs.phase) {
878 case PHASE_NONE:
879 break;
880
881 case PHASE_INIT:
882 for (dmn = gs.daemons; dmn; dmn = dmn->next)
883 if (dmn->state == DAEMON_INIT)
884 return;
885
886 /* startup complete, everything out of INIT */
887 gs.phase = PHASE_NONE;
888 for (dmn = gs.daemons; dmn; dmn = dmn->next)
889 if (dmn->state == DAEMON_DOWN) {
890 SET_WAKEUP_DOWN(dmn);
891 try_restart(dmn);
892 }
893 break;
894 case PHASE_STOPS_PENDING:
895 if (gs.numpids)
896 break;
897 zlog_info(
898 "Phased restart: all routing daemon stop jobs have completed.");
899 set_phase(PHASE_WAITING_DOWN);
900
901 /*FALLTHRU*/
902 case PHASE_WAITING_DOWN:
903 if (gs.numdown + IS_UP(gs.special) < gs.numdaemons)
904 break;
905 zlog_info("Phased restart: all routing daemons now down.");
906 run_job(&gs.special->restart, "restart", gs.restart_command, 1,
907 1);
908 set_phase(PHASE_ZEBRA_RESTART_PENDING);
909
910 /*FALLTHRU*/
911 case PHASE_ZEBRA_RESTART_PENDING:
912 if (gs.special->restart.pid)
913 break;
914 zlog_info("Phased restart: %s restart job completed.",
915 gs.special->name);
916 set_phase(PHASE_WAITING_ZEBRA_UP);
917
918 /*FALLTHRU*/
919 case PHASE_WAITING_ZEBRA_UP:
920 if (!IS_UP(gs.special))
921 break;
922 zlog_info("Phased restart: %s is now up.", gs.special->name);
923 {
924 struct daemon *dmn;
925 for (dmn = gs.daemons; dmn; dmn = dmn->next) {
926 if (dmn != gs.special)
927 run_job(&dmn->restart, "start",
928 gs.start_command, 1, 0);
929 }
930 }
931 gs.phase = PHASE_NONE;
932 THREAD_OFF(gs.t_phase_hanging);
933 zlog_notice("Phased global restart has completed.");
934 break;
935 }
936 }
937
938 static void try_restart(struct daemon *dmn)
939 {
940 if (watch_only)
941 return;
942
943 if (dmn != gs.special) {
944 if ((gs.special->state == DAEMON_UP)
945 && (gs.phase == PHASE_NONE))
946 run_job(&dmn->restart, "restart", gs.restart_command, 0,
947 1);
948 else
949 zlog_debug(
950 "%s: postponing restart attempt because master %s daemon "
951 "not up [%s], or phased restart in progress",
952 dmn->name, gs.special->name,
953 state_str[gs.special->state]);
954 return;
955 }
956
957 if ((gs.phase != PHASE_NONE) || gs.numpids) {
958 if (gs.loglevel > LOG_DEBUG + 1)
959 zlog_debug(
960 "postponing phased global restart: restart already in "
961 "progress [%s], or outstanding child processes [%d]",
962 phase_str[gs.phase], gs.numpids);
963 return;
964 }
965 /* Is it too soon for a restart? */
966 {
967 struct timeval delay;
968 if (time_elapsed(&delay, &gs.special->restart.time)->tv_sec
969 < gs.special->restart.interval) {
970 if (gs.loglevel > LOG_DEBUG + 1)
971 zlog_debug(
972 "postponing phased global restart: "
973 "elapsed time %ld < retry interval %ld",
974 (long)delay.tv_sec,
975 gs.special->restart.interval);
976 return;
977 }
978 }
979 run_job(&gs.restart, "restart", gs.restart_command, 0, 1);
980 }
981
982 static int wakeup_unresponsive(struct thread *t_wakeup)
983 {
984 struct daemon *dmn = THREAD_ARG(t_wakeup);
985
986 dmn->t_wakeup = NULL;
987 if (dmn->state != DAEMON_UNRESPONSIVE)
988 flog_err(EC_WATCHFRR_CONNECTION,
989 "%s: no longer unresponsive (now %s), "
990 "wakeup should have been cancelled!",
991 dmn->name, state_str[dmn->state]);
992 else {
993 SET_WAKEUP_UNRESPONSIVE(dmn);
994 try_restart(dmn);
995 }
996 return 0;
997 }
998
999 static int wakeup_no_answer(struct thread *t_wakeup)
1000 {
1001 struct daemon *dmn = THREAD_ARG(t_wakeup);
1002
1003 dmn->t_wakeup = NULL;
1004 dmn->state = DAEMON_UNRESPONSIVE;
1005 if (dmn->ignore_timeout)
1006 return 0;
1007 flog_err(EC_WATCHFRR_CONNECTION,
1008 "%s state -> unresponsive : no response yet to ping "
1009 "sent %ld seconds ago",
1010 dmn->name, gs.timeout);
1011 SET_WAKEUP_UNRESPONSIVE(dmn);
1012 try_restart(dmn);
1013 return 0;
1014 }
1015
1016 static int wakeup_send_echo(struct thread *t_wakeup)
1017 {
1018 static const char echocmd[] = "echo " PING_TOKEN;
1019 ssize_t rc;
1020 struct daemon *dmn = THREAD_ARG(t_wakeup);
1021
1022 dmn->t_wakeup = NULL;
1023 if (((rc = write(dmn->fd, echocmd, sizeof(echocmd))) < 0)
1024 || ((size_t)rc != sizeof(echocmd))) {
1025 char why[100 + sizeof(echocmd)];
1026 snprintf(why, sizeof(why),
1027 "write '%s' returned %d instead of %u", echocmd,
1028 (int)rc, (unsigned int)sizeof(echocmd));
1029 daemon_down(dmn, why);
1030 } else {
1031 gettimeofday(&dmn->echo_sent, NULL);
1032 dmn->t_wakeup = NULL;
1033 thread_add_timer(master, wakeup_no_answer, dmn, gs.timeout,
1034 &dmn->t_wakeup);
1035 }
1036 return 0;
1037 }
1038
1039 bool check_all_up(void)
1040 {
1041 struct daemon *dmn;
1042
1043 for (dmn = gs.daemons; dmn; dmn = dmn->next)
1044 if (dmn->state != DAEMON_UP)
1045 return false;
1046 return true;
1047 }
1048
1049 void watchfrr_status(struct vty *vty)
1050 {
1051 struct daemon *dmn;
1052 struct timeval delay;
1053
1054 vty_out(vty, "watchfrr global phase: %s\n", phase_str[gs.phase]);
1055 if (gs.restart.pid)
1056 vty_out(vty, " global restart running, pid %ld\n",
1057 (long)gs.restart.pid);
1058
1059 for (dmn = gs.daemons; dmn; dmn = dmn->next) {
1060 vty_out(vty, " %-20s %s%s", dmn->name, state_str[dmn->state],
1061 dmn->ignore_timeout ? "/Ignoring Timeout\n" : "\n");
1062 if (dmn->restart.pid)
1063 vty_out(vty, " restart running, pid %ld\n",
1064 (long)dmn->restart.pid);
1065 else if (dmn->state == DAEMON_DOWN &&
1066 time_elapsed(&delay, &dmn->restart.time)->tv_sec
1067 < dmn->restart.interval)
1068 vty_out(vty, " restarting in %jd seconds"
1069 " (%jds backoff interval)\n",
1070 (intmax_t)dmn->restart.interval
1071 - (intmax_t)delay.tv_sec,
1072 (intmax_t)dmn->restart.interval);
1073 }
1074 }
1075
/*
 * Termination handler, registered in watchfrr_signals for both SIGINT and
 * SIGTERM: log a notice, call systemd_send_stopping() and exit with
 * status 0.
 */
static void sigint(void)
{
	zlog_notice("Terminating on signal");
	systemd_send_stopping();
	exit(EXIT_SUCCESS);
}
1082
/*
 * Validate a command template.
 *
 * The template is accepted only when it contains exactly one '%'
 * character and that character is immediately followed by 's'.
 *
 * Returns 1 when the template is valid, 0 otherwise.
 */
static int valid_command(const char *cmd)
{
	const char *pct = strchr(cmd, '%');

	if (pct == NULL)
		return 0;
	if (pct[1] != 's')
		return 0;

	/* Any further '%' after the "%s" makes the template invalid. */
	return strchr(pct + 1, '%') == NULL;
}
1090
/* This is an ugly hack to circumvent problems with passing command-line
   arguments that contain spaces.  The fix is to use a configuration file.

   Returns a newly allocated copy of cmd in which every non-overlapping
   occurrence of blankstr (scanned left to right) is replaced by a single
   space.  The caller owns the returned string.  Exits the process if
   memory cannot be allocated.

   The copy is produced in one pass and text that has already been emitted
   is never rescanned, so an empty blankstr (nothing to replace) or a
   blankstr that itself contains a space cannot loop forever or write
   beyond the buffer. */
static char *translate_blanks(const char *cmd, const char *blankstr)
{
	size_t bslen = strlen(blankstr);
	const char *src = cmd;
	char *res;
	char *dst;

	/* Each replacement maps bslen >= 1 bytes to one byte, so the result
	   is never longer than the input. */
	res = malloc(strlen(cmd) + 1);
	if (!res) {
		perror("malloc");
		exit(1);
	}

	dst = res;
	while (*src != '\0') {
		if (bslen != 0 && strncmp(src, blankstr, bslen) == 0) {
			*dst++ = ' ';
			src += bslen;
		} else {
			*dst++ = *src++;
		}
	}
	*dst = '\0';

	return res;
}
1110
/*
 * Thread callback armed by watchfrr_init() with a STARTUP_TIMEOUT delay.
 * When it fires it calls daemon_send_ready(1).  Always returns 0.
 */
static int startup_timeout(struct thread *t_wakeup)
{
	(void)t_wakeup; /* the thread argument is not needed here */

	daemon_send_ready(1);
	return 0;
}
1116
1117 static void watchfrr_init(int argc, char **argv)
1118 {
1119 const char *special = "zebra";
1120 int i;
1121 struct daemon *dmn, **add = &gs.daemons;
1122 char alldaemons[512] = "", *p = alldaemons;
1123
1124 thread_add_timer_msec(master, startup_timeout, NULL, STARTUP_TIMEOUT,
1125 &gs.t_startup_timeout);
1126
1127 for (i = optind; i < argc; i++) {
1128 dmn = XCALLOC(MTYPE_WATCHFRR_DAEMON, sizeof(*dmn));
1129
1130 dmn->name = dmn->restart.name = argv[i];
1131 dmn->state = DAEMON_INIT;
1132 gs.numdaemons++;
1133 gs.numdown++;
1134 dmn->fd = -1;
1135 dmn->t_wakeup = NULL;
1136 thread_add_timer_msec(master, wakeup_init, dmn, 0,
1137 &dmn->t_wakeup);
1138 dmn->restart.interval = gs.min_restart_interval;
1139 *add = dmn;
1140 add = &dmn->next;
1141
1142 if (!strcmp(dmn->name, special))
1143 gs.special = dmn;
1144 }
1145
1146 if (!gs.daemons) {
1147 fprintf(stderr,
1148 "Must specify one or more daemons to monitor.\n\n");
1149 frr_help_exit(1);
1150 }
1151 if (!watch_only && !gs.special) {
1152 fprintf(stderr, "\"%s\" daemon must be in daemon lists\n\n",
1153 special);
1154 frr_help_exit(1);
1155 }
1156
1157 for (dmn = gs.daemons; dmn; dmn = dmn->next) {
1158 snprintf(p, alldaemons + sizeof(alldaemons) - p, "%s%s",
1159 (p == alldaemons) ? "" : " ", dmn->name);
1160 p += strlen(p);
1161 }
1162 zlog_notice("%s %s watching [%s]%s", progname, FRR_VERSION, alldaemons,
1163 watch_only ? ", monitor mode" : "");
1164 }
1165
1166 struct zebra_privs_t watchfrr_privs = {
1167 #ifdef VTY_GROUP
1168 .vty_group = VTY_GROUP,
1169 #endif
1170 };
1171
1172 static struct quagga_signal_t watchfrr_signals[] = {
1173 {
1174 .signal = SIGINT,
1175 .handler = sigint,
1176 },
1177 {
1178 .signal = SIGTERM,
1179 .handler = sigint,
1180 },
1181 {
1182 .signal = SIGCHLD,
1183 .handler = sigchild,
1184 },
1185 };
1186
1187 FRR_DAEMON_INFO(watchfrr, WATCHFRR,
1188 .flags = FRR_NO_PRIVSEP | FRR_NO_TCPVTY | FRR_LIMITED_CLI
1189 | FRR_NO_CFG_PID_DRY | FRR_NO_ZCLIENT
1190 | FRR_DETACH_LATER,
1191
1192 .printhelp = printhelp,
1193 .copyright = "Copyright 2004 Andrew J. Schorr",
1194
1195 .signals = watchfrr_signals,
1196 .n_signals = array_size(watchfrr_signals),
1197
1198 .privs = &watchfrr_privs, )
1199
/*
 * getopt specification for options that no longer exist.  main() still
 * registers them so it can print a dedicated error message and exit when
 * one of them is used.
 */
#define DEPRECATED_OPTIONS "aAezR:"
1201
1202 int main(int argc, char **argv)
1203 {
1204 int opt;
1205 const char *blankstr = NULL;
1206
1207 frr_preinit(&watchfrr_di, argc, argv);
1208 progname = watchfrr_di.progname;
1209
1210 frr_opt_add("b:dk:l:i:p:r:S:s:t:T:" DEPRECATED_OPTIONS, longopts, "");
1211
1212 gs.restart.name = "all";
1213 while ((opt = frr_getopt(argc, argv, NULL)) != EOF) {
1214 if (opt && opt < 128 && strchr(DEPRECATED_OPTIONS, opt)) {
1215 fprintf(stderr,
1216 "The -%c option no longer exists.\n"
1217 "Please refer to the watchfrr(8) man page.\n",
1218 opt);
1219 exit(1);
1220 }
1221
1222 switch (opt) {
1223 case 0:
1224 break;
1225 case 'b':
1226 blankstr = optarg;
1227 break;
1228 case OPTION_DRY:
1229 watch_only = true;
1230 break;
1231 case 'k':
1232 if (!valid_command(optarg)) {
1233 fprintf(stderr,
1234 "Invalid kill command, must contain '%%s': %s\n",
1235 optarg);
1236 frr_help_exit(1);
1237 }
1238 gs.stop_command = optarg;
1239 break;
1240 case 'l': {
1241 char garbage[3];
1242 if ((sscanf(optarg, "%d%1s", &gs.loglevel, garbage)
1243 != 1)
1244 || (gs.loglevel < LOG_EMERG)) {
1245 fprintf(stderr,
1246 "Invalid loglevel argument: %s\n",
1247 optarg);
1248 frr_help_exit(1);
1249 }
1250 } break;
1251 case OPTION_MINRESTART: {
1252 char garbage[3];
1253 if ((sscanf(optarg, "%ld%1s", &gs.min_restart_interval,
1254 garbage)
1255 != 1)
1256 || (gs.min_restart_interval < 0)) {
1257 fprintf(stderr,
1258 "Invalid min_restart_interval argument: %s\n",
1259 optarg);
1260 frr_help_exit(1);
1261 }
1262 } break;
1263 case OPTION_MAXRESTART: {
1264 char garbage[3];
1265 if ((sscanf(optarg, "%ld%1s", &gs.max_restart_interval,
1266 garbage)
1267 != 1)
1268 || (gs.max_restart_interval < 0)) {
1269 fprintf(stderr,
1270 "Invalid max_restart_interval argument: %s\n",
1271 optarg);
1272 frr_help_exit(1);
1273 }
1274 } break;
1275 case 'i': {
1276 char garbage[3];
1277 int period;
1278 if ((sscanf(optarg, "%d%1s", &period, garbage) != 1)
1279 || (gs.period < 1)) {
1280 fprintf(stderr,
1281 "Invalid interval argument: %s\n",
1282 optarg);
1283 frr_help_exit(1);
1284 }
1285 gs.period = 1000 * period;
1286 } break;
1287 case 'p':
1288 watchfrr_di.pid_file = optarg;
1289 break;
1290 case 'r':
1291 if (!valid_command(optarg)) {
1292 fprintf(stderr,
1293 "Invalid restart command, must contain '%%s': %s\n",
1294 optarg);
1295 frr_help_exit(1);
1296 }
1297 gs.restart_command = optarg;
1298 break;
1299 case 's':
1300 if (!valid_command(optarg)) {
1301 fprintf(stderr,
1302 "Invalid start command, must contain '%%s': %s\n",
1303 optarg);
1304 frr_help_exit(1);
1305 }
1306 gs.start_command = optarg;
1307 break;
1308 case 'S':
1309 gs.vtydir = optarg;
1310 break;
1311 case 't': {
1312 char garbage[3];
1313 if ((sscanf(optarg, "%ld%1s", &gs.timeout, garbage)
1314 != 1)
1315 || (gs.timeout < 1)) {
1316 fprintf(stderr,
1317 "Invalid timeout argument: %s\n",
1318 optarg);
1319 frr_help_exit(1);
1320 }
1321 } break;
1322 case 'T': {
1323 char garbage[3];
1324 if ((sscanf(optarg, "%ld%1s", &gs.restart_timeout,
1325 garbage)
1326 != 1)
1327 || (gs.restart_timeout < 1)) {
1328 fprintf(stderr,
1329 "Invalid restart timeout argument: %s\n",
1330 optarg);
1331 frr_help_exit(1);
1332 }
1333 } break;
1334 default:
1335 fputs("Invalid option.\n", stderr);
1336 frr_help_exit(1);
1337 }
1338 }
1339
1340 if (watch_only
1341 && (gs.start_command || gs.stop_command || gs.restart_command)) {
1342 fputs("Options -r/-s/-k are not used when --dry is active.\n",
1343 stderr);
1344 }
1345 if (!watch_only
1346 && (!gs.restart_command || !gs.start_command || !gs.stop_command)) {
1347 fprintf(stderr,
1348 "Options -s (start), -k (kill), and -r (restart) are required.\n");
1349 frr_help_exit(1);
1350 }
1351
1352 if (blankstr) {
1353 if (gs.restart_command)
1354 gs.restart_command =
1355 translate_blanks(gs.restart_command, blankstr);
1356 if (gs.start_command)
1357 gs.start_command =
1358 translate_blanks(gs.start_command, blankstr);
1359 if (gs.stop_command)
1360 gs.stop_command =
1361 translate_blanks(gs.stop_command, blankstr);
1362 }
1363
1364 gs.restart.interval = gs.min_restart_interval;
1365
1366 master = frr_init();
1367 watchfrr_error_init();
1368 watchfrr_init(argc, argv);
1369 watchfrr_vty_init();
1370
1371 frr_config_fork();
1372
1373 zlog_set_level(ZLOG_DEST_MONITOR, ZLOG_DISABLED);
1374 if (watchfrr_di.daemon_mode)
1375 zlog_set_level(ZLOG_DEST_SYSLOG, MIN(gs.loglevel, LOG_DEBUG));
1376 else
1377 zlog_set_level(ZLOG_DEST_STDOUT, MIN(gs.loglevel, LOG_DEBUG));
1378
1379 frr_run(master);
1380
1381 systemd_send_stopping();
1382 /* Not reached. */
1383 return 0;
1384 }