]> git.proxmox.com Git - mirror_frr.git/blob - watchfrr/watchfrr.c
Merge pull request #6700 from deastoe/ospf6-interface-decimal-area
[mirror_frr.git] / watchfrr / watchfrr.c
1 /*
2 * Monitor status of frr daemons and restart if necessary.
3 *
4 * Copyright (C) 2004 Andrew J. Schorr
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include <zebra.h>
22 #include <thread.h>
23 #include <log.h>
24 #include <network.h>
25 #include <sigevent.h>
26 #include <lib/version.h>
27 #include "command.h"
28 #include "libfrr.h"
29 #include "lib_errors.h"
30 #include "zlog_targets.h"
31 #include "network.h"
32
33 #include <getopt.h>
34 #include <sys/un.h>
35 #include <sys/wait.h>
36 #include <memory.h>
37 #include <systemd.h>
38
39 #include "watchfrr.h"
40 #include "watchfrr_errors.h"
41
/* Smaller of two values.  Note that the arguments are evaluated twice. */
#ifndef MIN
#define MIN(X, Y) (((Y) < (X)) ? (Y) : (X))
#endif

/*
 * Macros to help randomize timers.
 *
 * JITTER(X) yields frr_weak_random() reduced modulo (X)+1 and shifted down
 * by (X)/2; FUZZY(X) adds the jitter of one twentieth of X to X itself.
 */
#define JITTER(X) ((frr_weak_random() % ((X) + 1)) - ((X) / 2))
#define FUZZY(X) ((X) + JITTER((X) / 20))

/* Built-in defaults for the tunables kept in the global state. */
#define DEFAULT_PERIOD 5
#define DEFAULT_TIMEOUT 90
#define DEFAULT_RESTART_TIMEOUT 20
#define DEFAULT_LOGLEVEL LOG_INFO
#define DEFAULT_MIN_RESTART 60
#define DEFAULT_MAX_RESTART 600

/* Default shell command templates; "%s" is replaced by the daemon name. */
#define DEFAULT_RESTART_CMD WATCHFRR_SH_PATH " restart %s"
#define DEFAULT_START_CMD WATCHFRR_SH_PATH " start %s"
#define DEFAULT_STOP_CMD WATCHFRR_SH_PATH " stop %s"

/* Token sent with the echo command and expected back in the reply. */
#define PING_TOKEN "PING"
62
63 DEFINE_MGROUP(WATCHFRR, "watchfrr")
64 DEFINE_MTYPE_STATIC(WATCHFRR, WATCHFRR_DAEMON, "watchfrr daemon entry")
65
66 /* Needs to be global, referenced somewhere inside libfrr. */
67 struct thread_master *master;
68
69 static bool watch_only = false;
70
/*
 * Phases of watchfrr operation: start-up, idle, and the individual steps of
 * a phased global restart.  The enumerators index phase_str[] below.
 */
typedef enum {
	PHASE_NONE = 0,
	PHASE_INIT,
	PHASE_STOPS_PENDING,
	PHASE_WAITING_DOWN,
	PHASE_ZEBRA_RESTART_PENDING,
	PHASE_WAITING_ZEBRA_UP
} restart_phase_t;

/*
 * Human-readable phase names, keyed by restart_phase_t.  Designated
 * initializers keep each string tied to its enumerator; the table holds
 * exactly one entry per phase.
 */
static const char *const phase_str[] = {
	[PHASE_NONE] = "Idle",
	[PHASE_INIT] = "Startup",
	[PHASE_STOPS_PENDING] = "Stop jobs running",
	[PHASE_WAITING_DOWN] = "Waiting for other daemons to come down",
	[PHASE_ZEBRA_RESTART_PENDING] = "Zebra restart job running",
	[PHASE_WAITING_ZEBRA_UP] = "Waiting for zebra to come up",
};
89
/* Seconds a phase of the phased restart may last before it is aborted. */
#define PHASE_TIMEOUT (3 * gs.restart_timeout)
/*
 * Milliseconds after which start-up is declared over even if not every
 * daemon came up.  Parenthesized so the macro is safe in any expression.
 */
#define STARTUP_TIMEOUT (55 * 1000)
92
/* Bookkeeping for one background (start/stop/restart) shell job. */
struct restart_info {
	/* Name substituted into the command and used in log messages. */
	const char *name;
	/* Kind of command last launched (e.g. "restart"), for logging. */
	const char *what;
	/* Pid of the running job; 0 when no job is outstanding. */
	pid_t pid;
	/* When the job was launched, later updated to its completion time. */
	struct timeval time;
	/* Minimum number of seconds required between two job launches. */
	long interval;
	/* Timer that signals the job if it runs for too long. */
	struct thread *t_kill;
	/* Signals sent so far: the first is SIGTERM, later ones SIGKILL. */
	int kills;
};
102
103 static struct global_state {
104 restart_phase_t phase;
105 struct thread *t_phase_hanging;
106 struct thread *t_startup_timeout;
107 const char *vtydir;
108 long period;
109 long timeout;
110 long restart_timeout;
111 long min_restart_interval;
112 long max_restart_interval;
113 struct daemon *daemons;
114 const char *restart_command;
115 const char *start_command;
116 const char *stop_command;
117 struct restart_info restart;
118 int loglevel;
119 struct daemon *special; /* points to zebra when doing phased restart */
120 int numdaemons;
121 int numpids;
122 int numdown; /* # of daemons that are not UP or UNRESPONSIVE */
123 } gs = {
124 .phase = PHASE_INIT,
125 .vtydir = frr_vtydir,
126 .period = 1000 * DEFAULT_PERIOD,
127 .timeout = DEFAULT_TIMEOUT,
128 .restart_timeout = DEFAULT_RESTART_TIMEOUT,
129 .loglevel = DEFAULT_LOGLEVEL,
130 .min_restart_interval = DEFAULT_MIN_RESTART,
131 .max_restart_interval = DEFAULT_MAX_RESTART,
132 .restart_command = DEFAULT_RESTART_CMD,
133 .start_command = DEFAULT_START_CMD,
134 .stop_command = DEFAULT_STOP_CMD,
135 };
136
/* Connection state of a monitored daemon; indexes state_str[] below. */
typedef enum {
	DAEMON_INIT,
	DAEMON_DOWN,
	DAEMON_CONNECTING,
	DAEMON_UP,
	DAEMON_UNRESPONSIVE
} daemon_state_t;

/* A daemon counts as "up" while it is connected, responsive or not. */
#define IS_UP(DMN) \
	(((DMN)->state == DAEMON_UP) || ((DMN)->state == DAEMON_UNRESPONSIVE))

/* Printable names of the daemon states, keyed by daemon_state_t. */
static const char *const state_str[] = {
	[DAEMON_INIT] = "Init",
	[DAEMON_DOWN] = "Down",
	[DAEMON_CONNECTING] = "Connecting",
	[DAEMON_UP] = "Up",
	[DAEMON_UNRESPONSIVE] = "Unresponsive",
};
151
152 struct daemon {
153 const char *name;
154 daemon_state_t state;
155 int fd;
156 struct timeval echo_sent;
157 unsigned int connect_tries;
158 struct thread *t_wakeup;
159 struct thread *t_read;
160 struct thread *t_write;
161 struct daemon *next;
162 struct restart_info restart;
163
164 /*
165 * For a given daemon, if we've turned on ignore timeouts
166 * ignore the timeout value and assume everything is ok
167 * This is for daemon debugging w/ gdb after we have started
168 * FRR and realize we have something that needs to be looked
169 * at
170 */
171 bool ignore_timeout;
172 };
173
/* getopt_long() codes for the options that have no short form. */
#define OPTION_MINRESTART 2000
#define OPTION_MAXRESTART 2001
#define OPTION_DRY 2002

/* Long command-line options understood by watchfrr. */
static const struct option longopts[] = {
	{"daemon", no_argument, NULL, 'd'},
	{"statedir", required_argument, NULL, 'S'},
	{"loglevel", required_argument, NULL, 'l'},
	{"interval", required_argument, NULL, 'i'},
	{"timeout", required_argument, NULL, 't'},
	{"restart-timeout", required_argument, NULL, 'T'},
	{"restart", required_argument, NULL, 'r'},
	{"start-command", required_argument, NULL, 's'},
	{"kill-command", required_argument, NULL, 'k'},
	{"dry", no_argument, NULL, OPTION_DRY},
	{"min-restart-interval", required_argument, NULL, OPTION_MINRESTART},
	{"max-restart-interval", required_argument, NULL, OPTION_MAXRESTART},
	{"pid-file", required_argument, NULL, 'p'},
	{"blank-string", required_argument, NULL, 'b'},
	{"help", no_argument, NULL, 'h'},
	{"version", no_argument, NULL, 'v'},
	/* Terminating all-zero entry. */
	{NULL, 0, NULL, 0},
};
196
/* Forward declarations for functions used before they are defined. */
static void phase_check(void);
static void restart_done(struct daemon *dmn);
static int try_connect(struct daemon *dmn);
static void try_restart(struct daemon *dmn);
static int wakeup_send_echo(struct thread *t_wakeup);

/* Program name printed in the usage text. */
static const char *progname;
204
205 void watchfrr_set_ignore_daemon(struct vty *vty, const char *dname, bool ignore)
206 {
207 struct daemon *dmn;
208
209 for (dmn = gs.daemons; dmn; dmn = dmn->next) {
210 if (strncmp(dmn->name, dname, strlen(dmn->name)) == 0)
211 break;
212 }
213
214 if (dmn) {
215 dmn->ignore_timeout = ignore;
216 vty_out(vty, "%s switching to %s\n", dmn->name,
217 ignore ? "ignore" : "watch");
218 } else
219 vty_out(vty, "%s is not configured for running at the moment",
220 dname);
221 }
222
223 static void printhelp(FILE *target)
224 {
225 fprintf(target,
226 "Usage : %s [OPTION...] <daemon name> ...\n\n\
227 Watchdog program to monitor status of frr daemons and try to restart\n\
228 them if they are down or unresponsive. It determines whether a daemon is\n\
229 up based on whether it can connect to the daemon's vty unix stream socket.\n\
230 It then repeatedly sends echo commands over that socket to determine whether\n\
231 the daemon is responsive. If the daemon crashes, we will receive an EOF\n\
232 on the socket connection and know immediately that the daemon is down.\n\n\
233 The daemons to be monitored should be listed on the command line.\n\n\
234 In order to avoid attempting to restart the daemons in a fast loop,\n\
235 the -m and -M options allow you to control the minimum delay between\n\
236 restart commands. The minimum restart delay is recalculated each time\n\
237 a restart is attempted: if the time since the last restart attempt exceeds\n\
238 twice the -M value, then the restart delay is set to the -m value.\n\
239 Otherwise, the interval is doubled (but capped at the -M value).\n\n",
240 progname);
241
242 fprintf(target,
243 "Options:\n\
244 -d, --daemon Run in daemon mode. In this mode, error messages are sent\n\
245 to syslog instead of stdout.\n\
246 -S, --statedir Set the vty socket directory (default is %s)\n\
247 -l, --loglevel Set the logging level (default is %d).\n\
248 The value should range from %d (LOG_EMERG) to %d (LOG_DEBUG),\n\
249 but it can be set higher than %d if extra-verbose debugging\n\
250 messages are desired.\n\
251 --min-restart-interval\n\
252 Set the minimum seconds to wait between invocations of daemon\n\
253 restart commands (default is %d).\n\
254 --max-restart-interval\n\
255 Set the maximum seconds to wait between invocations of daemon\n\
256 restart commands (default is %d).\n\
257 -i, --interval Set the status polling interval in seconds (default is %d)\n\
258 -t, --timeout Set the unresponsiveness timeout in seconds (default is %d)\n\
259 -T, --restart-timeout\n\
260 Set the restart (kill) timeout in seconds (default is %d).\n\
261 If any background jobs are still running after this much\n\
262 time has elapsed, they will be killed.\n\
263 -r, --restart Supply a Bourne shell command to use to restart a single\n\
264 daemon. The command string should include '%%s' where the\n\
265 name of the daemon should be substituted.\n\
266 (default: '%s')\n\
267 -s, --start-command\n\
268 Supply a Bourne shell to command to use to start a single\n\
269 daemon. The command string should include '%%s' where the\n\
270 name of the daemon should be substituted.\n\
271 (default: '%s')\n\
272 -k, --kill-command\n\
273 Supply a Bourne shell to command to use to stop a single\n\
274 daemon. The command string should include '%%s' where the\n\
275 name of the daemon should be substituted.\n\
276 (default: '%s')\n\
277 --dry Do not start or restart anything, just log.\n\
278 -p, --pid-file Set process identifier file name\n\
279 (default is %s/watchfrr.pid).\n\
280 -b, --blank-string\n\
281 When the supplied argument string is found in any of the\n\
282 various shell command arguments (-r, -s, or -k), replace\n\
283 it with a space. This is an ugly hack to circumvent problems\n\
284 passing command-line arguments with embedded spaces.\n\
285 -v, --version Print program version\n\
286 -h, --help Display this help and exit\n",
287 frr_vtydir, DEFAULT_LOGLEVEL, LOG_EMERG, LOG_DEBUG, LOG_DEBUG,
288 DEFAULT_MIN_RESTART, DEFAULT_MAX_RESTART, DEFAULT_PERIOD,
289 DEFAULT_TIMEOUT, DEFAULT_RESTART_TIMEOUT,
290 DEFAULT_RESTART_CMD, DEFAULT_START_CMD, DEFAULT_STOP_CMD,
291 frr_vtydir);
292 }
293
294 static pid_t run_background(char *shell_cmd)
295 {
296 pid_t child;
297
298 switch (child = fork()) {
299 case -1:
300 flog_err_sys(EC_LIB_SYSTEM_CALL,
301 "fork failed, cannot run command [%s]: %s",
302 shell_cmd, safe_strerror(errno));
303 return -1;
304 case 0:
305 /* Child process. */
306 /* Use separate process group so child processes can be killed
307 * easily. */
308 if (setpgid(0, 0) < 0)
309 zlog_warn("warning: setpgid(0,0) failed: %s",
310 safe_strerror(errno));
311 {
312 char shell[] = "sh";
313 char dashc[] = "-c";
314 char *const argv[4] = {shell, dashc, shell_cmd, NULL};
315 execv("/bin/sh", argv);
316 flog_err_sys(EC_LIB_SYSTEM_CALL,
317 "execv(/bin/sh -c '%s') failed: %s",
318 shell_cmd, safe_strerror(errno));
319 _exit(127);
320 }
321 default:
322 /* Parent process: we will reap the child later. */
323 zlog_info("Forked background command [pid %d]: %s", (int)child,
324 shell_cmd);
325 return child;
326 }
327 }
328
/*
 * Store in *result the time that has passed between *start_time and now
 * (as reported by gettimeofday()), with tv_usec normalized to be
 * non-negative.  Returns result so the call can be used in an expression.
 */
static struct timeval *time_elapsed(struct timeval *result,
				    const struct timeval *start_time)
{
	gettimeofday(result, NULL);

	result->tv_sec -= start_time->tv_sec;
	result->tv_usec -= start_time->tv_usec;

	/* Borrow whole seconds until the microsecond part is positive. */
	for (; result->tv_usec < 0; result->tv_sec--)
		result->tv_usec += 1000000L;

	return result;
}
341
342 static int restart_kill(struct thread *t_kill)
343 {
344 struct restart_info *restart = THREAD_ARG(t_kill);
345 struct timeval delay;
346
347 time_elapsed(&delay, &restart->time);
348 zlog_warn(
349 "Warning: %s %s child process %d still running after %ld seconds, sending signal %d",
350 restart->what, restart->name, (int)restart->pid,
351 (long)delay.tv_sec, (restart->kills ? SIGKILL : SIGTERM));
352 kill(-restart->pid, (restart->kills ? SIGKILL : SIGTERM));
353 restart->kills++;
354 restart->t_kill = NULL;
355 thread_add_timer(master, restart_kill, restart, gs.restart_timeout,
356 &restart->t_kill);
357 return 0;
358 }
359
360 static struct restart_info *find_child(pid_t child)
361 {
362 struct daemon *dmn;
363 if (gs.restart.pid == child)
364 return &gs.restart;
365
366 for (dmn = gs.daemons; dmn; dmn = dmn->next) {
367 if (dmn->restart.pid == child)
368 return &dmn->restart;
369 }
370 return NULL;
371 }
372
373 static void sigchild(void)
374 {
375 pid_t child;
376 int status;
377 const char *name;
378 const char *what;
379 struct restart_info *restart;
380 struct daemon *dmn;
381
382 switch (child = waitpid(-1, &status, WNOHANG)) {
383 case -1:
384 flog_err_sys(EC_LIB_SYSTEM_CALL, "waitpid failed: %s",
385 safe_strerror(errno));
386 return;
387 case 0:
388 zlog_warn("SIGCHLD received, but waitpid did not reap a child");
389 return;
390 }
391
392 if (child == integrated_write_pid) {
393 integrated_write_sigchld(status);
394 return;
395 }
396
397 if ((restart = find_child(child)) != NULL) {
398 name = restart->name;
399 what = restart->what;
400 restart->pid = 0;
401 gs.numpids--;
402 thread_cancel(restart->t_kill);
403 restart->t_kill = NULL;
404 /* Update restart time to reflect the time the command
405 * completed. */
406 gettimeofday(&restart->time, NULL);
407 } else {
408 flog_err_sys(
409 EC_LIB_SYSTEM_CALL,
410 "waitpid returned status for an unknown child process %d",
411 (int)child);
412 name = "(unknown)";
413 what = "background";
414 }
415 if (WIFSTOPPED(status))
416 zlog_warn("warning: %s %s process %d is stopped", what, name,
417 (int)child);
418 else if (WIFSIGNALED(status))
419 zlog_warn("%s %s process %d terminated due to signal %d", what,
420 name, (int)child, WTERMSIG(status));
421 else if (WIFEXITED(status)) {
422 if (WEXITSTATUS(status) != 0)
423 zlog_warn(
424 "%s %s process %d exited with non-zero status %d",
425 what, name, (int)child, WEXITSTATUS(status));
426 else {
427 zlog_debug("%s %s process %d exited normally", what,
428 name, (int)child);
429
430 if (restart && restart != &gs.restart) {
431 dmn = container_of(restart, struct daemon,
432 restart);
433 restart_done(dmn);
434 } else if (restart)
435 for (dmn = gs.daemons; dmn; dmn = dmn->next)
436 restart_done(dmn);
437 }
438 } else
439 flog_err_sys(
440 EC_LIB_SYSTEM_CALL,
441 "cannot interpret %s %s process %d wait status 0x%x",
442 what, name, (int)child, status);
443 phase_check();
444 }
445
446 static int run_job(struct restart_info *restart, const char *cmdtype,
447 const char *command, int force, int update_interval)
448 {
449 struct timeval delay;
450
451 if (gs.loglevel > LOG_DEBUG + 1)
452 zlog_debug("attempting to %s %s", cmdtype, restart->name);
453
454 if (restart->pid) {
455 if (gs.loglevel > LOG_DEBUG + 1)
456 zlog_debug(
457 "cannot %s %s, previous pid %d still running",
458 cmdtype, restart->name, (int)restart->pid);
459 return -1;
460 }
461
462 #if defined HAVE_SYSTEMD
463 char buffer[512];
464
465 snprintf(buffer, sizeof(buffer), "restarting %s", restart->name);
466 systemd_send_status(buffer);
467 #endif
468
469 /* Note: time_elapsed test must come before the force test, since we
470 need
471 to make sure that delay is initialized for use below in updating the
472 restart interval. */
473 if ((time_elapsed(&delay, &restart->time)->tv_sec < restart->interval)
474 && !force) {
475
476 if (gs.loglevel > LOG_DEBUG + 1)
477 zlog_debug(
478 "postponing %s %s: elapsed time %ld < retry interval %ld",
479 cmdtype, restart->name, (long)delay.tv_sec,
480 restart->interval);
481 return -1;
482 }
483
484 gettimeofday(&restart->time, NULL);
485 restart->kills = 0;
486 {
487 char cmd[strlen(command) + strlen(restart->name) + 1];
488 snprintf(cmd, sizeof(cmd), command, restart->name);
489 if ((restart->pid = run_background(cmd)) > 0) {
490 restart->t_kill = NULL;
491 thread_add_timer(master, restart_kill, restart,
492 gs.restart_timeout, &restart->t_kill);
493 restart->what = cmdtype;
494 gs.numpids++;
495 } else
496 restart->pid = 0;
497 }
498
499 #if defined HAVE_SYSTEMD
500 systemd_send_status("FRR Operational");
501 #endif
502 /* Calculate the new restart interval. */
503 if (update_interval) {
504 if (delay.tv_sec > 2 * gs.max_restart_interval)
505 restart->interval = gs.min_restart_interval;
506 else if ((restart->interval *= 2) > gs.max_restart_interval)
507 restart->interval = gs.max_restart_interval;
508 if (gs.loglevel > LOG_DEBUG + 1)
509 zlog_debug("restart %s interval is now %ld",
510 restart->name, restart->interval);
511 }
512 return restart->pid;
513 }
514
/*
 * Helpers that (re-)arm a daemon's read handler or wakeup timer.
 *
 * Each one clears the stored thread pointer and then schedules the handler.
 * The bodies deliberately end in "while (0)" WITHOUT a semicolon: the caller
 * supplies it, which keeps "if (x) SET_...(dmn); else ..." well-formed.
 */

/* Wait for input on the daemon's socket. */
#define SET_READ_HANDLER(DMN)                                                  \
	do {                                                                   \
		(DMN)->t_read = NULL;                                          \
		thread_add_read(master, handle_read, (DMN), (DMN)->fd,         \
				&(DMN)->t_read);                               \
	} while (0)

/* Retry connecting to a daemon that is down after a fuzzed period. */
#define SET_WAKEUP_DOWN(DMN)                                                   \
	do {                                                                   \
		(DMN)->t_wakeup = NULL;                                        \
		thread_add_timer_msec(master, wakeup_down, (DMN),              \
				      FUZZY(gs.period), &(DMN)->t_wakeup);     \
	} while (0)

/* Re-check an unresponsive daemon after a fuzzed period. */
#define SET_WAKEUP_UNRESPONSIVE(DMN)                                           \
	do {                                                                   \
		(DMN)->t_wakeup = NULL;                                        \
		thread_add_timer_msec(master, wakeup_unresponsive, (DMN),      \
				      FUZZY(gs.period), &(DMN)->t_wakeup);     \
	} while (0)

/* Send the next echo request after a fuzzed period. */
#define SET_WAKEUP_ECHO(DMN)                                                   \
	do {                                                                   \
		(DMN)->t_wakeup = NULL;                                        \
		thread_add_timer_msec(master, wakeup_send_echo, (DMN),         \
				      FUZZY(gs.period), &(DMN)->t_wakeup);     \
	} while (0)
542
543 static int wakeup_down(struct thread *t_wakeup)
544 {
545 struct daemon *dmn = THREAD_ARG(t_wakeup);
546
547 dmn->t_wakeup = NULL;
548 if (try_connect(dmn) < 0)
549 SET_WAKEUP_DOWN(dmn);
550 if ((dmn->connect_tries > 1) && (dmn->state != DAEMON_UP))
551 try_restart(dmn);
552 return 0;
553 }
554
555 static int wakeup_init(struct thread *t_wakeup)
556 {
557 struct daemon *dmn = THREAD_ARG(t_wakeup);
558
559 dmn->t_wakeup = NULL;
560 if (try_connect(dmn) < 0) {
561 zlog_info(
562 "%s state -> down : initial connection attempt failed",
563 dmn->name);
564 dmn->state = DAEMON_DOWN;
565 }
566 phase_check();
567 return 0;
568 }
569
570 static void restart_done(struct daemon *dmn)
571 {
572 if (dmn->state != DAEMON_DOWN) {
573 zlog_warn(
574 "Daemon: %s: is in %s state but expected it to be in DAEMON_DOWN state",
575 dmn->name, state_str[dmn->state]);
576 return;
577 }
578 if (dmn->t_wakeup)
579 THREAD_OFF(dmn->t_wakeup);
580 if (try_connect(dmn) < 0)
581 SET_WAKEUP_DOWN(dmn);
582 }
583
584 static void daemon_down(struct daemon *dmn, const char *why)
585 {
586 if (IS_UP(dmn) || (dmn->state == DAEMON_INIT))
587 flog_err(EC_WATCHFRR_CONNECTION, "%s state -> down : %s",
588 dmn->name, why);
589 else if (gs.loglevel > LOG_DEBUG)
590 zlog_debug("%s still down : %s", dmn->name, why);
591 if (IS_UP(dmn))
592 gs.numdown++;
593 dmn->state = DAEMON_DOWN;
594 if (dmn->fd >= 0) {
595 close(dmn->fd);
596 dmn->fd = -1;
597 }
598 THREAD_OFF(dmn->t_read);
599 THREAD_OFF(dmn->t_write);
600 THREAD_OFF(dmn->t_wakeup);
601 if (try_connect(dmn) < 0)
602 SET_WAKEUP_DOWN(dmn);
603 phase_check();
604 }
605
606 static int handle_read(struct thread *t_read)
607 {
608 struct daemon *dmn = THREAD_ARG(t_read);
609 static const char resp[sizeof(PING_TOKEN) + 4] = PING_TOKEN "\n";
610 char buf[sizeof(resp) + 100];
611 ssize_t rc;
612 struct timeval delay;
613
614 dmn->t_read = NULL;
615 if ((rc = read(dmn->fd, buf, sizeof(buf))) < 0) {
616 char why[100];
617
618 if (ERRNO_IO_RETRY(errno)) {
619 /* Pretend it never happened. */
620 SET_READ_HANDLER(dmn);
621 return 0;
622 }
623 snprintf(why, sizeof(why), "unexpected read error: %s",
624 safe_strerror(errno));
625 daemon_down(dmn, why);
626 return 0;
627 }
628 if (rc == 0) {
629 daemon_down(dmn, "read returned EOF");
630 return 0;
631 }
632 if (!dmn->echo_sent.tv_sec) {
633 char why[sizeof(buf) + 100];
634 snprintf(why, sizeof(why),
635 "unexpected read returns %d bytes: %.*s", (int)rc,
636 (int)rc, buf);
637 daemon_down(dmn, why);
638 return 0;
639 }
640
641 /* We are expecting an echo response: is there any chance that the
642 response would not be returned entirely in the first read? That
643 seems inconceivable... */
644 if ((rc != sizeof(resp)) || memcmp(buf, resp, sizeof(resp))) {
645 char why[100 + sizeof(buf)];
646 snprintf(why, sizeof(why),
647 "read returned bad echo response of %d bytes (expecting %u): %.*s",
648 (int)rc, (unsigned int)sizeof(resp), (int)rc, buf);
649 daemon_down(dmn, why);
650 return 0;
651 }
652
653 time_elapsed(&delay, &dmn->echo_sent);
654 dmn->echo_sent.tv_sec = 0;
655 if (dmn->state == DAEMON_UNRESPONSIVE) {
656 if (delay.tv_sec < gs.timeout) {
657 dmn->state = DAEMON_UP;
658 zlog_warn(
659 "%s state -> up : echo response received after %ld.%06ld seconds",
660 dmn->name, (long)delay.tv_sec,
661 (long)delay.tv_usec);
662 } else
663 zlog_warn(
664 "%s: slow echo response finally received after %ld.%06ld seconds",
665 dmn->name, (long)delay.tv_sec,
666 (long)delay.tv_usec);
667 } else if (gs.loglevel > LOG_DEBUG + 1)
668 zlog_debug("%s: echo response received after %ld.%06ld seconds",
669 dmn->name, (long)delay.tv_sec, (long)delay.tv_usec);
670
671 SET_READ_HANDLER(dmn);
672 if (dmn->t_wakeup)
673 thread_cancel(dmn->t_wakeup);
674 SET_WAKEUP_ECHO(dmn);
675
676 return 0;
677 }
678
679 /*
680 * Wait till we notice that all daemons are ready before
681 * we send we are ready to systemd
682 */
683 static void daemon_send_ready(int exitcode)
684 {
685 FILE *fp;
686 static int sent = 0;
687 char started[1024];
688
689 if (sent)
690 return;
691
692 if (exitcode == 0)
693 zlog_notice("all daemons up, doing startup-complete notify");
694 else if (gs.numdown < gs.numdaemons)
695 flog_err(EC_WATCHFRR_CONNECTION,
696 "startup did not complete within timeout (%d/%d daemons running)",
697 gs.numdaemons - gs.numdown, gs.numdaemons);
698 else {
699 flog_err(EC_WATCHFRR_CONNECTION,
700 "all configured daemons failed to start -- exiting watchfrr");
701 exit(exitcode);
702
703 }
704
705 frr_detach();
706
707 snprintf(started, sizeof(started), "%s%s", frr_vtydir,
708 "watchfrr.started");
709 fp = fopen(started, "w");
710 if (fp)
711 fclose(fp);
712 #if defined HAVE_SYSTEMD
713 systemd_send_started(master, 0);
714 systemd_send_status("FRR Operational");
715 #endif
716 sent = 1;
717 }
718
719 static void daemon_up(struct daemon *dmn, const char *why)
720 {
721 dmn->state = DAEMON_UP;
722 gs.numdown--;
723 dmn->connect_tries = 0;
724 zlog_notice("%s state -> up : %s", dmn->name, why);
725 if (gs.numdown == 0)
726 daemon_send_ready(0);
727 SET_WAKEUP_ECHO(dmn);
728 phase_check();
729 }
730
731 static int check_connect(struct thread *t_write)
732 {
733 struct daemon *dmn = THREAD_ARG(t_write);
734 int sockerr;
735 socklen_t reslen = sizeof(sockerr);
736
737 dmn->t_write = NULL;
738 if (getsockopt(dmn->fd, SOL_SOCKET, SO_ERROR, (char *)&sockerr, &reslen)
739 < 0) {
740 zlog_warn("%s: check_connect: getsockopt failed: %s", dmn->name,
741 safe_strerror(errno));
742 daemon_down(dmn,
743 "getsockopt failed checking connection success");
744 return 0;
745 }
746 if ((reslen == sizeof(sockerr)) && sockerr) {
747 char why[100];
748 snprintf(
749 why, sizeof(why),
750 "getsockopt reports that connection attempt failed: %s",
751 safe_strerror(sockerr));
752 daemon_down(dmn, why);
753 return 0;
754 }
755
756 daemon_up(dmn, "delayed connect succeeded");
757 return 0;
758 }
759
760 static int wakeup_connect_hanging(struct thread *t_wakeup)
761 {
762 struct daemon *dmn = THREAD_ARG(t_wakeup);
763 char why[100];
764
765 dmn->t_wakeup = NULL;
766 snprintf(why, sizeof(why),
767 "connection attempt timed out after %ld seconds", gs.timeout);
768 daemon_down(dmn, why);
769 return 0;
770 }
771
772 /* Making connection to protocol daemon. */
773 static int try_connect(struct daemon *dmn)
774 {
775 int sock;
776 struct sockaddr_un addr;
777 socklen_t len;
778
779 if (gs.loglevel > LOG_DEBUG + 1)
780 zlog_debug("%s: attempting to connect", dmn->name);
781 dmn->connect_tries++;
782
783 memset(&addr, 0, sizeof(struct sockaddr_un));
784 addr.sun_family = AF_UNIX;
785 snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/%s.vty", gs.vtydir,
786 dmn->name);
787 #ifdef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN
788 len = addr.sun_len = SUN_LEN(&addr);
789 #else
790 len = sizeof(addr.sun_family) + strlen(addr.sun_path);
791 #endif /* HAVE_STRUCT_SOCKADDR_UN_SUN_LEN */
792
793 /* Quick check to see if we might succeed before we go to the trouble
794 of creating a socket. */
795 if (access(addr.sun_path, W_OK) < 0) {
796 if (errno != ENOENT)
797 flog_err_sys(EC_LIB_SYSTEM_CALL,
798 "%s: access to socket %s denied: %s",
799 dmn->name, addr.sun_path,
800 safe_strerror(errno));
801 return -1;
802 }
803
804 if ((sock = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
805 flog_err_sys(EC_LIB_SOCKET, "%s(%s): cannot make socket: %s",
806 __func__, addr.sun_path, safe_strerror(errno));
807 return -1;
808 }
809
810 if (set_nonblocking(sock) < 0 || set_cloexec(sock) < 0) {
811 flog_err_sys(EC_LIB_SYSTEM_CALL,
812 "%s(%s): set_nonblocking/cloexec(%d) failed",
813 __func__, addr.sun_path, sock);
814 close(sock);
815 return -1;
816 }
817
818 if (connect(sock, (struct sockaddr *)&addr, len) < 0) {
819 if ((errno != EINPROGRESS) && (errno != EWOULDBLOCK)) {
820 if (gs.loglevel > LOG_DEBUG)
821 zlog_debug("%s(%s): connect failed: %s",
822 __func__, addr.sun_path,
823 safe_strerror(errno));
824 close(sock);
825 return -1;
826 }
827 if (gs.loglevel > LOG_DEBUG)
828 zlog_debug("%s: connection in progress", dmn->name);
829 dmn->state = DAEMON_CONNECTING;
830 dmn->fd = sock;
831 dmn->t_write = NULL;
832 thread_add_write(master, check_connect, dmn, dmn->fd,
833 &dmn->t_write);
834 dmn->t_wakeup = NULL;
835 thread_add_timer(master, wakeup_connect_hanging, dmn,
836 gs.timeout, &dmn->t_wakeup);
837 SET_READ_HANDLER(dmn);
838 return 0;
839 }
840
841 dmn->fd = sock;
842 SET_READ_HANDLER(dmn);
843 daemon_up(dmn, "connect succeeded");
844 return 1;
845 }
846
847 static int phase_hanging(struct thread *t_hanging)
848 {
849 gs.t_phase_hanging = NULL;
850 flog_err(EC_WATCHFRR_CONNECTION,
851 "Phase [%s] hanging for %ld seconds, aborting phased restart",
852 phase_str[gs.phase], PHASE_TIMEOUT);
853 gs.phase = PHASE_NONE;
854 return 0;
855 }
856
857 static void set_phase(restart_phase_t new_phase)
858 {
859 gs.phase = new_phase;
860 if (gs.t_phase_hanging)
861 thread_cancel(gs.t_phase_hanging);
862 gs.t_phase_hanging = NULL;
863 thread_add_timer(master, phase_hanging, NULL, PHASE_TIMEOUT,
864 &gs.t_phase_hanging);
865 }
866
867 static void phase_check(void)
868 {
869 struct daemon *dmn;
870
871 switch (gs.phase) {
872 case PHASE_NONE:
873 break;
874
875 case PHASE_INIT:
876 for (dmn = gs.daemons; dmn; dmn = dmn->next)
877 if (dmn->state == DAEMON_INIT)
878 return;
879
880 /* startup complete, everything out of INIT */
881 gs.phase = PHASE_NONE;
882 for (dmn = gs.daemons; dmn; dmn = dmn->next)
883 if (dmn->state == DAEMON_DOWN) {
884 SET_WAKEUP_DOWN(dmn);
885 try_restart(dmn);
886 }
887 break;
888 case PHASE_STOPS_PENDING:
889 if (gs.numpids)
890 break;
891 zlog_info(
892 "Phased restart: all routing daemon stop jobs have completed.");
893 set_phase(PHASE_WAITING_DOWN);
894
895 /*FALLTHRU*/
896 case PHASE_WAITING_DOWN:
897 if (gs.numdown + IS_UP(gs.special) < gs.numdaemons)
898 break;
899 zlog_info("Phased restart: all routing daemons now down.");
900 run_job(&gs.special->restart, "restart", gs.restart_command, 1,
901 1);
902 set_phase(PHASE_ZEBRA_RESTART_PENDING);
903
904 /*FALLTHRU*/
905 case PHASE_ZEBRA_RESTART_PENDING:
906 if (gs.special->restart.pid)
907 break;
908 zlog_info("Phased restart: %s restart job completed.",
909 gs.special->name);
910 set_phase(PHASE_WAITING_ZEBRA_UP);
911
912 /*FALLTHRU*/
913 case PHASE_WAITING_ZEBRA_UP:
914 if (!IS_UP(gs.special))
915 break;
916 zlog_info("Phased restart: %s is now up.", gs.special->name);
917 {
918 struct daemon *dmn;
919 for (dmn = gs.daemons; dmn; dmn = dmn->next) {
920 if (dmn != gs.special)
921 run_job(&dmn->restart, "start",
922 gs.start_command, 1, 0);
923 }
924 }
925 gs.phase = PHASE_NONE;
926 THREAD_OFF(gs.t_phase_hanging);
927 zlog_notice("Phased global restart has completed.");
928 break;
929 }
930 }
931
932 static void try_restart(struct daemon *dmn)
933 {
934 if (watch_only)
935 return;
936
937 if (dmn != gs.special) {
938 if ((gs.special->state == DAEMON_UP)
939 && (gs.phase == PHASE_NONE))
940 run_job(&dmn->restart, "restart", gs.restart_command, 0,
941 1);
942 else
943 zlog_debug(
944 "%s: postponing restart attempt because master %s daemon not up [%s], or phased restart in progress",
945 dmn->name, gs.special->name,
946 state_str[gs.special->state]);
947 return;
948 }
949
950 if ((gs.phase != PHASE_NONE) || gs.numpids) {
951 if (gs.loglevel > LOG_DEBUG + 1)
952 zlog_debug(
953 "postponing phased global restart: restart already in progress [%s], or outstanding child processes [%d]",
954 phase_str[gs.phase], gs.numpids);
955 return;
956 }
957 /* Is it too soon for a restart? */
958 {
959 struct timeval delay;
960 if (time_elapsed(&delay, &gs.special->restart.time)->tv_sec
961 < gs.special->restart.interval) {
962 if (gs.loglevel > LOG_DEBUG + 1)
963 zlog_debug(
964 "postponing phased global restart: elapsed time %ld < retry interval %ld",
965 (long)delay.tv_sec,
966 gs.special->restart.interval);
967 return;
968 }
969 }
970 run_job(&gs.restart, "restart", gs.restart_command, 0, 1);
971 }
972
973 static int wakeup_unresponsive(struct thread *t_wakeup)
974 {
975 struct daemon *dmn = THREAD_ARG(t_wakeup);
976
977 dmn->t_wakeup = NULL;
978 if (dmn->state != DAEMON_UNRESPONSIVE)
979 flog_err(EC_WATCHFRR_CONNECTION,
980 "%s: no longer unresponsive (now %s), wakeup should have been cancelled!",
981 dmn->name, state_str[dmn->state]);
982 else {
983 SET_WAKEUP_UNRESPONSIVE(dmn);
984 try_restart(dmn);
985 }
986 return 0;
987 }
988
989 static int wakeup_no_answer(struct thread *t_wakeup)
990 {
991 struct daemon *dmn = THREAD_ARG(t_wakeup);
992
993 dmn->t_wakeup = NULL;
994 dmn->state = DAEMON_UNRESPONSIVE;
995 if (dmn->ignore_timeout)
996 return 0;
997 flog_err(EC_WATCHFRR_CONNECTION,
998 "%s state -> unresponsive : no response yet to ping sent %ld seconds ago",
999 dmn->name, gs.timeout);
1000 SET_WAKEUP_UNRESPONSIVE(dmn);
1001 try_restart(dmn);
1002 return 0;
1003 }
1004
1005 static int wakeup_send_echo(struct thread *t_wakeup)
1006 {
1007 static const char echocmd[] = "echo " PING_TOKEN;
1008 ssize_t rc;
1009 struct daemon *dmn = THREAD_ARG(t_wakeup);
1010
1011 dmn->t_wakeup = NULL;
1012 if (((rc = write(dmn->fd, echocmd, sizeof(echocmd))) < 0)
1013 || ((size_t)rc != sizeof(echocmd))) {
1014 char why[100 + sizeof(echocmd)];
1015 snprintf(why, sizeof(why),
1016 "write '%s' returned %d instead of %u", echocmd,
1017 (int)rc, (unsigned int)sizeof(echocmd));
1018 daemon_down(dmn, why);
1019 } else {
1020 gettimeofday(&dmn->echo_sent, NULL);
1021 dmn->t_wakeup = NULL;
1022 thread_add_timer(master, wakeup_no_answer, dmn, gs.timeout,
1023 &dmn->t_wakeup);
1024 }
1025 return 0;
1026 }
1027
1028 bool check_all_up(void)
1029 {
1030 struct daemon *dmn;
1031
1032 for (dmn = gs.daemons; dmn; dmn = dmn->next)
1033 if (dmn->state != DAEMON_UP)
1034 return false;
1035 return true;
1036 }
1037
1038 void watchfrr_status(struct vty *vty)
1039 {
1040 struct daemon *dmn;
1041 struct timeval delay;
1042
1043 vty_out(vty, "watchfrr global phase: %s\n", phase_str[gs.phase]);
1044 if (gs.restart.pid)
1045 vty_out(vty, " global restart running, pid %ld\n",
1046 (long)gs.restart.pid);
1047
1048 for (dmn = gs.daemons; dmn; dmn = dmn->next) {
1049 vty_out(vty, " %-20s %s%s", dmn->name, state_str[dmn->state],
1050 dmn->ignore_timeout ? "/Ignoring Timeout\n" : "\n");
1051 if (dmn->restart.pid)
1052 vty_out(vty, " restart running, pid %ld\n",
1053 (long)dmn->restart.pid);
1054 else if (dmn->state == DAEMON_DOWN &&
1055 time_elapsed(&delay, &dmn->restart.time)->tv_sec
1056 < dmn->restart.interval)
1057 vty_out(vty, " restarting in %jd seconds (%jds backoff interval)\n",
1058 (intmax_t)dmn->restart.interval
1059 - (intmax_t)delay.tv_sec,
1060 (intmax_t)dmn->restart.interval);
1061 }
1062 }
1063
/*
 * Termination handler, installed for both SIGINT and SIGTERM in
 * watchfrr_signals[]: log the shutdown, notify systemd that we are
 * stopping, and exit successfully.
 */
static void sigint(void)
{
	zlog_notice("Terminating on signal");
	systemd_send_stopping();
	exit(EXIT_SUCCESS);
}
1070
/*
 * Check that a command template contains exactly one '%' and that it is
 * immediately followed by 's' (i.e. a single "%s" placeholder).
 *
 * Returns 1 if the template is acceptable, 0 otherwise.
 */
static int valid_command(const char *cmd)
{
	const char *pct = strchr(cmd, '%');

	if (pct == NULL)
		return 0;

	if (pct[1] != 's')
		return 0;

	/* No further '%' may appear after the placeholder. */
	return strchr(pct + 1, '%') == NULL;
}
1078
/*
 * This is an ugly hack to circumvent problems with passing command-line
 * arguments that contain spaces.  The fix is to use a configuration file.
 *
 * Returns a newly allocated copy of 'cmd' in which every occurrence of
 * the token 'blankstr' has been replaced by a single space.  The caller
 * owns the returned string.  If the copy cannot be allocated, an error is
 * printed and the process exits with status 1.
 *
 * An empty 'blankstr' is treated as "nothing to translate" and the plain
 * copy is returned: strstr() matches an empty needle at every position,
 * which would otherwise never terminate and would grow the string past
 * its allocation.
 */
static char *translate_blanks(const char *cmd, const char *blankstr)
{
	char *res;
	char *p;
	size_t bslen = strlen(blankstr);

	res = strdup(cmd);
	if (!res) {
		perror("strdup");
		exit(1);
	}

	if (bslen == 0)
		return res;

	/*
	 * Resume each search just past the space that was written, instead
	 * of rescanning from the start.  Besides avoiding quadratic
	 * rescans, this guarantees termination when the token itself begins
	 * with (or is) a space, since the freshly written ' ' can never be
	 * matched again.
	 */
	p = res;
	while ((p = strstr(p, blankstr)) != NULL) {
		*p = ' ';
		if (bslen != 1)
			/* Close the gap left by the rest of the token. */
			memmove(p + 1, p + bslen, strlen(p + bslen) + 1);
		p++;
	}

	return res;
}
1098
/*
 * Timer callback armed by watchfrr_init() with STARTUP_TIMEOUT: calls
 * daemon_send_ready(1).  The thread argument is not used.
 *
 * Always returns 0.
 */
static int startup_timeout(struct thread *t_wakeup)
{
	(void)t_wakeup;

	daemon_send_ready(1);
	return 0;
}
1104
1105 static void watchfrr_init(int argc, char **argv)
1106 {
1107 const char *special = "zebra";
1108 int i;
1109 struct daemon *dmn, **add = &gs.daemons;
1110 char alldaemons[512] = "", *p = alldaemons;
1111
1112 thread_add_timer_msec(master, startup_timeout, NULL, STARTUP_TIMEOUT,
1113 &gs.t_startup_timeout);
1114
1115 for (i = optind; i < argc; i++) {
1116 dmn = XCALLOC(MTYPE_WATCHFRR_DAEMON, sizeof(*dmn));
1117
1118 dmn->name = dmn->restart.name = argv[i];
1119 dmn->state = DAEMON_INIT;
1120 gs.numdaemons++;
1121 gs.numdown++;
1122 dmn->fd = -1;
1123 dmn->t_wakeup = NULL;
1124 thread_add_timer_msec(master, wakeup_init, dmn, 0,
1125 &dmn->t_wakeup);
1126 dmn->restart.interval = gs.min_restart_interval;
1127 *add = dmn;
1128 add = &dmn->next;
1129
1130 if (!strcmp(dmn->name, special))
1131 gs.special = dmn;
1132 }
1133
1134 if (!gs.daemons) {
1135 fprintf(stderr,
1136 "Must specify one or more daemons to monitor.\n\n");
1137 frr_help_exit(1);
1138 }
1139 if (!watch_only && !gs.special) {
1140 fprintf(stderr, "\"%s\" daemon must be in daemon lists\n\n",
1141 special);
1142 frr_help_exit(1);
1143 }
1144
1145 for (dmn = gs.daemons; dmn; dmn = dmn->next) {
1146 snprintf(p, alldaemons + sizeof(alldaemons) - p, "%s%s",
1147 (p == alldaemons) ? "" : " ", dmn->name);
1148 p += strlen(p);
1149 }
1150 zlog_notice("%s %s watching [%s]%s", progname, FRR_VERSION, alldaemons,
1151 watch_only ? ", monitor mode" : "");
1152 }
1153
1154 struct zebra_privs_t watchfrr_privs = {
1155 #ifdef VTY_GROUP
1156 .vty_group = VTY_GROUP,
1157 #endif
1158 };
1159
1160 static struct quagga_signal_t watchfrr_signals[] = {
1161 {
1162 .signal = SIGINT,
1163 .handler = sigint,
1164 },
1165 {
1166 .signal = SIGTERM,
1167 .handler = sigint,
1168 },
1169 {
1170 .signal = SIGCHLD,
1171 .handler = sigchild,
1172 },
1173 };
1174
1175 FRR_DAEMON_INFO(watchfrr, WATCHFRR,
1176 .flags = FRR_NO_PRIVSEP | FRR_NO_TCPVTY | FRR_LIMITED_CLI
1177 | FRR_NO_CFG_PID_DRY | FRR_NO_ZCLIENT
1178 | FRR_DETACH_LATER,
1179
1180 .printhelp = printhelp,
1181 .copyright = "Copyright 2004 Andrew J. Schorr",
1182
1183 .signals = watchfrr_signals,
1184 .n_signals = array_size(watchfrr_signals),
1185
1186 .privs = &watchfrr_privs, )
1187
1188 #define DEPRECATED_OPTIONS "aAezR:"
1189
1190 int main(int argc, char **argv)
1191 {
1192 int opt;
1193 const char *blankstr = NULL;
1194
1195 frr_preinit(&watchfrr_di, argc, argv);
1196 progname = watchfrr_di.progname;
1197
1198 frr_opt_add("b:dk:l:i:p:r:S:s:t:T:" DEPRECATED_OPTIONS, longopts, "");
1199
1200 gs.restart.name = "all";
1201 while ((opt = frr_getopt(argc, argv, NULL)) != EOF) {
1202 if (opt && opt < 128 && strchr(DEPRECATED_OPTIONS, opt)) {
1203 fprintf(stderr,
1204 "The -%c option no longer exists.\n"
1205 "Please refer to the watchfrr(8) man page.\n",
1206 opt);
1207 exit(1);
1208 }
1209
1210 switch (opt) {
1211 case 0:
1212 break;
1213 case 'b':
1214 blankstr = optarg;
1215 break;
1216 case OPTION_DRY:
1217 watch_only = true;
1218 break;
1219 case 'k':
1220 if (!valid_command(optarg)) {
1221 fprintf(stderr,
1222 "Invalid kill command, must contain '%%s': %s\n",
1223 optarg);
1224 frr_help_exit(1);
1225 }
1226 gs.stop_command = optarg;
1227 break;
1228 case 'l': {
1229 char garbage[3];
1230 if ((sscanf(optarg, "%d%1s", &gs.loglevel, garbage)
1231 != 1)
1232 || (gs.loglevel < LOG_EMERG)) {
1233 fprintf(stderr,
1234 "Invalid loglevel argument: %s\n",
1235 optarg);
1236 frr_help_exit(1);
1237 }
1238 } break;
1239 case OPTION_MINRESTART: {
1240 char garbage[3];
1241 if ((sscanf(optarg, "%ld%1s", &gs.min_restart_interval,
1242 garbage)
1243 != 1)
1244 || (gs.min_restart_interval < 0)) {
1245 fprintf(stderr,
1246 "Invalid min_restart_interval argument: %s\n",
1247 optarg);
1248 frr_help_exit(1);
1249 }
1250 } break;
1251 case OPTION_MAXRESTART: {
1252 char garbage[3];
1253 if ((sscanf(optarg, "%ld%1s", &gs.max_restart_interval,
1254 garbage)
1255 != 1)
1256 || (gs.max_restart_interval < 0)) {
1257 fprintf(stderr,
1258 "Invalid max_restart_interval argument: %s\n",
1259 optarg);
1260 frr_help_exit(1);
1261 }
1262 } break;
1263 case 'i': {
1264 char garbage[3];
1265 int period;
1266 if ((sscanf(optarg, "%d%1s", &period, garbage) != 1)
1267 || (gs.period < 1)) {
1268 fprintf(stderr,
1269 "Invalid interval argument: %s\n",
1270 optarg);
1271 frr_help_exit(1);
1272 }
1273 gs.period = 1000 * period;
1274 } break;
1275 case 'p':
1276 watchfrr_di.pid_file = optarg;
1277 break;
1278 case 'r':
1279 if (!valid_command(optarg)) {
1280 fprintf(stderr,
1281 "Invalid restart command, must contain '%%s': %s\n",
1282 optarg);
1283 frr_help_exit(1);
1284 }
1285 gs.restart_command = optarg;
1286 break;
1287 case 's':
1288 if (!valid_command(optarg)) {
1289 fprintf(stderr,
1290 "Invalid start command, must contain '%%s': %s\n",
1291 optarg);
1292 frr_help_exit(1);
1293 }
1294 gs.start_command = optarg;
1295 break;
1296 case 'S':
1297 gs.vtydir = optarg;
1298 break;
1299 case 't': {
1300 char garbage[3];
1301 if ((sscanf(optarg, "%ld%1s", &gs.timeout, garbage)
1302 != 1)
1303 || (gs.timeout < 1)) {
1304 fprintf(stderr,
1305 "Invalid timeout argument: %s\n",
1306 optarg);
1307 frr_help_exit(1);
1308 }
1309 } break;
1310 case 'T': {
1311 char garbage[3];
1312 if ((sscanf(optarg, "%ld%1s", &gs.restart_timeout,
1313 garbage)
1314 != 1)
1315 || (gs.restart_timeout < 1)) {
1316 fprintf(stderr,
1317 "Invalid restart timeout argument: %s\n",
1318 optarg);
1319 frr_help_exit(1);
1320 }
1321 } break;
1322 default:
1323 fputs("Invalid option.\n", stderr);
1324 frr_help_exit(1);
1325 }
1326 }
1327
1328 if (watch_only
1329 && (gs.start_command || gs.stop_command || gs.restart_command)) {
1330 fputs("Options -r/-s/-k are not used when --dry is active.\n",
1331 stderr);
1332 }
1333 if (!watch_only
1334 && (!gs.restart_command || !gs.start_command || !gs.stop_command)) {
1335 fprintf(stderr,
1336 "Options -s (start), -k (kill), and -r (restart) are required.\n");
1337 frr_help_exit(1);
1338 }
1339
1340 if (blankstr) {
1341 if (gs.restart_command)
1342 gs.restart_command =
1343 translate_blanks(gs.restart_command, blankstr);
1344 if (gs.start_command)
1345 gs.start_command =
1346 translate_blanks(gs.start_command, blankstr);
1347 if (gs.stop_command)
1348 gs.stop_command =
1349 translate_blanks(gs.stop_command, blankstr);
1350 }
1351
1352 gs.restart.interval = gs.min_restart_interval;
1353
1354 master = frr_init();
1355 watchfrr_error_init();
1356 watchfrr_init(argc, argv);
1357 watchfrr_vty_init();
1358
1359 frr_config_fork();
1360
1361 if (watchfrr_di.daemon_mode)
1362 zlog_syslog_set_prio_min(MIN(gs.loglevel, LOG_DEBUG));
1363 else
1364 zlog_aux_init(NULL, MIN(gs.loglevel, LOG_DEBUG));
1365
1366 frr_run(master);
1367
1368 systemd_send_stopping();
1369 /* Not reached. */
1370 return 0;
1371 }