]> git.proxmox.com Git - mirror_frr.git/blob - watchfrr/watchfrr.c
Merge pull request #5389 from opensourcerouting/constify
[mirror_frr.git] / watchfrr / watchfrr.c
1 /*
2 * Monitor status of frr daemons and restart if necessary.
3 *
4 * Copyright (C) 2004 Andrew J. Schorr
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include <zebra.h>
22 #include <thread.h>
23 #include <log.h>
24 #include <network.h>
25 #include <sigevent.h>
26 #include <lib/version.h>
27 #include "command.h"
28 #include "memory_vty.h"
29 #include "libfrr.h"
30 #include "lib_errors.h"
31
32 #include <getopt.h>
33 #include <sys/un.h>
34 #include <sys/wait.h>
35 #include <memory.h>
36 #include <systemd.h>
37
38 #include "watchfrr.h"
39 #include "watchfrr_errors.h"
40
/* Smaller of two values.  Note: each argument may be evaluated twice. */
#ifndef MIN
#define MIN(X, Y) (((X) <= (Y)) ? (X) : (Y))
#endif

/*
 * Timer randomization helpers.
 *   JITTER(X): random() % (X + 1), shifted down by X / 2.
 *   FUZZY(X):  X plus a jitter derived from X / 20.
 */
#define JITTER(X) ((random() % ((X) + 1)) - ((X) / 2))
#define FUZZY(X) ((X) + JITTER((X) / 20))
48
/*
 * Built-in defaults.  printhelp() reports these as the defaults of the
 * corresponding command-line options.
 */
#define DEFAULT_PERIOD 5	   /* status polling interval, seconds */
#define DEFAULT_TIMEOUT 90	   /* unresponsiveness timeout, seconds */
#define DEFAULT_RESTART_TIMEOUT 20 /* restart (kill) timeout, seconds */
#define DEFAULT_LOGLEVEL LOG_INFO
#define DEFAULT_MIN_RESTART 60	   /* minimum seconds between restart jobs */
#define DEFAULT_MAX_RESTART 600	   /* maximum seconds between restart jobs */

/* Shell command templates; the single '%s' is replaced by the daemon name. */
#define DEFAULT_RESTART_CMD WATCHFRR_SH_PATH " restart %s"
#define DEFAULT_START_CMD WATCHFRR_SH_PATH " start %s"
#define DEFAULT_STOP_CMD WATCHFRR_SH_PATH " stop %s"

/* Token sent with the "echo" command and expected back from the daemon. */
#define PING_TOKEN "PING"
61
62 DEFINE_MGROUP(WATCHFRR, "watchfrr")
63 DEFINE_MTYPE_STATIC(WATCHFRR, WATCHFRR_DAEMON, "watchfrr daemon entry")
64
65 /* Needs to be global, referenced somewhere inside libfrr. */
66 struct thread_master *master;
67
68 static bool watch_only = false;
69
/*
 * Phases of watchfrr's global state machine (see phase_check()).
 * PHASE_INIT is the state at program start; PHASE_NONE means idle.
 */
typedef enum {
	PHASE_NONE = 0,
	PHASE_INIT,
	PHASE_STOPS_PENDING,
	PHASE_WAITING_DOWN,
	PHASE_ZEBRA_RESTART_PENDING,
	PHASE_WAITING_ZEBRA_UP
} restart_phase_t;

/*
 * Human-readable phase names, indexed by restart_phase_t.  Designated
 * initializers keep every string tied to its enumerator, so the table can
 * no longer drift out of step with the enum (it used to carry a trailing
 * entry that matched no phase).
 */
static const char *const phase_str[] = {
	[PHASE_NONE] = "Idle",
	[PHASE_INIT] = "Startup",
	[PHASE_STOPS_PENDING] = "Stop jobs running",
	[PHASE_WAITING_DOWN] = "Waiting for other daemons to come down",
	[PHASE_ZEBRA_RESTART_PENDING] = "Zebra restart job running",
	[PHASE_WAITING_ZEBRA_UP] = "Waiting for zebra to come up",
};
88
/* Seconds a restart phase may last before phase_hanging() aborts it. */
#define PHASE_TIMEOUT (3 * gs.restart_timeout)
/*
 * Milliseconds granted for startup before startup_timeout() fires.
 * Parenthesized so the expansion is safe inside larger expressions.
 */
#define STARTUP_TIMEOUT (55 * 1000)
91
/* Bookkeeping for one background (start/stop/restart) shell job. */
struct restart_info {
	/* Name substituted into the command template. */
	const char *name;
	/* Kind of job last launched ("restart", "start", ...), for logging. */
	const char *what;
	/* Pid of the running job; 0 when no job is outstanding. */
	pid_t pid;
	/* When the job was last launched, or last seen to complete. */
	struct timeval time;
	/* Current back-off: minimum seconds between two jobs. */
	long interval;
	/* Timer that signals the job if it runs for too long. */
	struct thread *t_kill;
	/* Signals sent so far to the current job (0: SIGTERM next, else SIGKILL). */
	int kills;
};
101
102 static struct global_state {
103 restart_phase_t phase;
104 struct thread *t_phase_hanging;
105 struct thread *t_startup_timeout;
106 const char *vtydir;
107 long period;
108 long timeout;
109 long restart_timeout;
110 long min_restart_interval;
111 long max_restart_interval;
112 struct daemon *daemons;
113 const char *restart_command;
114 const char *start_command;
115 const char *stop_command;
116 struct restart_info restart;
117 int loglevel;
118 struct daemon *special; /* points to zebra when doing phased restart */
119 int numdaemons;
120 int numpids;
121 int numdown; /* # of daemons that are not UP or UNRESPONSIVE */
122 } gs = {
123 .phase = PHASE_INIT,
124 .vtydir = frr_vtydir,
125 .period = 1000 * DEFAULT_PERIOD,
126 .timeout = DEFAULT_TIMEOUT,
127 .restart_timeout = DEFAULT_RESTART_TIMEOUT,
128 .loglevel = DEFAULT_LOGLEVEL,
129 .min_restart_interval = DEFAULT_MIN_RESTART,
130 .max_restart_interval = DEFAULT_MAX_RESTART,
131 .restart_command = DEFAULT_RESTART_CMD,
132 .start_command = DEFAULT_START_CMD,
133 .stop_command = DEFAULT_STOP_CMD,
134 };
135
/* Connection state of one monitored daemon. */
typedef enum {
	DAEMON_INIT,
	DAEMON_DOWN,
	DAEMON_CONNECTING,
	DAEMON_UP,
	DAEMON_UNRESPONSIVE
} daemon_state_t;

/*
 * True when the daemon counts as running: it is either answering pings or
 * connected but currently unresponsive.  DMN is evaluated twice.
 */
#define IS_UP(DMN)                                                             \
	(((DMN)->state == DAEMON_UP) || ((DMN)->state == DAEMON_UNRESPONSIVE))

/* Printable state names, indexed by daemon_state_t. */
static const char *const state_str[] = {
	[DAEMON_INIT] = "Init",
	[DAEMON_DOWN] = "Down",
	[DAEMON_CONNECTING] = "Connecting",
	[DAEMON_UP] = "Up",
	[DAEMON_UNRESPONSIVE] = "Unresponsive",
};
150
151 struct daemon {
152 const char *name;
153 daemon_state_t state;
154 int fd;
155 struct timeval echo_sent;
156 unsigned int connect_tries;
157 struct thread *t_wakeup;
158 struct thread *t_read;
159 struct thread *t_write;
160 struct daemon *next;
161 struct restart_info restart;
162
163 /*
164 * For a given daemon, if we've turned on ignore timeouts
165 * ignore the timeout value and assume everything is ok
166 * This is for daemon debugging w/ gdb after we have started
167 * FRR and realize we have something that needs to be looked
168 * at
169 */
170 bool ignore_timeout;
171 };
172
/* getopt_long() return codes for options that have no short form. */
#define OPTION_MINRESTART 2000
#define OPTION_MAXRESTART 2001
#define OPTION_DRY 2002

/* Long command-line options; the table ends with an all-zero sentinel. */
static const struct option longopts[] = {
	{"daemon", no_argument, NULL, 'd'},
	{"statedir", required_argument, NULL, 'S'},
	{"loglevel", required_argument, NULL, 'l'},
	{"interval", required_argument, NULL, 'i'},
	{"timeout", required_argument, NULL, 't'},
	{"restart-timeout", required_argument, NULL, 'T'},
	{"restart", required_argument, NULL, 'r'},
	{"start-command", required_argument, NULL, 's'},
	{"kill-command", required_argument, NULL, 'k'},
	{"dry", no_argument, NULL, OPTION_DRY},
	{"min-restart-interval", required_argument, NULL, OPTION_MINRESTART},
	{"max-restart-interval", required_argument, NULL, OPTION_MAXRESTART},
	{"pid-file", required_argument, NULL, 'p'},
	{"blank-string", required_argument, NULL, 'b'},
	{"help", no_argument, NULL, 'h'},
	{"version", no_argument, NULL, 'v'},
	{NULL, 0, NULL, 0},
};
195
/* Forward declarations for functions used before their definitions. */
static void phase_check(void);
static void restart_done(struct daemon *dmn);
static int try_connect(struct daemon *dmn);
static void try_restart(struct daemon *dmn);
static int wakeup_send_echo(struct thread *t_wakeup);

/* Program name printed in the usage line of printhelp(). */
static const char *progname;
203
204 void watchfrr_set_ignore_daemon(struct vty *vty, const char *dname, bool ignore)
205 {
206 struct daemon *dmn;
207
208 for (dmn = gs.daemons; dmn; dmn = dmn->next) {
209 if (strncmp(dmn->name, dname, strlen(dmn->name)) == 0)
210 break;
211 }
212
213 if (dmn) {
214 dmn->ignore_timeout = ignore;
215 vty_out(vty, "%s switching to %s\n", dmn->name,
216 ignore ? "ignore" : "watch");
217 } else
218 vty_out(vty, "%s is not configured for running at the moment",
219 dname);
220 }
221
222 static void printhelp(FILE *target)
223 {
224 fprintf(target,
225 "Usage : %s [OPTION...] <daemon name> ...\n\n\
226 Watchdog program to monitor status of frr daemons and try to restart\n\
227 them if they are down or unresponsive. It determines whether a daemon is\n\
228 up based on whether it can connect to the daemon's vty unix stream socket.\n\
229 It then repeatedly sends echo commands over that socket to determine whether\n\
230 the daemon is responsive. If the daemon crashes, we will receive an EOF\n\
231 on the socket connection and know immediately that the daemon is down.\n\n\
232 The daemons to be monitored should be listed on the command line.\n\n\
233 In order to avoid attempting to restart the daemons in a fast loop,\n\
234 the -m and -M options allow you to control the minimum delay between\n\
235 restart commands. The minimum restart delay is recalculated each time\n\
236 a restart is attempted: if the time since the last restart attempt exceeds\n\
237 twice the -M value, then the restart delay is set to the -m value.\n\
238 Otherwise, the interval is doubled (but capped at the -M value).\n\n",
239 progname);
240
241 fprintf(target,
242 "Options:\n\
243 -d, --daemon Run in daemon mode. In this mode, error messages are sent\n\
244 to syslog instead of stdout.\n\
245 -S, --statedir Set the vty socket directory (default is %s)\n\
246 -l, --loglevel Set the logging level (default is %d).\n\
247 The value should range from %d (LOG_EMERG) to %d (LOG_DEBUG),\n\
248 but it can be set higher than %d if extra-verbose debugging\n\
249 messages are desired.\n\
250 --min-restart-interval\n\
251 Set the minimum seconds to wait between invocations of daemon\n\
252 restart commands (default is %d).\n\
253 --max-restart-interval\n\
254 Set the maximum seconds to wait between invocations of daemon\n\
255 restart commands (default is %d).\n\
256 -i, --interval Set the status polling interval in seconds (default is %d)\n\
257 -t, --timeout Set the unresponsiveness timeout in seconds (default is %d)\n\
258 -T, --restart-timeout\n\
259 Set the restart (kill) timeout in seconds (default is %d).\n\
260 If any background jobs are still running after this much\n\
261 time has elapsed, they will be killed.\n\
262 -r, --restart Supply a Bourne shell command to use to restart a single\n\
263 daemon. The command string should include '%%s' where the\n\
264 name of the daemon should be substituted.\n\
265 (default: '%s')\n\
266 -s, --start-command\n\
267 Supply a Bourne shell to command to use to start a single\n\
268 daemon. The command string should include '%%s' where the\n\
269 name of the daemon should be substituted.\n\
270 (default: '%s')\n\
271 -k, --kill-command\n\
272 Supply a Bourne shell to command to use to stop a single\n\
273 daemon. The command string should include '%%s' where the\n\
274 name of the daemon should be substituted.\n\
275 (default: '%s')\n\
276 --dry Do not start or restart anything, just log.\n\
277 -p, --pid-file Set process identifier file name\n\
278 (default is %s/watchfrr.pid).\n\
279 -b, --blank-string\n\
280 When the supplied argument string is found in any of the\n\
281 various shell command arguments (-r, -s, or -k), replace\n\
282 it with a space. This is an ugly hack to circumvent problems\n\
283 passing command-line arguments with embedded spaces.\n\
284 -v, --version Print program version\n\
285 -h, --help Display this help and exit\n",
286 frr_vtydir, DEFAULT_LOGLEVEL, LOG_EMERG, LOG_DEBUG, LOG_DEBUG,
287 DEFAULT_MIN_RESTART, DEFAULT_MAX_RESTART, DEFAULT_PERIOD,
288 DEFAULT_TIMEOUT, DEFAULT_RESTART_TIMEOUT,
289 DEFAULT_RESTART_CMD, DEFAULT_START_CMD, DEFAULT_STOP_CMD,
290 frr_vtydir);
291 }
292
293 static pid_t run_background(char *shell_cmd)
294 {
295 pid_t child;
296
297 switch (child = fork()) {
298 case -1:
299 flog_err_sys(EC_LIB_SYSTEM_CALL,
300 "fork failed, cannot run command [%s]: %s",
301 shell_cmd, safe_strerror(errno));
302 return -1;
303 case 0:
304 /* Child process. */
305 /* Use separate process group so child processes can be killed
306 * easily. */
307 if (setpgid(0, 0) < 0)
308 zlog_warn("warning: setpgid(0,0) failed: %s",
309 safe_strerror(errno));
310 {
311 char shell[] = "sh";
312 char dashc[] = "-c";
313 char *const argv[4] = {shell, dashc, shell_cmd, NULL};
314 execv("/bin/sh", argv);
315 flog_err_sys(EC_LIB_SYSTEM_CALL,
316 "execv(/bin/sh -c '%s') failed: %s",
317 shell_cmd, safe_strerror(errno));
318 _exit(127);
319 }
320 default:
321 /* Parent process: we will reap the child later. */
322 flog_err_sys(EC_LIB_SYSTEM_CALL,
323 "Forked background command [pid %d]: %s",
324 (int)child, shell_cmd);
325 return child;
326 }
327 }
328
/*
 * Store in *result the wall-clock time elapsed since *start_time, with
 * tv_usec normalized to be non-negative.  Returns result so the call can
 * be used inside an expression.
 */
static struct timeval *time_elapsed(struct timeval *result,
				    const struct timeval *start_time)
{
	gettimeofday(result, NULL);
	result->tv_sec -= start_time->tv_sec;

	/* Borrow whole seconds until the microsecond part is >= 0. */
	for (result->tv_usec -= start_time->tv_usec; result->tv_usec < 0;
	     result->tv_sec--)
		result->tv_usec += 1000000L;

	return result;
}
341
342 static int restart_kill(struct thread *t_kill)
343 {
344 struct restart_info *restart = THREAD_ARG(t_kill);
345 struct timeval delay;
346
347 time_elapsed(&delay, &restart->time);
348 zlog_warn(
349 "Warning: %s %s child process %d still running after "
350 "%ld seconds, sending signal %d",
351 restart->what, restart->name, (int)restart->pid,
352 (long)delay.tv_sec, (restart->kills ? SIGKILL : SIGTERM));
353 kill(-restart->pid, (restart->kills ? SIGKILL : SIGTERM));
354 restart->kills++;
355 restart->t_kill = NULL;
356 thread_add_timer(master, restart_kill, restart, gs.restart_timeout,
357 &restart->t_kill);
358 return 0;
359 }
360
361 static struct restart_info *find_child(pid_t child)
362 {
363 struct daemon *dmn;
364 if (gs.restart.pid == child)
365 return &gs.restart;
366
367 for (dmn = gs.daemons; dmn; dmn = dmn->next) {
368 if (dmn->restart.pid == child)
369 return &dmn->restart;
370 }
371 return NULL;
372 }
373
374 static void sigchild(void)
375 {
376 pid_t child;
377 int status;
378 const char *name;
379 const char *what;
380 struct restart_info *restart;
381 struct daemon *dmn;
382
383 switch (child = waitpid(-1, &status, WNOHANG)) {
384 case -1:
385 flog_err_sys(EC_LIB_SYSTEM_CALL, "waitpid failed: %s",
386 safe_strerror(errno));
387 return;
388 case 0:
389 zlog_warn("SIGCHLD received, but waitpid did not reap a child");
390 return;
391 }
392
393 if (child == integrated_write_pid) {
394 integrated_write_sigchld(status);
395 return;
396 }
397
398 if ((restart = find_child(child)) != NULL) {
399 name = restart->name;
400 what = restart->what;
401 restart->pid = 0;
402 gs.numpids--;
403 thread_cancel(restart->t_kill);
404 restart->t_kill = NULL;
405 /* Update restart time to reflect the time the command
406 * completed. */
407 gettimeofday(&restart->time, NULL);
408 } else {
409 flog_err_sys(
410 EC_LIB_SYSTEM_CALL,
411 "waitpid returned status for an unknown child process %d",
412 (int)child);
413 name = "(unknown)";
414 what = "background";
415 }
416 if (WIFSTOPPED(status))
417 zlog_warn("warning: %s %s process %d is stopped", what, name,
418 (int)child);
419 else if (WIFSIGNALED(status))
420 zlog_warn("%s %s process %d terminated due to signal %d", what,
421 name, (int)child, WTERMSIG(status));
422 else if (WIFEXITED(status)) {
423 if (WEXITSTATUS(status) != 0)
424 zlog_warn(
425 "%s %s process %d exited with non-zero status %d",
426 what, name, (int)child, WEXITSTATUS(status));
427 else {
428 zlog_debug("%s %s process %d exited normally", what,
429 name, (int)child);
430
431 if (restart && restart != &gs.restart) {
432 dmn = container_of(restart, struct daemon,
433 restart);
434 restart_done(dmn);
435 } else if (restart)
436 for (dmn = gs.daemons; dmn; dmn = dmn->next)
437 restart_done(dmn);
438 }
439 } else
440 flog_err_sys(
441 EC_LIB_SYSTEM_CALL,
442 "cannot interpret %s %s process %d wait status 0x%x",
443 what, name, (int)child, status);
444 phase_check();
445 }
446
447 static int run_job(struct restart_info *restart, const char *cmdtype,
448 const char *command, int force, int update_interval)
449 {
450 struct timeval delay;
451
452 if (gs.loglevel > LOG_DEBUG + 1)
453 zlog_debug("attempting to %s %s", cmdtype, restart->name);
454
455 if (restart->pid) {
456 if (gs.loglevel > LOG_DEBUG + 1)
457 zlog_debug(
458 "cannot %s %s, previous pid %d still running",
459 cmdtype, restart->name, (int)restart->pid);
460 return -1;
461 }
462
463 #if defined HAVE_SYSTEMD
464 char buffer[512];
465
466 snprintf(buffer, sizeof(buffer), "restarting %s", restart->name);
467 systemd_send_status(buffer);
468 #endif
469
470 /* Note: time_elapsed test must come before the force test, since we
471 need
472 to make sure that delay is initialized for use below in updating the
473 restart interval. */
474 if ((time_elapsed(&delay, &restart->time)->tv_sec < restart->interval)
475 && !force) {
476
477 if (gs.loglevel > LOG_DEBUG + 1)
478 zlog_debug(
479 "postponing %s %s: "
480 "elapsed time %ld < retry interval %ld",
481 cmdtype, restart->name, (long)delay.tv_sec,
482 restart->interval);
483 return -1;
484 }
485
486 gettimeofday(&restart->time, NULL);
487 restart->kills = 0;
488 {
489 char cmd[strlen(command) + strlen(restart->name) + 1];
490 snprintf(cmd, sizeof(cmd), command, restart->name);
491 if ((restart->pid = run_background(cmd)) > 0) {
492 restart->t_kill = NULL;
493 thread_add_timer(master, restart_kill, restart,
494 gs.restart_timeout, &restart->t_kill);
495 restart->what = cmdtype;
496 gs.numpids++;
497 } else
498 restart->pid = 0;
499 }
500
501 #if defined HAVE_SYSTEMD
502 systemd_send_status("FRR Operational");
503 #endif
504 /* Calculate the new restart interval. */
505 if (update_interval) {
506 if (delay.tv_sec > 2 * gs.max_restart_interval)
507 restart->interval = gs.min_restart_interval;
508 else if ((restart->interval *= 2) > gs.max_restart_interval)
509 restart->interval = gs.max_restart_interval;
510 if (gs.loglevel > LOG_DEBUG + 1)
511 zlog_debug("restart %s interval is now %ld",
512 restart->name, restart->interval);
513 }
514 return restart->pid;
515 }
516
/*
 * Helpers that (re)arm one of a daemon's event handlers.  Each clears the
 * stored thread pointer first and then schedules the handler.
 *
 * The macros expand to a plain "do { ... } while (0)" without a trailing
 * semicolon, so "SET_X(dmn);" is exactly one statement and may be used as
 * the unbraced body of an if that has an else branch.
 */

/* Watch the daemon's socket for input. */
#define SET_READ_HANDLER(DMN)                                                  \
	do {                                                                   \
		(DMN)->t_read = NULL;                                          \
		thread_add_read(master, handle_read, (DMN), (DMN)->fd,         \
				&(DMN)->t_read);                               \
	} while (0)

/* Retry the connection to a down daemon after roughly gs.period ms. */
#define SET_WAKEUP_DOWN(DMN)                                                   \
	do {                                                                   \
		(DMN)->t_wakeup = NULL;                                        \
		thread_add_timer_msec(master, wakeup_down, (DMN),              \
				      FUZZY(gs.period), &(DMN)->t_wakeup);     \
	} while (0)

/* Revisit an unresponsive daemon after roughly gs.period ms. */
#define SET_WAKEUP_UNRESPONSIVE(DMN)                                           \
	do {                                                                   \
		(DMN)->t_wakeup = NULL;                                        \
		thread_add_timer_msec(master, wakeup_unresponsive, (DMN),      \
				      FUZZY(gs.period), &(DMN)->t_wakeup);     \
	} while (0)

/* Send the next echo request after roughly gs.period ms. */
#define SET_WAKEUP_ECHO(DMN)                                                   \
	do {                                                                   \
		(DMN)->t_wakeup = NULL;                                        \
		thread_add_timer_msec(master, wakeup_send_echo, (DMN),         \
				      FUZZY(gs.period), &(DMN)->t_wakeup);     \
	} while (0)
544
545 static int wakeup_down(struct thread *t_wakeup)
546 {
547 struct daemon *dmn = THREAD_ARG(t_wakeup);
548
549 dmn->t_wakeup = NULL;
550 if (try_connect(dmn) < 0)
551 SET_WAKEUP_DOWN(dmn);
552 if ((dmn->connect_tries > 1) && (dmn->state != DAEMON_UP))
553 try_restart(dmn);
554 return 0;
555 }
556
557 static int wakeup_init(struct thread *t_wakeup)
558 {
559 struct daemon *dmn = THREAD_ARG(t_wakeup);
560
561 dmn->t_wakeup = NULL;
562 if (try_connect(dmn) < 0) {
563 flog_err(EC_WATCHFRR_CONNECTION,
564 "%s state -> down : initial connection attempt failed",
565 dmn->name);
566 dmn->state = DAEMON_DOWN;
567 }
568 phase_check();
569 return 0;
570 }
571
572 static void restart_done(struct daemon *dmn)
573 {
574 if (dmn->state != DAEMON_DOWN) {
575 zlog_warn(
576 "Daemon: %s: is in %s state but expected it to be in DAEMON_DOWN state",
577 dmn->name, state_str[dmn->state]);
578 return;
579 }
580 if (dmn->t_wakeup)
581 THREAD_OFF(dmn->t_wakeup);
582 if (try_connect(dmn) < 0)
583 SET_WAKEUP_DOWN(dmn);
584 }
585
586 static void daemon_down(struct daemon *dmn, const char *why)
587 {
588 if (IS_UP(dmn) || (dmn->state == DAEMON_INIT))
589 flog_err(EC_WATCHFRR_CONNECTION, "%s state -> down : %s",
590 dmn->name, why);
591 else if (gs.loglevel > LOG_DEBUG)
592 zlog_debug("%s still down : %s", dmn->name, why);
593 if (IS_UP(dmn))
594 gs.numdown++;
595 dmn->state = DAEMON_DOWN;
596 if (dmn->fd >= 0) {
597 close(dmn->fd);
598 dmn->fd = -1;
599 }
600 THREAD_OFF(dmn->t_read);
601 THREAD_OFF(dmn->t_write);
602 THREAD_OFF(dmn->t_wakeup);
603 if (try_connect(dmn) < 0)
604 SET_WAKEUP_DOWN(dmn);
605 phase_check();
606 }
607
608 static int handle_read(struct thread *t_read)
609 {
610 struct daemon *dmn = THREAD_ARG(t_read);
611 static const char resp[sizeof(PING_TOKEN) + 4] = PING_TOKEN "\n";
612 char buf[sizeof(resp) + 100];
613 ssize_t rc;
614 struct timeval delay;
615
616 dmn->t_read = NULL;
617 if ((rc = read(dmn->fd, buf, sizeof(buf))) < 0) {
618 char why[100];
619
620 if (ERRNO_IO_RETRY(errno)) {
621 /* Pretend it never happened. */
622 SET_READ_HANDLER(dmn);
623 return 0;
624 }
625 snprintf(why, sizeof(why), "unexpected read error: %s",
626 safe_strerror(errno));
627 daemon_down(dmn, why);
628 return 0;
629 }
630 if (rc == 0) {
631 daemon_down(dmn, "read returned EOF");
632 return 0;
633 }
634 if (!dmn->echo_sent.tv_sec) {
635 char why[sizeof(buf) + 100];
636 snprintf(why, sizeof(why),
637 "unexpected read returns %d bytes: %.*s", (int)rc,
638 (int)rc, buf);
639 daemon_down(dmn, why);
640 return 0;
641 }
642
643 /* We are expecting an echo response: is there any chance that the
644 response would not be returned entirely in the first read? That
645 seems inconceivable... */
646 if ((rc != sizeof(resp)) || memcmp(buf, resp, sizeof(resp))) {
647 char why[100 + sizeof(buf)];
648 snprintf(why, sizeof(why),
649 "read returned bad echo response of %d bytes "
650 "(expecting %u): %.*s",
651 (int)rc, (unsigned int)sizeof(resp), (int)rc, buf);
652 daemon_down(dmn, why);
653 return 0;
654 }
655
656 time_elapsed(&delay, &dmn->echo_sent);
657 dmn->echo_sent.tv_sec = 0;
658 if (dmn->state == DAEMON_UNRESPONSIVE) {
659 if (delay.tv_sec < gs.timeout) {
660 dmn->state = DAEMON_UP;
661 zlog_warn(
662 "%s state -> up : echo response received after %ld.%06ld "
663 "seconds",
664 dmn->name, (long)delay.tv_sec,
665 (long)delay.tv_usec);
666 } else
667 zlog_warn(
668 "%s: slow echo response finally received after %ld.%06ld "
669 "seconds",
670 dmn->name, (long)delay.tv_sec,
671 (long)delay.tv_usec);
672 } else if (gs.loglevel > LOG_DEBUG + 1)
673 zlog_debug("%s: echo response received after %ld.%06ld seconds",
674 dmn->name, (long)delay.tv_sec, (long)delay.tv_usec);
675
676 SET_READ_HANDLER(dmn);
677 if (dmn->t_wakeup)
678 thread_cancel(dmn->t_wakeup);
679 SET_WAKEUP_ECHO(dmn);
680
681 return 0;
682 }
683
684 /*
685 * Wait till we notice that all daemons are ready before
686 * we send we are ready to systemd
687 */
688 static void daemon_send_ready(int exitcode)
689 {
690 FILE *fp;
691 static int sent = 0;
692 char started[1024];
693
694 if (sent)
695 return;
696
697 if (exitcode == 0)
698 zlog_notice("all daemons up, doing startup-complete notify");
699 else if (gs.numdown < gs.numdaemons)
700 flog_err(EC_WATCHFRR_CONNECTION,
701 "startup did not complete within timeout"
702 " (%d/%d daemons running)",
703 gs.numdaemons - gs.numdown, gs.numdaemons);
704 else {
705 flog_err(EC_WATCHFRR_CONNECTION,
706 "all configured daemons failed to start"
707 " -- exiting watchfrr");
708 exit(exitcode);
709
710 }
711
712 frr_detach();
713
714 snprintf(started, sizeof(started), "%s%s", frr_vtydir,
715 "watchfrr.started");
716 fp = fopen(started, "w");
717 if (fp)
718 fclose(fp);
719 #if defined HAVE_SYSTEMD
720 systemd_send_started(master, 0);
721 systemd_send_status("FRR Operational");
722 #endif
723 sent = 1;
724 }
725
726 static void daemon_up(struct daemon *dmn, const char *why)
727 {
728 dmn->state = DAEMON_UP;
729 gs.numdown--;
730 dmn->connect_tries = 0;
731 zlog_notice("%s state -> up : %s", dmn->name, why);
732 if (gs.numdown == 0)
733 daemon_send_ready(0);
734 SET_WAKEUP_ECHO(dmn);
735 phase_check();
736 }
737
738 static int check_connect(struct thread *t_write)
739 {
740 struct daemon *dmn = THREAD_ARG(t_write);
741 int sockerr;
742 socklen_t reslen = sizeof(sockerr);
743
744 dmn->t_write = NULL;
745 if (getsockopt(dmn->fd, SOL_SOCKET, SO_ERROR, (char *)&sockerr, &reslen)
746 < 0) {
747 zlog_warn("%s: check_connect: getsockopt failed: %s", dmn->name,
748 safe_strerror(errno));
749 daemon_down(dmn,
750 "getsockopt failed checking connection success");
751 return 0;
752 }
753 if ((reslen == sizeof(sockerr)) && sockerr) {
754 char why[100];
755 snprintf(
756 why, sizeof(why),
757 "getsockopt reports that connection attempt failed: %s",
758 safe_strerror(sockerr));
759 daemon_down(dmn, why);
760 return 0;
761 }
762
763 daemon_up(dmn, "delayed connect succeeded");
764 return 0;
765 }
766
767 static int wakeup_connect_hanging(struct thread *t_wakeup)
768 {
769 struct daemon *dmn = THREAD_ARG(t_wakeup);
770 char why[100];
771
772 dmn->t_wakeup = NULL;
773 snprintf(why, sizeof(why),
774 "connection attempt timed out after %ld seconds", gs.timeout);
775 daemon_down(dmn, why);
776 return 0;
777 }
778
779 /* Making connection to protocol daemon. */
780 static int try_connect(struct daemon *dmn)
781 {
782 int sock;
783 struct sockaddr_un addr;
784 socklen_t len;
785
786 if (gs.loglevel > LOG_DEBUG + 1)
787 zlog_debug("%s: attempting to connect", dmn->name);
788 dmn->connect_tries++;
789
790 memset(&addr, 0, sizeof(struct sockaddr_un));
791 addr.sun_family = AF_UNIX;
792 snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/%s.vty", gs.vtydir,
793 dmn->name);
794 #ifdef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN
795 len = addr.sun_len = SUN_LEN(&addr);
796 #else
797 len = sizeof(addr.sun_family) + strlen(addr.sun_path);
798 #endif /* HAVE_STRUCT_SOCKADDR_UN_SUN_LEN */
799
800 /* Quick check to see if we might succeed before we go to the trouble
801 of creating a socket. */
802 if (access(addr.sun_path, W_OK) < 0) {
803 if (errno != ENOENT)
804 flog_err_sys(EC_LIB_SYSTEM_CALL,
805 "%s: access to socket %s denied: %s",
806 dmn->name, addr.sun_path,
807 safe_strerror(errno));
808 return -1;
809 }
810
811 if ((sock = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
812 flog_err_sys(EC_LIB_SOCKET, "%s(%s): cannot make socket: %s",
813 __func__, addr.sun_path, safe_strerror(errno));
814 return -1;
815 }
816
817 if (set_nonblocking(sock) < 0 || set_cloexec(sock) < 0) {
818 flog_err_sys(EC_LIB_SYSTEM_CALL,
819 "%s(%s): set_nonblocking/cloexec(%d) failed",
820 __func__, addr.sun_path, sock);
821 close(sock);
822 return -1;
823 }
824
825 if (connect(sock, (struct sockaddr *)&addr, len) < 0) {
826 if ((errno != EINPROGRESS) && (errno != EWOULDBLOCK)) {
827 if (gs.loglevel > LOG_DEBUG)
828 zlog_debug("%s(%s): connect failed: %s",
829 __func__, addr.sun_path,
830 safe_strerror(errno));
831 close(sock);
832 return -1;
833 }
834 if (gs.loglevel > LOG_DEBUG)
835 zlog_debug("%s: connection in progress", dmn->name);
836 dmn->state = DAEMON_CONNECTING;
837 dmn->fd = sock;
838 dmn->t_write = NULL;
839 thread_add_write(master, check_connect, dmn, dmn->fd,
840 &dmn->t_write);
841 dmn->t_wakeup = NULL;
842 thread_add_timer(master, wakeup_connect_hanging, dmn,
843 gs.timeout, &dmn->t_wakeup);
844 SET_READ_HANDLER(dmn);
845 return 0;
846 }
847
848 dmn->fd = sock;
849 SET_READ_HANDLER(dmn);
850 daemon_up(dmn, "connect succeeded");
851 return 1;
852 }
853
854 static int phase_hanging(struct thread *t_hanging)
855 {
856 gs.t_phase_hanging = NULL;
857 flog_err(EC_WATCHFRR_CONNECTION,
858 "Phase [%s] hanging for %ld seconds, aborting phased restart",
859 phase_str[gs.phase], PHASE_TIMEOUT);
860 gs.phase = PHASE_NONE;
861 return 0;
862 }
863
864 static void set_phase(restart_phase_t new_phase)
865 {
866 gs.phase = new_phase;
867 if (gs.t_phase_hanging)
868 thread_cancel(gs.t_phase_hanging);
869 gs.t_phase_hanging = NULL;
870 thread_add_timer(master, phase_hanging, NULL, PHASE_TIMEOUT,
871 &gs.t_phase_hanging);
872 }
873
874 static void phase_check(void)
875 {
876 struct daemon *dmn;
877
878 switch (gs.phase) {
879 case PHASE_NONE:
880 break;
881
882 case PHASE_INIT:
883 for (dmn = gs.daemons; dmn; dmn = dmn->next)
884 if (dmn->state == DAEMON_INIT)
885 return;
886
887 /* startup complete, everything out of INIT */
888 gs.phase = PHASE_NONE;
889 for (dmn = gs.daemons; dmn; dmn = dmn->next)
890 if (dmn->state == DAEMON_DOWN) {
891 SET_WAKEUP_DOWN(dmn);
892 try_restart(dmn);
893 }
894 break;
895 case PHASE_STOPS_PENDING:
896 if (gs.numpids)
897 break;
898 zlog_info(
899 "Phased restart: all routing daemon stop jobs have completed.");
900 set_phase(PHASE_WAITING_DOWN);
901
902 /*FALLTHRU*/
903 case PHASE_WAITING_DOWN:
904 if (gs.numdown + IS_UP(gs.special) < gs.numdaemons)
905 break;
906 zlog_info("Phased restart: all routing daemons now down.");
907 run_job(&gs.special->restart, "restart", gs.restart_command, 1,
908 1);
909 set_phase(PHASE_ZEBRA_RESTART_PENDING);
910
911 /*FALLTHRU*/
912 case PHASE_ZEBRA_RESTART_PENDING:
913 if (gs.special->restart.pid)
914 break;
915 zlog_info("Phased restart: %s restart job completed.",
916 gs.special->name);
917 set_phase(PHASE_WAITING_ZEBRA_UP);
918
919 /*FALLTHRU*/
920 case PHASE_WAITING_ZEBRA_UP:
921 if (!IS_UP(gs.special))
922 break;
923 zlog_info("Phased restart: %s is now up.", gs.special->name);
924 {
925 struct daemon *dmn;
926 for (dmn = gs.daemons; dmn; dmn = dmn->next) {
927 if (dmn != gs.special)
928 run_job(&dmn->restart, "start",
929 gs.start_command, 1, 0);
930 }
931 }
932 gs.phase = PHASE_NONE;
933 THREAD_OFF(gs.t_phase_hanging);
934 zlog_notice("Phased global restart has completed.");
935 break;
936 }
937 }
938
939 static void try_restart(struct daemon *dmn)
940 {
941 if (watch_only)
942 return;
943
944 if (dmn != gs.special) {
945 if ((gs.special->state == DAEMON_UP)
946 && (gs.phase == PHASE_NONE))
947 run_job(&dmn->restart, "restart", gs.restart_command, 0,
948 1);
949 else
950 zlog_debug(
951 "%s: postponing restart attempt because master %s daemon "
952 "not up [%s], or phased restart in progress",
953 dmn->name, gs.special->name,
954 state_str[gs.special->state]);
955 return;
956 }
957
958 if ((gs.phase != PHASE_NONE) || gs.numpids) {
959 if (gs.loglevel > LOG_DEBUG + 1)
960 zlog_debug(
961 "postponing phased global restart: restart already in "
962 "progress [%s], or outstanding child processes [%d]",
963 phase_str[gs.phase], gs.numpids);
964 return;
965 }
966 /* Is it too soon for a restart? */
967 {
968 struct timeval delay;
969 if (time_elapsed(&delay, &gs.special->restart.time)->tv_sec
970 < gs.special->restart.interval) {
971 if (gs.loglevel > LOG_DEBUG + 1)
972 zlog_debug(
973 "postponing phased global restart: "
974 "elapsed time %ld < retry interval %ld",
975 (long)delay.tv_sec,
976 gs.special->restart.interval);
977 return;
978 }
979 }
980 run_job(&gs.restart, "restart", gs.restart_command, 0, 1);
981 }
982
983 static int wakeup_unresponsive(struct thread *t_wakeup)
984 {
985 struct daemon *dmn = THREAD_ARG(t_wakeup);
986
987 dmn->t_wakeup = NULL;
988 if (dmn->state != DAEMON_UNRESPONSIVE)
989 flog_err(EC_WATCHFRR_CONNECTION,
990 "%s: no longer unresponsive (now %s), "
991 "wakeup should have been cancelled!",
992 dmn->name, state_str[dmn->state]);
993 else {
994 SET_WAKEUP_UNRESPONSIVE(dmn);
995 try_restart(dmn);
996 }
997 return 0;
998 }
999
1000 static int wakeup_no_answer(struct thread *t_wakeup)
1001 {
1002 struct daemon *dmn = THREAD_ARG(t_wakeup);
1003
1004 dmn->t_wakeup = NULL;
1005 dmn->state = DAEMON_UNRESPONSIVE;
1006 if (dmn->ignore_timeout)
1007 return 0;
1008 flog_err(EC_WATCHFRR_CONNECTION,
1009 "%s state -> unresponsive : no response yet to ping "
1010 "sent %ld seconds ago",
1011 dmn->name, gs.timeout);
1012 SET_WAKEUP_UNRESPONSIVE(dmn);
1013 try_restart(dmn);
1014 return 0;
1015 }
1016
1017 static int wakeup_send_echo(struct thread *t_wakeup)
1018 {
1019 static const char echocmd[] = "echo " PING_TOKEN;
1020 ssize_t rc;
1021 struct daemon *dmn = THREAD_ARG(t_wakeup);
1022
1023 dmn->t_wakeup = NULL;
1024 if (((rc = write(dmn->fd, echocmd, sizeof(echocmd))) < 0)
1025 || ((size_t)rc != sizeof(echocmd))) {
1026 char why[100 + sizeof(echocmd)];
1027 snprintf(why, sizeof(why),
1028 "write '%s' returned %d instead of %u", echocmd,
1029 (int)rc, (unsigned int)sizeof(echocmd));
1030 daemon_down(dmn, why);
1031 } else {
1032 gettimeofday(&dmn->echo_sent, NULL);
1033 dmn->t_wakeup = NULL;
1034 thread_add_timer(master, wakeup_no_answer, dmn, gs.timeout,
1035 &dmn->t_wakeup);
1036 }
1037 return 0;
1038 }
1039
1040 bool check_all_up(void)
1041 {
1042 struct daemon *dmn;
1043
1044 for (dmn = gs.daemons; dmn; dmn = dmn->next)
1045 if (dmn->state != DAEMON_UP)
1046 return false;
1047 return true;
1048 }
1049
1050 void watchfrr_status(struct vty *vty)
1051 {
1052 struct daemon *dmn;
1053 struct timeval delay;
1054
1055 vty_out(vty, "watchfrr global phase: %s\n", phase_str[gs.phase]);
1056 if (gs.restart.pid)
1057 vty_out(vty, " global restart running, pid %ld\n",
1058 (long)gs.restart.pid);
1059
1060 for (dmn = gs.daemons; dmn; dmn = dmn->next) {
1061 vty_out(vty, " %-20s %s%s", dmn->name, state_str[dmn->state],
1062 dmn->ignore_timeout ? "/Ignoring Timeout\n" : "\n");
1063 if (dmn->restart.pid)
1064 vty_out(vty, " restart running, pid %ld\n",
1065 (long)dmn->restart.pid);
1066 else if (dmn->state == DAEMON_DOWN &&
1067 time_elapsed(&delay, &dmn->restart.time)->tv_sec
1068 < dmn->restart.interval)
1069 vty_out(vty, " restarting in %jd seconds"
1070 " (%jds backoff interval)\n",
1071 (intmax_t)dmn->restart.interval
1072 - (intmax_t)delay.tv_sec,
1073 (intmax_t)dmn->restart.interval);
1074 }
1075 }
1076
/*
 * Handler installed for SIGINT and SIGTERM: log the termination, call
 * systemd_send_stopping(), and exit with status 0.
 */
static void sigint(void)
{
	zlog_notice("Terminating on signal");

	systemd_send_stopping();

	exit(0);
}
1083
/*
 * Check that a command template contains exactly one '%' character and
 * that it is immediately followed by 's'.
 *
 * Returns 1 if the template is acceptable, 0 otherwise.
 */
static int valid_command(const char *cmd)
{
	const char *pct = strchr(cmd, '%');

	/* No placeholder at all. */
	if (pct == NULL)
		return 0;

	/* The first '%' must introduce "%s". */
	if (pct[1] != 's')
		return 0;

	/* No further '%' may follow. */
	return strchr(pct + 1, '%') == NULL;
}
1091
/*
 * This is an ugly hack to circumvent problems with passing command-line
 * arguments that contain spaces.  The fix is to use a configuration file.
 *
 * Returns a newly allocated copy of cmd in which every occurrence of
 * blankstr has been replaced by a single space.  The caller owns the
 * returned string.  Exits the program if the copy cannot be allocated.
 *
 * An empty blankstr leaves the command unchanged.  Scanning resumes after
 * each inserted space, so a blankstr that itself contains a space cannot
 * match the text that was just written.
 */
static char *translate_blanks(const char *cmd, const char *blankstr)
{
	char *res;
	char *p;
	size_t bslen = strlen(blankstr);

	if (!(res = strdup(cmd))) {
		perror("strdup");
		exit(1);
	}

	/*
	 * strstr() matches an empty needle at every position; replacing it
	 * would grow the string past the allocated buffer and never end.
	 */
	if (bslen == 0)
		return res;

	p = res;
	while ((p = strstr(p, blankstr)) != NULL) {
		*p = ' ';
		/* Close the gap left by the rest of the marker (incl. NUL). */
		if (bslen != 1)
			memmove(p + 1, p + bslen, strlen(p + bslen) + 1);
		/* Continue behind the blank so it is never matched again. */
		p++;
	}
	return res;
}
1111
/*
 * Timer callback for the startup timeout scheduled in watchfrr_init():
 * calls daemon_send_ready(1).  The thread argument is not used.
 *
 * Always returns 0.
 */
static int startup_timeout(struct thread *t_wakeup)
{
	daemon_send_ready(1);

	return 0;
}
1117
1118 static void watchfrr_init(int argc, char **argv)
1119 {
1120 const char *special = "zebra";
1121 int i;
1122 struct daemon *dmn, **add = &gs.daemons;
1123 char alldaemons[512] = "", *p = alldaemons;
1124
1125 thread_add_timer_msec(master, startup_timeout, NULL, STARTUP_TIMEOUT,
1126 &gs.t_startup_timeout);
1127
1128 for (i = optind; i < argc; i++) {
1129 dmn = XCALLOC(MTYPE_WATCHFRR_DAEMON, sizeof(*dmn));
1130
1131 dmn->name = dmn->restart.name = argv[i];
1132 dmn->state = DAEMON_INIT;
1133 gs.numdaemons++;
1134 gs.numdown++;
1135 dmn->fd = -1;
1136 dmn->t_wakeup = NULL;
1137 thread_add_timer_msec(master, wakeup_init, dmn, 0,
1138 &dmn->t_wakeup);
1139 dmn->restart.interval = gs.min_restart_interval;
1140 *add = dmn;
1141 add = &dmn->next;
1142
1143 if (!strcmp(dmn->name, special))
1144 gs.special = dmn;
1145 }
1146
1147 if (!gs.daemons) {
1148 fprintf(stderr,
1149 "Must specify one or more daemons to monitor.\n\n");
1150 frr_help_exit(1);
1151 }
1152 if (!watch_only && !gs.special) {
1153 fprintf(stderr, "\"%s\" daemon must be in daemon lists\n\n",
1154 special);
1155 frr_help_exit(1);
1156 }
1157
1158 for (dmn = gs.daemons; dmn; dmn = dmn->next) {
1159 snprintf(p, alldaemons + sizeof(alldaemons) - p, "%s%s",
1160 (p == alldaemons) ? "" : " ", dmn->name);
1161 p += strlen(p);
1162 }
1163 zlog_notice("%s %s watching [%s]%s", progname, FRR_VERSION, alldaemons,
1164 watch_only ? ", monitor mode" : "");
1165 }
1166
1167 struct zebra_privs_t watchfrr_privs = {
1168 #ifdef VTY_GROUP
1169 .vty_group = VTY_GROUP,
1170 #endif
1171 };
1172
1173 static struct quagga_signal_t watchfrr_signals[] = {
1174 {
1175 .signal = SIGINT,
1176 .handler = sigint,
1177 },
1178 {
1179 .signal = SIGTERM,
1180 .handler = sigint,
1181 },
1182 {
1183 .signal = SIGCHLD,
1184 .handler = sigchild,
1185 },
1186 };
1187
1188 FRR_DAEMON_INFO(watchfrr, WATCHFRR,
1189 .flags = FRR_NO_PRIVSEP | FRR_NO_TCPVTY | FRR_LIMITED_CLI
1190 | FRR_NO_CFG_PID_DRY | FRR_NO_ZCLIENT
1191 | FRR_DETACH_LATER,
1192
1193 .printhelp = printhelp,
1194 .copyright = "Copyright 2004 Andrew J. Schorr",
1195
1196 .signals = watchfrr_signals,
1197 .n_signals = array_size(watchfrr_signals),
1198
1199 .privs = &watchfrr_privs, )
1200
/*
 * Option letters that no longer exist.  main() still registers them with
 * the option parser so that it can reject them with a message pointing at
 * the man page.
 */
#define DEPRECATED_OPTIONS "aAezR:"
1202
1203 int main(int argc, char **argv)
1204 {
1205 int opt;
1206 const char *blankstr = NULL;
1207
1208 frr_preinit(&watchfrr_di, argc, argv);
1209 progname = watchfrr_di.progname;
1210
1211 frr_opt_add("b:dk:l:i:p:r:S:s:t:T:" DEPRECATED_OPTIONS, longopts, "");
1212
1213 gs.restart.name = "all";
1214 while ((opt = frr_getopt(argc, argv, NULL)) != EOF) {
1215 if (opt && opt < 128 && strchr(DEPRECATED_OPTIONS, opt)) {
1216 fprintf(stderr,
1217 "The -%c option no longer exists.\n"
1218 "Please refer to the watchfrr(8) man page.\n",
1219 opt);
1220 exit(1);
1221 }
1222
1223 switch (opt) {
1224 case 0:
1225 break;
1226 case 'b':
1227 blankstr = optarg;
1228 break;
1229 case OPTION_DRY:
1230 watch_only = true;
1231 break;
1232 case 'k':
1233 if (!valid_command(optarg)) {
1234 fprintf(stderr,
1235 "Invalid kill command, must contain '%%s': %s\n",
1236 optarg);
1237 frr_help_exit(1);
1238 }
1239 gs.stop_command = optarg;
1240 break;
1241 case 'l': {
1242 char garbage[3];
1243 if ((sscanf(optarg, "%d%1s", &gs.loglevel, garbage)
1244 != 1)
1245 || (gs.loglevel < LOG_EMERG)) {
1246 fprintf(stderr,
1247 "Invalid loglevel argument: %s\n",
1248 optarg);
1249 frr_help_exit(1);
1250 }
1251 } break;
1252 case OPTION_MINRESTART: {
1253 char garbage[3];
1254 if ((sscanf(optarg, "%ld%1s", &gs.min_restart_interval,
1255 garbage)
1256 != 1)
1257 || (gs.min_restart_interval < 0)) {
1258 fprintf(stderr,
1259 "Invalid min_restart_interval argument: %s\n",
1260 optarg);
1261 frr_help_exit(1);
1262 }
1263 } break;
1264 case OPTION_MAXRESTART: {
1265 char garbage[3];
1266 if ((sscanf(optarg, "%ld%1s", &gs.max_restart_interval,
1267 garbage)
1268 != 1)
1269 || (gs.max_restart_interval < 0)) {
1270 fprintf(stderr,
1271 "Invalid max_restart_interval argument: %s\n",
1272 optarg);
1273 frr_help_exit(1);
1274 }
1275 } break;
1276 case 'i': {
1277 char garbage[3];
1278 int period;
1279 if ((sscanf(optarg, "%d%1s", &period, garbage) != 1)
1280 || (gs.period < 1)) {
1281 fprintf(stderr,
1282 "Invalid interval argument: %s\n",
1283 optarg);
1284 frr_help_exit(1);
1285 }
1286 gs.period = 1000 * period;
1287 } break;
1288 case 'p':
1289 watchfrr_di.pid_file = optarg;
1290 break;
1291 case 'r':
1292 if (!valid_command(optarg)) {
1293 fprintf(stderr,
1294 "Invalid restart command, must contain '%%s': %s\n",
1295 optarg);
1296 frr_help_exit(1);
1297 }
1298 gs.restart_command = optarg;
1299 break;
1300 case 's':
1301 if (!valid_command(optarg)) {
1302 fprintf(stderr,
1303 "Invalid start command, must contain '%%s': %s\n",
1304 optarg);
1305 frr_help_exit(1);
1306 }
1307 gs.start_command = optarg;
1308 break;
1309 case 'S':
1310 gs.vtydir = optarg;
1311 break;
1312 case 't': {
1313 char garbage[3];
1314 if ((sscanf(optarg, "%ld%1s", &gs.timeout, garbage)
1315 != 1)
1316 || (gs.timeout < 1)) {
1317 fprintf(stderr,
1318 "Invalid timeout argument: %s\n",
1319 optarg);
1320 frr_help_exit(1);
1321 }
1322 } break;
1323 case 'T': {
1324 char garbage[3];
1325 if ((sscanf(optarg, "%ld%1s", &gs.restart_timeout,
1326 garbage)
1327 != 1)
1328 || (gs.restart_timeout < 1)) {
1329 fprintf(stderr,
1330 "Invalid restart timeout argument: %s\n",
1331 optarg);
1332 frr_help_exit(1);
1333 }
1334 } break;
1335 default:
1336 fputs("Invalid option.\n", stderr);
1337 frr_help_exit(1);
1338 }
1339 }
1340
1341 if (watch_only
1342 && (gs.start_command || gs.stop_command || gs.restart_command)) {
1343 fputs("Options -r/-s/-k are not used when --dry is active.\n",
1344 stderr);
1345 }
1346 if (!watch_only
1347 && (!gs.restart_command || !gs.start_command || !gs.stop_command)) {
1348 fprintf(stderr,
1349 "Options -s (start), -k (kill), and -r (restart) are required.\n");
1350 frr_help_exit(1);
1351 }
1352
1353 if (blankstr) {
1354 if (gs.restart_command)
1355 gs.restart_command =
1356 translate_blanks(gs.restart_command, blankstr);
1357 if (gs.start_command)
1358 gs.start_command =
1359 translate_blanks(gs.start_command, blankstr);
1360 if (gs.stop_command)
1361 gs.stop_command =
1362 translate_blanks(gs.stop_command, blankstr);
1363 }
1364
1365 gs.restart.interval = gs.min_restart_interval;
1366
1367 master = frr_init();
1368 watchfrr_error_init();
1369 watchfrr_init(argc, argv);
1370 watchfrr_vty_init();
1371
1372 frr_config_fork();
1373
1374 zlog_set_level(ZLOG_DEST_MONITOR, ZLOG_DISABLED);
1375 if (watchfrr_di.daemon_mode)
1376 zlog_set_level(ZLOG_DEST_SYSLOG, MIN(gs.loglevel, LOG_DEBUG));
1377 else
1378 zlog_set_level(ZLOG_DEST_STDOUT, MIN(gs.loglevel, LOG_DEBUG));
1379
1380 frr_run(master);
1381
1382 systemd_send_stopping();
1383 /* Not reached. */
1384 return 0;
1385 }