]> git.proxmox.com Git - mirror_frr.git/blame - watchfrr/watchfrr.c
tools/frr: get rid of the bash array and "bB"
[mirror_frr.git] / watchfrr / watchfrr.c
CommitLineData
8b886ca7 1/*
896014f4
DL
2 * Monitor status of frr daemons and restart if necessary.
3 *
4 * Copyright (C) 2004 Andrew J. Schorr
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
8b886ca7 19 */
20
a365534f 21#include <zebra.h>
8b886ca7 22#include <thread.h>
23#include <log.h>
52e66296 24#include <network.h>
8b886ca7 25#include <sigevent.h>
a365534f 26#include <lib/version.h>
95c4aff2 27#include "command.h"
87f44e2f 28#include "memory_vty.h"
4f04a76b 29#include "libfrr.h"
b647dc2a 30#include "lib_errors.h"
95c4aff2 31
6f594023 32#include <getopt.h>
a365534f 33#include <sys/un.h>
34#include <sys/wait.h>
837d16cc 35#include <memory.h>
651415bd 36#include <systemd.h>
8b886ca7 37
9473e340 38#include "watchfrr.h"
b647dc2a 39#include "watchfrr_errors.h"
95c4aff2 40
/* Smaller of two values.  Arguments may be evaluated more than once. */
#ifndef MIN
#define MIN(X, Y) (((X) <= (Y)) ? (X) : (Y))
#endif

/*
 * Macros to help randomize timers.
 * JITTER(X) is random() % (X + 1), shifted down by X / 2.
 * FUZZY(X) is X plus JITTER(X / 20).
 */
#define JITTER(X) ((random() % ((X) + 1)) - ((X) / 2))
#define FUZZY(X) ((X) + JITTER((X) / 20))

/* Built-in defaults; they seed the gs initializer and the help text. */
#define DEFAULT_PERIOD 5
#define DEFAULT_TIMEOUT 90
#define DEFAULT_RESTART_TIMEOUT 20
#define DEFAULT_LOGLEVEL LOG_INFO
#define DEFAULT_MIN_RESTART 60
#define DEFAULT_MAX_RESTART 600

/* Token sent with the vty "echo" command and expected back in the reply. */
#define PING_TOKEN "PING"
57
0a7c7856
DL
58DEFINE_MGROUP(WATCHFRR, "watchfrr")
59DEFINE_MTYPE_STATIC(WATCHFRR, WATCHFRR_DAEMON, "watchfrr daemon entry")
60
55c72803 61/* Needs to be global, referenced somewhere inside libfrr. */
8b886ca7 62struct thread_master *master;
63
f168b713 64static bool watch_only = false;
8b886ca7 65
a6810074
DL
/* Steps of a phased global restart, in the order phase_check() walks them. */
typedef enum {
	PHASE_NONE = 0,
	PHASE_STOPS_PENDING,
	PHASE_WAITING_DOWN,
	PHASE_ZEBRA_RESTART_PENDING,
	PHASE_WAITING_ZEBRA_UP
} restart_phase_t;

/*
 * Human-readable phase names, indexed by restart_phase_t.  Designated
 * initializers keep each string tied to its enum value, so the table
 * cannot drift out of step with the enum (it previously carried a stale
 * extra entry that matched no phase).
 */
static const char *phase_str[] = {
	[PHASE_NONE] = "None",
	[PHASE_STOPS_PENDING] = "Stop jobs running",
	[PHASE_WAITING_DOWN] = "Waiting for other daemons to come down",
	[PHASE_ZEBRA_RESTART_PENDING] = "Zebra restart job running",
	[PHASE_WAITING_ZEBRA_UP] = "Waiting for zebra to come up",
};

/* A phase that lasts longer than this is abandoned by phase_hanging(). */
#define PHASE_TIMEOUT (3 * gs.restart_timeout)
84
a6810074
DL
/* Bookkeeping for one background shell job (see run_job()). */
struct restart_info {
	/* Substituted into the command's %s and shown in log messages. */
	const char *name;
	/* Job type label recorded by run_job(); used in log messages. */
	const char *what;
	/* Pid of the running job, or 0 when no job is outstanding. */
	pid_t pid;
	/* Set when a job is started and again when it is reaped. */
	struct timeval time;
	/* Minimum number of seconds between job starts. */
	long interval;
	/* Timer that fires restart_kill() while the job is still running. */
	struct thread *t_kill;
	/* Number of signals restart_kill() has sent to the current job. */
	int kills;
};
94
a6810074 95static struct global_state {
a6810074
DL
96 restart_phase_t phase;
97 struct thread *t_phase_hanging;
98 const char *vtydir;
99 long period;
100 long timeout;
101 long restart_timeout;
102 long min_restart_interval;
103 long max_restart_interval;
a6810074
DL
104 struct daemon *daemons;
105 const char *restart_command;
106 const char *start_command;
107 const char *stop_command;
108 struct restart_info restart;
a6810074 109 int loglevel;
d62a17ae 110 struct daemon *special; /* points to zebra when doing phased restart */
a6810074
DL
111 int numdaemons;
112 int numpids;
d62a17ae 113 int numdown; /* # of daemons that are not UP or UNRESPONSIVE */
8b886ca7 114} gs = {
d62a17ae 115 .phase = PHASE_NONE,
64a249ad 116 .vtydir = frr_vtydir,
d62a17ae 117 .period = 1000 * DEFAULT_PERIOD,
118 .timeout = DEFAULT_TIMEOUT,
119 .restart_timeout = DEFAULT_RESTART_TIMEOUT,
120 .loglevel = DEFAULT_LOGLEVEL,
121 .min_restart_interval = DEFAULT_MIN_RESTART,
122 .max_restart_interval = DEFAULT_MAX_RESTART,
d62a17ae 123};
a6810074
DL
124
/* Connection state of a monitored daemon. */
typedef enum {
	DAEMON_INIT,
	DAEMON_DOWN,
	DAEMON_CONNECTING,
	DAEMON_UP,
	DAEMON_UNRESPONSIVE
} daemon_state_t;

/* True for both DAEMON_UP and DAEMON_UNRESPONSIVE. */
#define IS_UP(DMN)                                                             \
	(((DMN)->state == DAEMON_UP) || ((DMN)->state == DAEMON_UNRESPONSIVE))

/* Printable state names, indexed by daemon_state_t. */
static const char *state_str[] = {
	[DAEMON_INIT] = "Init",
	[DAEMON_DOWN] = "Down",
	[DAEMON_CONNECTING] = "Connecting",
	[DAEMON_UP] = "Up",
	[DAEMON_UNRESPONSIVE] = "Unresponsive",
};
139
140struct daemon {
a6810074
DL
141 const char *name;
142 daemon_state_t state;
143 int fd;
144 struct timeval echo_sent;
d7c0a89a 145 unsigned int connect_tries;
a6810074
DL
146 struct thread *t_wakeup;
147 struct thread *t_read;
148 struct thread *t_write;
149 struct daemon *next;
150 struct restart_info restart;
8b886ca7 151};
152
9272302b
DL
/* Return values for long options that have no single-letter form. */
#define OPTION_MINRESTART 2000
#define OPTION_MAXRESTART 2001
#define OPTION_DRY 2002

/* Long option table: {name, has_arg, flag, val}, ended by an all-zero row. */
static const struct option longopts[] = {
	{"daemon", no_argument, NULL, 'd'},
	{"statedir", required_argument, NULL, 'S'},
	{"loglevel", required_argument, NULL, 'l'},
	{"interval", required_argument, NULL, 'i'},
	{"timeout", required_argument, NULL, 't'},
	{"restart-timeout", required_argument, NULL, 'T'},
	{"restart", required_argument, NULL, 'r'},
	{"start-command", required_argument, NULL, 's'},
	{"kill-command", required_argument, NULL, 'k'},
	{"dry", no_argument, NULL, OPTION_DRY},
	{"min-restart-interval", required_argument, NULL, OPTION_MINRESTART},
	{"max-restart-interval", required_argument, NULL, OPTION_MAXRESTART},
	{"pid-file", required_argument, NULL, 'p'},
	{"blank-string", required_argument, NULL, 'b'},
	{"help", no_argument, NULL, 'h'},
	{"version", no_argument, NULL, 'v'},
	{NULL, 0, NULL, 0},
};
8b886ca7 175
176static int try_connect(struct daemon *dmn);
177static int wakeup_send_echo(struct thread *t_wakeup);
178static void try_restart(struct daemon *dmn);
179static void phase_check(void);
180
4f04a76b
DL
181static const char *progname;
182static void printhelp(FILE *target)
8b886ca7 183{
d62a17ae 184 fprintf(target,
185 "Usage : %s [OPTION...] <daemon name> ...\n\n\
9473e340 186Watchdog program to monitor status of frr daemons and try to restart\n\
8b886ca7 187them if they are down or unresponsive. It determines whether a daemon is\n\
188up based on whether it can connect to the daemon's vty unix stream socket.\n\
189It then repeatedly sends echo commands over that socket to determine whether\n\
190the daemon is responsive. If the daemon crashes, we will receive an EOF\n\
191on the socket connection and know immediately that the daemon is down.\n\n\
192The daemons to be monitored should be listed on the command line.\n\n\
8b886ca7 193In order to avoid attempting to restart the daemons in a fast loop,\n\
194the -m and -M options allow you to control the minimum delay between\n\
195restart commands. The minimum restart delay is recalculated each time\n\
196a restart is attempted: if the time since the last restart attempt exceeds\n\
197twice the -M value, then the restart delay is set to the -m value.\n\
d62a17ae 198Otherwise, the interval is doubled (but capped at the -M value).\n\n",
f168b713 199 progname);
e757c940 200
d62a17ae 201 fprintf(target,
202 "Options:\n\
8b886ca7 203-d, --daemon Run in daemon mode. In this mode, error messages are sent\n\
204 to syslog instead of stdout.\n\
205-S, --statedir Set the vty socket directory (default is %s)\n\
8b886ca7 206-l, --loglevel Set the logging level (default is %d).\n\
207 The value should range from %d (LOG_EMERG) to %d (LOG_DEBUG),\n\
208 but it can be set higher than %d if extra-verbose debugging\n\
209 messages are desired.\n\
9272302b 210 --min-restart-interval\n\
8b886ca7 211 Set the minimum seconds to wait between invocations of daemon\n\
212 restart commands (default is %d).\n\
9272302b 213 --max-restart-interval\n\
8b886ca7 214 Set the maximum seconds to wait between invocations of daemon\n\
215 restart commands (default is %d).\n\
216-i, --interval Set the status polling interval in seconds (default is %d)\n\
217-t, --timeout Set the unresponsiveness timeout in seconds (default is %d)\n\
218-T, --restart-timeout\n\
219 Set the restart (kill) timeout in seconds (default is %d).\n\
220 If any background jobs are still running after this much\n\
221 time has elapsed, they will be killed.\n\
222-r, --restart Supply a Bourne shell command to use to restart a single\n\
223 daemon. The command string should include '%%s' where the\n\
224 name of the daemon should be substituted.\n\
8b886ca7 225-s, --start-command\n\
226 Supply a Bourne shell to command to use to start a single\n\
227 daemon. The command string should include '%%s' where the\n\
228 name of the daemon should be substituted.\n\
229-k, --kill-command\n\
230 Supply a Bourne shell to command to use to stop a single\n\
231 daemon. The command string should include '%%s' where the\n\
232 name of the daemon should be substituted.\n\
f168b713 233 --dry Do not start or restart anything, just log.\n\
8b886ca7 234-p, --pid-file Set process identifier file name\n\
0a7c7856 235 (default is %s/watchfrr.pid).\n\
c8b40f86 236-b, --blank-string\n\
237 When the supplied argument string is found in any of the\n\
f168b713 238 various shell command arguments (-r, -s, or -k), replace\n\
c8b40f86 239 it with a space. This is an ugly hack to circumvent problems\n\
240 passing command-line arguments with embedded spaces.\n\
8b886ca7 241-v, --version Print program version\n\
d62a17ae 242-h, --help Display this help and exit\n",
64a249ad 243 frr_vtydir, DEFAULT_LOGLEVEL, LOG_EMERG, LOG_DEBUG, LOG_DEBUG,
d62a17ae 244 DEFAULT_MIN_RESTART, DEFAULT_MAX_RESTART, DEFAULT_PERIOD,
0a7c7856 245 DEFAULT_TIMEOUT, DEFAULT_RESTART_TIMEOUT, frr_vtydir);
8b886ca7 246}
247
a6810074 248static pid_t run_background(char *shell_cmd)
8b886ca7 249{
a6810074
DL
250 pid_t child;
251
252 switch (child = fork()) {
253 case -1:
450971aa 254 flog_err_sys(EC_LIB_SYSTEM_CALL,
09c866e3
QY
255 "fork failed, cannot run command [%s]: %s",
256 shell_cmd, safe_strerror(errno));
a6810074
DL
257 return -1;
258 case 0:
259 /* Child process. */
d62a17ae 260 /* Use separate process group so child processes can be killed
261 * easily. */
a6810074
DL
262 if (setpgid(0, 0) < 0)
263 zlog_warn("warning: setpgid(0,0) failed: %s",
264 safe_strerror(errno));
265 {
266 char shell[] = "sh";
267 char dashc[] = "-c";
d62a17ae 268 char *const argv[4] = {shell, dashc, shell_cmd, NULL};
a6810074 269 execv("/bin/sh", argv);
450971aa 270 flog_err_sys(EC_LIB_SYSTEM_CALL,
09c866e3
QY
271 "execv(/bin/sh -c '%s') failed: %s",
272 shell_cmd, safe_strerror(errno));
a6810074
DL
273 _exit(127);
274 }
275 default:
276 /* Parent process: we will reap the child later. */
450971aa 277 flog_err_sys(EC_LIB_SYSTEM_CALL,
09c866e3
QY
278 "Forked background command [pid %d]: %s",
279 (int)child, shell_cmd);
a6810074
DL
280 return child;
281 }
8b886ca7 282}
283
a6810074
DL
/*
 * Store (current time - *start_time) in *result, normalised so that
 * tv_usec is not negative.  Returns result.
 */
static struct timeval *time_elapsed(struct timeval *result,
				    const struct timeval *start_time)
{
	gettimeofday(result, NULL);

	result->tv_sec -= start_time->tv_sec;
	result->tv_usec -= start_time->tv_usec;

	/* Borrow whole seconds until the microsecond part is non-negative. */
	for (; result->tv_usec < 0; result->tv_sec--)
		result->tv_usec += 1000000L;

	return result;
}
296
a6810074 297static int restart_kill(struct thread *t_kill)
8b886ca7 298{
a6810074
DL
299 struct restart_info *restart = THREAD_ARG(t_kill);
300 struct timeval delay;
301
302 time_elapsed(&delay, &restart->time);
d62a17ae 303 zlog_warn(
304 "Warning: %s %s child process %d still running after "
305 "%ld seconds, sending signal %d",
306 restart->what, restart->name, (int)restart->pid,
307 (long)delay.tv_sec, (restart->kills ? SIGKILL : SIGTERM));
a6810074
DL
308 kill(-restart->pid, (restart->kills ? SIGKILL : SIGTERM));
309 restart->kills++;
66e78ae6
QY
310 restart->t_kill = NULL;
311 thread_add_timer(master, restart_kill, restart, gs.restart_timeout,
312 &restart->t_kill);
a6810074 313 return 0;
8b886ca7 314}
315
a6810074 316static struct restart_info *find_child(pid_t child)
8b886ca7 317{
f168b713 318 struct daemon *dmn;
7c265f7d
CF
319 if (gs.restart.pid == child)
320 return &gs.restart;
321
f168b713
DL
322 for (dmn = gs.daemons; dmn; dmn = dmn->next) {
323 if (dmn->restart.pid == child)
324 return &dmn->restart;
a6810074
DL
325 }
326 return NULL;
8b886ca7 327}
328
a6810074 329static void sigchild(void)
8b886ca7 330{
a6810074
DL
331 pid_t child;
332 int status;
333 const char *name;
334 const char *what;
335 struct restart_info *restart;
336
337 switch (child = waitpid(-1, &status, WNOHANG)) {
338 case -1:
450971aa 339 flog_err_sys(EC_LIB_SYSTEM_CALL, "waitpid failed: %s",
09c866e3 340 safe_strerror(errno));
a6810074
DL
341 return;
342 case 0:
343 zlog_warn("SIGCHLD received, but waitpid did not reap a child");
344 return;
345 }
346
347 if (child == integrated_write_pid) {
348 integrated_write_sigchld(status);
349 return;
350 }
351
352 if ((restart = find_child(child)) != NULL) {
353 name = restart->name;
354 what = restart->what;
355 restart->pid = 0;
356 gs.numpids--;
357 thread_cancel(restart->t_kill);
358 restart->t_kill = NULL;
d62a17ae 359 /* Update restart time to reflect the time the command
360 * completed. */
a6810074
DL
361 gettimeofday(&restart->time, NULL);
362 } else {
09c866e3 363 flog_err_sys(
450971aa 364 EC_LIB_SYSTEM_CALL,
09c866e3
QY
365 "waitpid returned status for an unknown child process %d",
366 (int)child);
a6810074
DL
367 name = "(unknown)";
368 what = "background";
369 }
370 if (WIFSTOPPED(status))
d62a17ae 371 zlog_warn("warning: %s %s process %d is stopped", what, name,
372 (int)child);
a6810074 373 else if (WIFSIGNALED(status))
d62a17ae 374 zlog_warn("%s %s process %d terminated due to signal %d", what,
375 name, (int)child, WTERMSIG(status));
a6810074
DL
376 else if (WIFEXITED(status)) {
377 if (WEXITSTATUS(status) != 0)
d62a17ae 378 zlog_warn(
379 "%s %s process %d exited with non-zero status %d",
380 what, name, (int)child, WEXITSTATUS(status));
a6810074
DL
381 else
382 zlog_debug("%s %s process %d exited normally", what,
383 name, (int)child);
384 } else
09c866e3 385 flog_err_sys(
450971aa 386 EC_LIB_SYSTEM_CALL,
09c866e3
QY
387 "cannot interpret %s %s process %d wait status 0x%x",
388 what, name, (int)child, status);
a6810074 389 phase_check();
8b886ca7 390}
391
d62a17ae 392static int run_job(struct restart_info *restart, const char *cmdtype,
393 const char *command, int force, int update_interval)
8b886ca7 394{
a6810074
DL
395 struct timeval delay;
396
397 if (gs.loglevel > LOG_DEBUG + 1)
398 zlog_debug("attempting to %s %s", cmdtype, restart->name);
399
400 if (restart->pid) {
401 if (gs.loglevel > LOG_DEBUG + 1)
d62a17ae 402 zlog_debug(
403 "cannot %s %s, previous pid %d still running",
404 cmdtype, restart->name, (int)restart->pid);
a6810074
DL
405 return -1;
406 }
407
d62a17ae 408 /* Note: time_elapsed test must come before the force test, since we
409 need
a6810074
DL
410 to make sure that delay is initialized for use below in updating the
411 restart interval. */
412 if ((time_elapsed(&delay, &restart->time)->tv_sec < restart->interval)
413 && !force) {
414 if (gs.loglevel > LOG_DEBUG + 1)
d62a17ae 415 zlog_debug(
416 "postponing %s %s: "
417 "elapsed time %ld < retry interval %ld",
418 cmdtype, restart->name, (long)delay.tv_sec,
419 restart->interval);
a6810074
DL
420 return -1;
421 }
422
423 gettimeofday(&restart->time, NULL);
424 restart->kills = 0;
425 {
426 char cmd[strlen(command) + strlen(restart->name) + 1];
427 snprintf(cmd, sizeof(cmd), command, restart->name);
428 if ((restart->pid = run_background(cmd)) > 0) {
66e78ae6 429 restart->t_kill = NULL;
d62a17ae 430 thread_add_timer(master, restart_kill, restart,
431 gs.restart_timeout, &restart->t_kill);
a6810074
DL
432 restart->what = cmdtype;
433 gs.numpids++;
434 } else
435 restart->pid = 0;
436 }
437
438 /* Calculate the new restart interval. */
439 if (update_interval) {
440 if (delay.tv_sec > 2 * gs.max_restart_interval)
441 restart->interval = gs.min_restart_interval;
442 else if ((restart->interval *= 2) > gs.max_restart_interval)
443 restart->interval = gs.max_restart_interval;
444 if (gs.loglevel > LOG_DEBUG + 1)
445 zlog_debug("restart %s interval is now %ld",
446 restart->name, restart->interval);
447 }
448 return restart->pid;
8b886ca7 449}
450
/*
 * Scheduling helpers.  Each one clears the daemon's event slot and then
 * registers the event, with the slot as the back-reference.
 *
 * The do { } while (0) wrappers deliberately carry no trailing semicolon:
 * the caller's own ';' completes the statement, so the macros stay usable
 * as the sole body of an if that is followed by an else.
 */

/* Watch the daemon's socket for readable data with handle_read(). */
#define SET_READ_HANDLER(DMN)                                                  \
	do {                                                                   \
		(DMN)->t_read = NULL;                                          \
		thread_add_read(master, handle_read, (DMN), (DMN)->fd,         \
				&(DMN)->t_read);                               \
	} while (0)

/* Retry the connection via wakeup_down() after about one period. */
#define SET_WAKEUP_DOWN(DMN)                                                   \
	do {                                                                   \
		(DMN)->t_wakeup = NULL;                                        \
		thread_add_timer_msec(master, wakeup_down, (DMN),              \
				      FUZZY(gs.period), &(DMN)->t_wakeup);     \
	} while (0)

/* Run wakeup_unresponsive() after about one period. */
#define SET_WAKEUP_UNRESPONSIVE(DMN)                                           \
	do {                                                                   \
		(DMN)->t_wakeup = NULL;                                        \
		thread_add_timer_msec(master, wakeup_unresponsive, (DMN),      \
				      FUZZY(gs.period), &(DMN)->t_wakeup);     \
	} while (0)

/* Send the next echo via wakeup_send_echo() after about one period. */
#define SET_WAKEUP_ECHO(DMN)                                                   \
	do {                                                                   \
		(DMN)->t_wakeup = NULL;                                        \
		thread_add_timer_msec(master, wakeup_send_echo, (DMN),         \
				      FUZZY(gs.period), &(DMN)->t_wakeup);     \
	} while (0)
8b886ca7 478
a6810074 479static int wakeup_down(struct thread *t_wakeup)
8b886ca7 480{
a6810074
DL
481 struct daemon *dmn = THREAD_ARG(t_wakeup);
482
483 dmn->t_wakeup = NULL;
484 if (try_connect(dmn) < 0)
485 SET_WAKEUP_DOWN(dmn);
486 if ((dmn->connect_tries > 1) && (dmn->state != DAEMON_UP))
487 try_restart(dmn);
488 return 0;
8b886ca7 489}
490
a6810074 491static int wakeup_init(struct thread *t_wakeup)
8b886ca7 492{
a6810074
DL
493 struct daemon *dmn = THREAD_ARG(t_wakeup);
494
495 dmn->t_wakeup = NULL;
496 if (try_connect(dmn) < 0) {
497 SET_WAKEUP_DOWN(dmn);
f74ae2bb 498 flog_err(EC_WATCHFRR_CONNECTION,
1c50c1c0
QY
499 "%s state -> down : initial connection attempt failed",
500 dmn->name);
a6810074
DL
501 dmn->state = DAEMON_DOWN;
502 }
503 return 0;
8b886ca7 504}
505
a6810074 506static void daemon_down(struct daemon *dmn, const char *why)
8b886ca7 507{
a6810074 508 if (IS_UP(dmn) || (dmn->state == DAEMON_INIT))
1c50c1c0
QY
509 flog_err(EC_WATCHFRR_CONNECTION, "%s state -> down : %s",
510 dmn->name, why);
a6810074
DL
511 else if (gs.loglevel > LOG_DEBUG)
512 zlog_debug("%s still down : %s", dmn->name, why);
513 if (IS_UP(dmn))
514 gs.numdown++;
515 dmn->state = DAEMON_DOWN;
516 if (dmn->fd >= 0) {
517 close(dmn->fd);
518 dmn->fd = -1;
519 }
520 THREAD_OFF(dmn->t_read);
521 THREAD_OFF(dmn->t_write);
522 THREAD_OFF(dmn->t_wakeup);
523 if (try_connect(dmn) < 0)
524 SET_WAKEUP_DOWN(dmn);
525 phase_check();
8b886ca7 526}
527
a6810074 528static int handle_read(struct thread *t_read)
8b886ca7 529{
a6810074
DL
530 struct daemon *dmn = THREAD_ARG(t_read);
531 static const char resp[sizeof(PING_TOKEN) + 4] = PING_TOKEN "\n";
532 char buf[sizeof(resp) + 100];
533 ssize_t rc;
534 struct timeval delay;
535
536 dmn->t_read = NULL;
537 if ((rc = read(dmn->fd, buf, sizeof(buf))) < 0) {
538 char why[100];
539
540 if (ERRNO_IO_RETRY(errno)) {
541 /* Pretend it never happened. */
542 SET_READ_HANDLER(dmn);
543 return 0;
544 }
545 snprintf(why, sizeof(why), "unexpected read error: %s",
546 safe_strerror(errno));
547 daemon_down(dmn, why);
548 return 0;
8b886ca7 549 }
a6810074
DL
550 if (rc == 0) {
551 daemon_down(dmn, "read returned EOF");
552 return 0;
553 }
554 if (!dmn->echo_sent.tv_sec) {
555 char why[sizeof(buf) + 100];
556 snprintf(why, sizeof(why),
557 "unexpected read returns %d bytes: %.*s", (int)rc,
558 (int)rc, buf);
559 daemon_down(dmn, why);
560 return 0;
8b886ca7 561 }
a6810074
DL
562
563 /* We are expecting an echo response: is there any chance that the
564 response would not be returned entirely in the first read? That
565 seems inconceivable... */
566 if ((rc != sizeof(resp)) || memcmp(buf, resp, sizeof(resp))) {
567 char why[100 + sizeof(buf)];
568 snprintf(why, sizeof(why),
569 "read returned bad echo response of %d bytes "
d62a17ae 570 "(expecting %u): %.*s",
d7c0a89a 571 (int)rc, (unsigned int)sizeof(resp), (int)rc, buf);
a6810074
DL
572 daemon_down(dmn, why);
573 return 0;
574 }
575
576 time_elapsed(&delay, &dmn->echo_sent);
577 dmn->echo_sent.tv_sec = 0;
578 if (dmn->state == DAEMON_UNRESPONSIVE) {
579 if (delay.tv_sec < gs.timeout) {
580 dmn->state = DAEMON_UP;
d62a17ae 581 zlog_warn(
582 "%s state -> up : echo response received after %ld.%06ld "
583 "seconds",
584 dmn->name, (long)delay.tv_sec,
585 (long)delay.tv_usec);
a6810074 586 } else
d62a17ae 587 zlog_warn(
588 "%s: slow echo response finally received after %ld.%06ld "
589 "seconds",
590 dmn->name, (long)delay.tv_sec,
591 (long)delay.tv_usec);
a6810074
DL
592 } else if (gs.loglevel > LOG_DEBUG + 1)
593 zlog_debug("%s: echo response received after %ld.%06ld seconds",
594 dmn->name, (long)delay.tv_sec, (long)delay.tv_usec);
595
596 SET_READ_HANDLER(dmn);
597 if (dmn->t_wakeup)
598 thread_cancel(dmn->t_wakeup);
599 SET_WAKEUP_ECHO(dmn);
600
601 return 0;
8b886ca7 602}
603
207e0d7a
DS
604/*
605 * Wait till we notice that all daemons are ready before
606 * we send we are ready to systemd
607 */
a6810074 608static void daemon_send_ready(void)
207e0d7a 609{
a6810074
DL
610 static int sent = 0;
611 if (!sent && gs.numdown == 0) {
a6810074 612 FILE *fp;
207e0d7a 613
0a7c7856
DL
614 zlog_notice("all daemons up, doing startup-complete notify");
615 frr_detach();
616
a6810074 617 fp = fopen(DAEMON_VTY_DIR "/watchfrr.started", "w");
f5ba21fc
DS
618 if (fp)
619 fclose(fp);
60bd2534 620#if defined HAVE_SYSTEMD
a6810074 621 systemd_send_started(master, 0);
60bd2534 622#endif
a6810074
DL
623 sent = 1;
624 }
207e0d7a
DS
625}
626
a6810074 627static void daemon_up(struct daemon *dmn, const char *why)
8b886ca7 628{
a6810074
DL
629 dmn->state = DAEMON_UP;
630 gs.numdown--;
631 dmn->connect_tries = 0;
632 zlog_notice("%s state -> up : %s", dmn->name, why);
633 daemon_send_ready();
a8cbb8b3 634 SET_WAKEUP_ECHO(dmn);
a6810074 635 phase_check();
8b886ca7 636}
637
a6810074 638static int check_connect(struct thread *t_write)
8b886ca7 639{
a6810074
DL
640 struct daemon *dmn = THREAD_ARG(t_write);
641 int sockerr;
642 socklen_t reslen = sizeof(sockerr);
643
644 dmn->t_write = NULL;
645 if (getsockopt(dmn->fd, SOL_SOCKET, SO_ERROR, (char *)&sockerr, &reslen)
646 < 0) {
647 zlog_warn("%s: check_connect: getsockopt failed: %s", dmn->name,
648 safe_strerror(errno));
649 daemon_down(dmn,
650 "getsockopt failed checking connection success");
651 return 0;
652 }
653 if ((reslen == sizeof(sockerr)) && sockerr) {
654 char why[100];
d62a17ae 655 snprintf(
656 why, sizeof(why),
657 "getsockopt reports that connection attempt failed: %s",
658 safe_strerror(sockerr));
a6810074
DL
659 daemon_down(dmn, why);
660 return 0;
661 }
662
663 daemon_up(dmn, "delayed connect succeeded");
664 return 0;
8b886ca7 665}
666
a6810074 667static int wakeup_connect_hanging(struct thread *t_wakeup)
8b886ca7 668{
a6810074
DL
669 struct daemon *dmn = THREAD_ARG(t_wakeup);
670 char why[100];
671
672 dmn->t_wakeup = NULL;
673 snprintf(why, sizeof(why),
674 "connection attempt timed out after %ld seconds", gs.timeout);
675 daemon_down(dmn, why);
676 return 0;
8b886ca7 677}
678
679/* Making connection to protocol daemon. */
a6810074 680static int try_connect(struct daemon *dmn)
8b886ca7 681{
a6810074
DL
682 int sock;
683 struct sockaddr_un addr;
684 socklen_t len;
685
686 if (gs.loglevel > LOG_DEBUG + 1)
687 zlog_debug("%s: attempting to connect", dmn->name);
688 dmn->connect_tries++;
689
690 memset(&addr, 0, sizeof(struct sockaddr_un));
691 addr.sun_family = AF_UNIX;
d62a17ae 692 snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/%s.vty", gs.vtydir,
693 dmn->name);
6f0e3f6e 694#ifdef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN
a6810074 695 len = addr.sun_len = SUN_LEN(&addr);
8b886ca7 696#else
a6810074 697 len = sizeof(addr.sun_family) + strlen(addr.sun_path);
d62a17ae 698#endif /* HAVE_STRUCT_SOCKADDR_UN_SUN_LEN */
a6810074
DL
699
700 /* Quick check to see if we might succeed before we go to the trouble
701 of creating a socket. */
702 if (access(addr.sun_path, W_OK) < 0) {
703 if (errno != ENOENT)
450971aa 704 flog_err_sys(EC_LIB_SYSTEM_CALL,
09c866e3
QY
705 "%s: access to socket %s denied: %s",
706 dmn->name, addr.sun_path,
707 safe_strerror(errno));
a6810074
DL
708 return -1;
709 }
710
711 if ((sock = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
450971aa 712 flog_err_sys(EC_LIB_SOCKET, "%s(%s): cannot make socket: %s",
09c866e3 713 __func__, addr.sun_path, safe_strerror(errno));
a6810074
DL
714 return -1;
715 }
716
717 if (set_nonblocking(sock) < 0 || set_cloexec(sock) < 0) {
450971aa 718 flog_err_sys(EC_LIB_SYSTEM_CALL,
09c866e3
QY
719 "%s(%s): set_nonblocking/cloexec(%d) failed",
720 __func__, addr.sun_path, sock);
a6810074
DL
721 close(sock);
722 return -1;
8b886ca7 723 }
a6810074
DL
724
725 if (connect(sock, (struct sockaddr *)&addr, len) < 0) {
726 if ((errno != EINPROGRESS) && (errno != EWOULDBLOCK)) {
727 if (gs.loglevel > LOG_DEBUG)
728 zlog_debug("%s(%s): connect failed: %s",
729 __func__, addr.sun_path,
730 safe_strerror(errno));
731 close(sock);
732 return -1;
733 }
734 if (gs.loglevel > LOG_DEBUG)
735 zlog_debug("%s: connection in progress", dmn->name);
736 dmn->state = DAEMON_CONNECTING;
737 dmn->fd = sock;
66e78ae6
QY
738 dmn->t_write = NULL;
739 thread_add_write(master, check_connect, dmn, dmn->fd,
d62a17ae 740 &dmn->t_write);
741 dmn->t_wakeup = NULL;
742 thread_add_timer(master, wakeup_connect_hanging, dmn,
743 gs.timeout, &dmn->t_wakeup);
a6810074
DL
744 SET_READ_HANDLER(dmn);
745 return 0;
746 }
747
748 dmn->fd = sock;
749 SET_READ_HANDLER(dmn);
750 daemon_up(dmn, "connect succeeded");
751 return 1;
8b886ca7 752}
753
a6810074 754static int phase_hanging(struct thread *t_hanging)
8b886ca7 755{
a6810074 756 gs.t_phase_hanging = NULL;
f74ae2bb 757 flog_err(EC_WATCHFRR_CONNECTION,
1c50c1c0
QY
758 "Phase [%s] hanging for %ld seconds, aborting phased restart",
759 phase_str[gs.phase], PHASE_TIMEOUT);
a6810074
DL
760 gs.phase = PHASE_NONE;
761 return 0;
8b886ca7 762}
763
a6810074 764static void set_phase(restart_phase_t new_phase)
8b886ca7 765{
a6810074
DL
766 gs.phase = new_phase;
767 if (gs.t_phase_hanging)
768 thread_cancel(gs.t_phase_hanging);
66e78ae6
QY
769 gs.t_phase_hanging = NULL;
770 thread_add_timer(master, phase_hanging, NULL, PHASE_TIMEOUT,
771 &gs.t_phase_hanging);
8b886ca7 772}
773
a6810074 774static void phase_check(void)
8b886ca7 775{
a6810074
DL
776 switch (gs.phase) {
777 case PHASE_NONE:
778 break;
779 case PHASE_STOPS_PENDING:
780 if (gs.numpids)
781 break;
d62a17ae 782 zlog_info(
783 "Phased restart: all routing daemon stop jobs have completed.");
a6810074
DL
784 set_phase(PHASE_WAITING_DOWN);
785
d62a17ae 786 /*FALLTHRU*/
a6810074
DL
787 case PHASE_WAITING_DOWN:
788 if (gs.numdown + IS_UP(gs.special) < gs.numdaemons)
789 break;
790 zlog_info("Phased restart: all routing daemons now down.");
791 run_job(&gs.special->restart, "restart", gs.restart_command, 1,
792 1);
793 set_phase(PHASE_ZEBRA_RESTART_PENDING);
794
d62a17ae 795 /*FALLTHRU*/
a6810074
DL
796 case PHASE_ZEBRA_RESTART_PENDING:
797 if (gs.special->restart.pid)
798 break;
799 zlog_info("Phased restart: %s restart job completed.",
800 gs.special->name);
801 set_phase(PHASE_WAITING_ZEBRA_UP);
802
d62a17ae 803 /*FALLTHRU*/
a6810074
DL
804 case PHASE_WAITING_ZEBRA_UP:
805 if (!IS_UP(gs.special))
806 break;
807 zlog_info("Phased restart: %s is now up.", gs.special->name);
808 {
809 struct daemon *dmn;
810 for (dmn = gs.daemons; dmn; dmn = dmn->next) {
811 if (dmn != gs.special)
812 run_job(&dmn->restart, "start",
813 gs.start_command, 1, 0);
814 }
815 }
816 gs.phase = PHASE_NONE;
817 THREAD_OFF(gs.t_phase_hanging);
818 zlog_notice("Phased global restart has completed.");
819 break;
820 }
8b886ca7 821}
822
a6810074 823static void try_restart(struct daemon *dmn)
8b886ca7 824{
f168b713 825 if (watch_only)
a6810074 826 return;
a6810074 827
f168b713
DL
828 if (dmn != gs.special) {
829 if ((gs.special->state == DAEMON_UP)
830 && (gs.phase == PHASE_NONE))
831 run_job(&dmn->restart, "restart", gs.restart_command, 0,
832 1);
833 else
834 zlog_debug(
835 "%s: postponing restart attempt because master %s daemon "
836 "not up [%s], or phased restart in progress",
837 dmn->name, gs.special->name,
838 state_str[gs.special->state]);
839 return;
840 }
841
842 if ((gs.phase != PHASE_NONE) || gs.numpids) {
843 if (gs.loglevel > LOG_DEBUG + 1)
844 zlog_debug(
845 "postponing phased global restart: restart already in "
846 "progress [%s], or outstanding child processes [%d]",
847 phase_str[gs.phase], gs.numpids);
848 return;
849 }
850 /* Is it too soon for a restart? */
851 {
852 struct timeval delay;
853 if (time_elapsed(&delay, &gs.special->restart.time)->tv_sec
854 < gs.special->restart.interval) {
a6810074 855 if (gs.loglevel > LOG_DEBUG + 1)
d62a17ae 856 zlog_debug(
f168b713
DL
857 "postponing phased global restart: "
858 "elapsed time %ld < retry interval %ld",
859 (long)delay.tv_sec,
860 gs.special->restart.interval);
861 return;
a6810074 862 }
8b886ca7 863 }
f168b713 864 run_job(&gs.restart, "restart", gs.restart_command, 0, 1);
8b886ca7 865}
866
a6810074 867static int wakeup_unresponsive(struct thread *t_wakeup)
8b886ca7 868{
a6810074
DL
869 struct daemon *dmn = THREAD_ARG(t_wakeup);
870
871 dmn->t_wakeup = NULL;
872 if (dmn->state != DAEMON_UNRESPONSIVE)
f74ae2bb 873 flog_err(EC_WATCHFRR_CONNECTION,
1c50c1c0
QY
874 "%s: no longer unresponsive (now %s), "
875 "wakeup should have been cancelled!",
876 dmn->name, state_str[dmn->state]);
a6810074
DL
877 else {
878 SET_WAKEUP_UNRESPONSIVE(dmn);
879 try_restart(dmn);
880 }
881 return 0;
8b886ca7 882}
883
a6810074 884static int wakeup_no_answer(struct thread *t_wakeup)
8b886ca7 885{
a6810074
DL
886 struct daemon *dmn = THREAD_ARG(t_wakeup);
887
888 dmn->t_wakeup = NULL;
889 dmn->state = DAEMON_UNRESPONSIVE;
f74ae2bb 890 flog_err(EC_WATCHFRR_CONNECTION,
1c50c1c0
QY
891 "%s state -> unresponsive : no response yet to ping "
892 "sent %ld seconds ago",
893 dmn->name, gs.timeout);
71e7975a
DL
894 SET_WAKEUP_UNRESPONSIVE(dmn);
895 try_restart(dmn);
a6810074 896 return 0;
8b886ca7 897}
898
a6810074 899static int wakeup_send_echo(struct thread *t_wakeup)
8b886ca7 900{
a6810074
DL
901 static const char echocmd[] = "echo " PING_TOKEN;
902 ssize_t rc;
903 struct daemon *dmn = THREAD_ARG(t_wakeup);
904
905 dmn->t_wakeup = NULL;
d62a17ae 906 if (((rc = write(dmn->fd, echocmd, sizeof(echocmd))) < 0)
907 || ((size_t)rc != sizeof(echocmd))) {
a6810074
DL
908 char why[100 + sizeof(echocmd)];
909 snprintf(why, sizeof(why),
910 "write '%s' returned %d instead of %u", echocmd,
d7c0a89a 911 (int)rc, (unsigned int)sizeof(echocmd));
a6810074
DL
912 daemon_down(dmn, why);
913 } else {
914 gettimeofday(&dmn->echo_sent, NULL);
66e78ae6
QY
915 dmn->t_wakeup = NULL;
916 thread_add_timer(master, wakeup_no_answer, dmn, gs.timeout,
917 &dmn->t_wakeup);
a6810074
DL
918 }
919 return 0;
8b886ca7 920}
921
470bc619
QY
922bool check_all_up(void)
923{
924 struct daemon *dmn;
925
926 for (dmn = gs.daemons; dmn; dmn = dmn->next)
927 if (dmn->state != DAEMON_UP)
928 return false;
929 return true;
930}
931
/*
 * Termination handler: log the event, call systemd_send_stopping() and
 * exit with status 0.
 */
static void sigint(void)
{
	zlog_notice("Terminating on signal");

	systemd_send_stopping();

	exit(0);
}
938
/*
 * Check that cmd is usable as a one-argument format template: its first
 * '%' must be directly followed by 's', and no other '%' may appear after
 * it.  Returns 1 if so, 0 otherwise.
 */
static int valid_command(const char *cmd)
{
	const char *pct = strchr(cmd, '%');

	if (pct == NULL)
		return 0;
	if (pct[1] != 's')
		return 0;
	return strchr(pct + 1, '%') == NULL;
}
946
/*
 * This is an ugly hack to circumvent problems with passing command-line
 * arguments that contain spaces.  The fix is to use a configuration file.
 *
 * Returns a newly allocated copy of cmd in which every occurrence of
 * blankstr has been replaced by a single space.  The caller owns the
 * result.  The process exits with status 1 if memory cannot be allocated.
 *
 * An empty blankstr matches nothing and yields an unmodified copy.  After
 * each replacement the search resumes behind the inserted space, so the
 * loop always makes progress, even when blankstr is itself a single space.
 */
static char *translate_blanks(const char *cmd, const char *blankstr)
{
	size_t bslen = strlen(blankstr);
	size_t cmdlen = strlen(cmd);
	char *res = malloc(cmdlen + 1);
	char *p;

	if (!res) {
		perror("malloc");
		exit(1);
	}
	memcpy(res, cmd, cmdlen + 1);

	/* strstr() finds an empty needle at every position. */
	if (bslen == 0)
		return res;

	p = res;
	while ((p = strstr(p, blankstr)) != NULL) {
		*p = ' ';
		/* Close the gap left by the rest of the marker. */
		if (bslen != 1)
			memmove(p + 1, p + bslen, strlen(p + bslen) + 1);
		p++;
	}
	return res;
}
966
0a7c7856
DL
967static void watchfrr_init(int argc, char **argv)
968{
969 const char *special = "zebra";
970 int i;
971 struct daemon *dmn, **add = &gs.daemons;
972 char alldaemons[512] = "", *p = alldaemons;
973
974 for (i = optind; i < argc; i++) {
975 dmn = XCALLOC(MTYPE_WATCHFRR_DAEMON, sizeof(*dmn));
976
977 dmn->name = dmn->restart.name = argv[i];
978 dmn->state = DAEMON_INIT;
979 gs.numdaemons++;
980 gs.numdown++;
981 dmn->fd = -1;
982 dmn->t_wakeup = NULL;
983 thread_add_timer_msec(master, wakeup_init, dmn,
984 100 + (random() % 900),
985 &dmn->t_wakeup);
986 dmn->restart.interval = gs.min_restart_interval;
987 *add = dmn;
988 add = &dmn->next;
989
990 if (!strcmp(dmn->name, special))
991 gs.special = dmn;
992 }
993
994 if (!gs.daemons) {
995 fprintf(stderr,
996 "Must specify one or more daemons to monitor.\n\n");
997 frr_help_exit(1);
998 }
999 if (!watch_only && !gs.special) {
1000 fprintf(stderr, "\"%s\" daemon must be in daemon lists\n\n",
1001 special);
1002 frr_help_exit(1);
1003 }
1004
1005 for (dmn = gs.daemons; dmn; dmn = dmn->next) {
1006 snprintf(p, alldaemons + sizeof(alldaemons) - p, "%s%s",
1007 (p == alldaemons) ? "" : " ", dmn->name);
1008 p += strlen(p);
1009 }
1010 zlog_notice("%s %s watching [%s]%s", progname, FRR_VERSION, alldaemons,
1011 watch_only ? ", monitor mode" : "");
1012}
1013
a6810074 1014struct zebra_privs_t watchfrr_privs = {
95c4aff2 1015#ifdef VTY_GROUP
a6810074 1016 .vty_group = VTY_GROUP,
95c4aff2
DL
1017#endif
1018};
1019
4f04a76b
DL
1020static struct quagga_signal_t watchfrr_signals[] = {
1021 {
1022 .signal = SIGINT,
1023 .handler = sigint,
1024 },
1025 {
1026 .signal = SIGTERM,
1027 .handler = sigint,
1028 },
1029 {
1030 .signal = SIGCHLD,
1031 .handler = sigchild,
1032 },
1033};
1034
1035FRR_DAEMON_INFO(watchfrr, WATCHFRR,
d62a17ae 1036 .flags = FRR_NO_PRIVSEP | FRR_NO_TCPVTY | FRR_LIMITED_CLI
0a7c7856
DL
1037 | FRR_NO_CFG_PID_DRY | FRR_NO_ZCLIENT
1038 | FRR_DETACH_LATER,
4f04a76b 1039
d62a17ae 1040 .printhelp = printhelp,
1041 .copyright = "Copyright 2004 Andrew J. Schorr",
4f04a76b 1042
d62a17ae 1043 .signals = watchfrr_signals,
1044 .n_signals = array_size(watchfrr_signals),
4f04a76b 1045
d62a17ae 1046 .privs = &watchfrr_privs, )
4f04a76b 1047
999f153e
DL
1048#define DEPRECATED_OPTIONS "aAezR:"
1049
a6810074 1050int main(int argc, char **argv)
8b886ca7 1051{
a6810074 1052 int opt;
a6810074 1053 const char *blankstr = NULL;
a6810074 1054
4f04a76b
DL
1055 frr_preinit(&watchfrr_di, argc, argv);
1056 progname = watchfrr_di.progname;
1057
999f153e 1058 frr_opt_add("b:dk:l:i:p:r:S:s:t:T:" DEPRECATED_OPTIONS, longopts, "");
a6810074
DL
1059
1060 gs.restart.name = "all";
4f04a76b 1061 while ((opt = frr_getopt(argc, argv, NULL)) != EOF) {
999f153e
DL
1062 if (opt && opt < 128 && strchr(DEPRECATED_OPTIONS, opt)) {
1063 fprintf(stderr,
1064 "The -%c option no longer exists.\n"
1065 "Please refer to the watchfrr(8) man page.\n",
1066 opt);
1067 exit(1);
1068 }
1069
a6810074
DL
1070 switch (opt) {
1071 case 0:
1072 break;
a6810074
DL
1073 case 'b':
1074 blankstr = optarg;
1075 break;
f168b713
DL
1076 case OPTION_DRY:
1077 watch_only = true;
a6810074
DL
1078 break;
1079 case 'k':
1080 if (!valid_command(optarg)) {
1081 fprintf(stderr,
1082 "Invalid kill command, must contain '%%s': %s\n",
1083 optarg);
4f04a76b 1084 frr_help_exit(1);
a6810074
DL
1085 }
1086 gs.stop_command = optarg;
1087 break;
d62a17ae 1088 case 'l': {
1089 char garbage[3];
1090 if ((sscanf(optarg, "%d%1s", &gs.loglevel, garbage)
1091 != 1)
1092 || (gs.loglevel < LOG_EMERG)) {
1093 fprintf(stderr,
1094 "Invalid loglevel argument: %s\n",
1095 optarg);
1096 frr_help_exit(1);
a6810074 1097 }
d62a17ae 1098 } break;
1099 case OPTION_MINRESTART: {
1100 char garbage[3];
1101 if ((sscanf(optarg, "%ld%1s", &gs.min_restart_interval,
1102 garbage)
1103 != 1)
1104 || (gs.min_restart_interval < 0)) {
1105 fprintf(stderr,
1106 "Invalid min_restart_interval argument: %s\n",
1107 optarg);
1108 frr_help_exit(1);
a6810074 1109 }
d62a17ae 1110 } break;
1111 case OPTION_MAXRESTART: {
1112 char garbage[3];
1113 if ((sscanf(optarg, "%ld%1s", &gs.max_restart_interval,
1114 garbage)
1115 != 1)
1116 || (gs.max_restart_interval < 0)) {
1117 fprintf(stderr,
1118 "Invalid max_restart_interval argument: %s\n",
1119 optarg);
1120 frr_help_exit(1);
a6810074 1121 }
d62a17ae 1122 } break;
1123 case 'i': {
1124 char garbage[3];
1125 int period;
1126 if ((sscanf(optarg, "%d%1s", &period, garbage) != 1)
1127 || (gs.period < 1)) {
1128 fprintf(stderr,
1129 "Invalid interval argument: %s\n",
1130 optarg);
1131 frr_help_exit(1);
a6810074 1132 }
d62a17ae 1133 gs.period = 1000 * period;
1134 } break;
a6810074 1135 case 'p':
0a7c7856 1136 watchfrr_di.pid_file = optarg;
a6810074
DL
1137 break;
1138 case 'r':
a6810074
DL
1139 if (!valid_command(optarg)) {
1140 fprintf(stderr,
1141 "Invalid restart command, must contain '%%s': %s\n",
1142 optarg);
4f04a76b 1143 frr_help_exit(1);
a6810074
DL
1144 }
1145 gs.restart_command = optarg;
a6810074
DL
1146 break;
1147 case 's':
1148 if (!valid_command(optarg)) {
1149 fprintf(stderr,
1150 "Invalid start command, must contain '%%s': %s\n",
1151 optarg);
4f04a76b 1152 frr_help_exit(1);
a6810074
DL
1153 }
1154 gs.start_command = optarg;
1155 break;
1156 case 'S':
1157 gs.vtydir = optarg;
1158 break;
d62a17ae 1159 case 't': {
1160 char garbage[3];
1161 if ((sscanf(optarg, "%ld%1s", &gs.timeout, garbage)
1162 != 1)
1163 || (gs.timeout < 1)) {
1164 fprintf(stderr,
1165 "Invalid timeout argument: %s\n",
1166 optarg);
1167 frr_help_exit(1);
a6810074 1168 }
d62a17ae 1169 } break;
1170 case 'T': {
1171 char garbage[3];
1172 if ((sscanf(optarg, "%ld%1s", &gs.restart_timeout,
1173 garbage)
1174 != 1)
1175 || (gs.restart_timeout < 1)) {
1176 fprintf(stderr,
1177 "Invalid restart timeout argument: %s\n",
1178 optarg);
1179 frr_help_exit(1);
a6810074 1180 }
d62a17ae 1181 } break;
a6810074
DL
1182 default:
1183 fputs("Invalid option.\n", stderr);
4f04a76b 1184 frr_help_exit(1);
a6810074 1185 }
8b886ca7 1186 }
a6810074 1187
71e7975a
DL
1188 if (watch_only
1189 && (gs.start_command || gs.stop_command || gs.restart_command)) {
d87ae5cc 1190 fputs("Options -r/-s/-k are not used when --dry is active.\n",
a6810074 1191 stderr);
8b886ca7 1192 }
f168b713
DL
1193 if (!watch_only
1194 && (!gs.restart_command || !gs.start_command || !gs.stop_command)) {
1195 fprintf(stderr,
1196 "Options -s (start), -k (kill), and -r (restart) are required.\n");
1197 frr_help_exit(1);
8b886ca7 1198 }
8b886ca7 1199
a6810074
DL
1200 if (blankstr) {
1201 if (gs.restart_command)
1202 gs.restart_command =
d62a17ae 1203 translate_blanks(gs.restart_command, blankstr);
a6810074
DL
1204 if (gs.start_command)
1205 gs.start_command =
d62a17ae 1206 translate_blanks(gs.start_command, blankstr);
a6810074
DL
1207 if (gs.stop_command)
1208 gs.stop_command =
d62a17ae 1209 translate_blanks(gs.stop_command, blankstr);
065de903 1210 }
8b886ca7 1211
a6810074 1212 gs.restart.interval = gs.min_restart_interval;
8b886ca7 1213
4f04a76b 1214 master = frr_init();
b647dc2a 1215 watchfrr_error_init();
0a7c7856
DL
1216 watchfrr_init(argc, argv);
1217 watchfrr_vty_init();
1218
1219 frr_config_fork();
4f04a76b 1220
dd8376fe 1221 zlog_set_level(ZLOG_DEST_MONITOR, ZLOG_DISABLED);
0a7c7856 1222 if (watchfrr_di.daemon_mode)
dd8376fe 1223 zlog_set_level(ZLOG_DEST_SYSLOG, MIN(gs.loglevel, LOG_DEBUG));
0a7c7856 1224 else
dd8376fe 1225 zlog_set_level(ZLOG_DEST_STDOUT, MIN(gs.loglevel, LOG_DEBUG));
8b886ca7 1226
0a7c7856 1227 frr_run(master);
8b886ca7 1228
a6810074
DL
1229 systemd_send_stopping();
1230 /* Not reached. */
1231 return 0;
8b886ca7 1232}