]> git.proxmox.com Git - mirror_frr.git/blame - watchfrr/watchfrr.c
zebra, lib: fix the ZEBRA_INTERFACE_VRF_UPDATE zapi message
[mirror_frr.git] / watchfrr / watchfrr.c
CommitLineData
8b886ca7 1/*
896014f4
DL
2 * Monitor status of frr daemons and restart if necessary.
3 *
4 * Copyright (C) 2004 Andrew J. Schorr
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
8b886ca7 19 */
20
a365534f 21#include <zebra.h>
8b886ca7 22#include <thread.h>
23#include <log.h>
52e66296 24#include <network.h>
8b886ca7 25#include <sigevent.h>
a365534f 26#include <lib/version.h>
95c4aff2 27#include "command.h"
87f44e2f 28#include "memory_vty.h"
4f04a76b 29#include "libfrr.h"
b647dc2a 30#include "lib_errors.h"
95c4aff2 31
6f594023 32#include <getopt.h>
a365534f 33#include <sys/un.h>
34#include <sys/wait.h>
837d16cc 35#include <memory.h>
651415bd 36#include <systemd.h>
8b886ca7 37
9473e340 38#include "watchfrr.h"
b647dc2a 39#include "watchfrr_errors.h"
95c4aff2 40
/* Smaller of two values.  Each argument may be evaluated twice. */
#ifndef MIN
#define MIN(X, Y) (((X) <= (Y)) ? (X) : (Y))
#endif

/*
 * Macros to help randomize timers.
 *
 * For non-negative X, JITTER(X) is a random offset between -(X)/2 and
 * (X) - (X)/2; FUZZY(X) is X perturbed by a jitter of one twentieth of X.
 */
#define JITTER(X) ((random() % ((X) + 1)) - ((X) / 2))
#define FUZZY(X) ((X) + JITTER((X) / 20))

/* Built-in defaults for the tunables held in the global state. */
#define DEFAULT_PERIOD 5
#define DEFAULT_TIMEOUT 90
#define DEFAULT_RESTART_TIMEOUT 20
#define DEFAULT_LOGLEVEL LOG_INFO
#define DEFAULT_MIN_RESTART 60
#define DEFAULT_MAX_RESTART 600

/* Token sent with the "echo" command and expected back in the reply. */
#define PING_TOKEN "PING"
57
0a7c7856
DL
58DEFINE_MGROUP(WATCHFRR, "watchfrr")
59DEFINE_MTYPE_STATIC(WATCHFRR, WATCHFRR_DAEMON, "watchfrr daemon entry")
60
55c72803 61/* Needs to be global, referenced somewhere inside libfrr. */
8b886ca7 62struct thread_master *master;
63
f168b713 64static bool watch_only = false;
8b886ca7 65
a6810074
DL
/* Phases of the global state machine driven by phase_check(). */
typedef enum {
	PHASE_NONE = 0,		     /* idle, no phased activity */
	PHASE_INIT,		     /* startup: daemons still in DAEMON_INIT */
	PHASE_STOPS_PENDING,	     /* stop jobs still running */
	PHASE_WAITING_DOWN,	     /* waiting for the other daemons to go down */
	PHASE_ZEBRA_RESTART_PENDING, /* restart job of the special daemon running */
	PHASE_WAITING_ZEBRA_UP	     /* waiting for the special daemon to come up */
} restart_phase_t;

/*
 * Human-readable phase names, indexed by restart_phase_t.  Each entry is
 * tied to its enumerator so that the table cannot silently drift out of
 * step with the enum.
 */
static const char *phase_str[] = {
	[PHASE_NONE] = "Idle",
	[PHASE_INIT] = "Startup",
	[PHASE_STOPS_PENDING] = "Stop jobs running",
	[PHASE_WAITING_DOWN] = "Waiting for other daemons to come down",
	[PHASE_ZEBRA_RESTART_PENDING] = "Zebra restart job running",
	[PHASE_WAITING_ZEBRA_UP] = "Waiting for zebra to come up",
};

/* Seconds a phase may last before phase_hanging() aborts it. */
#define PHASE_TIMEOUT (3 * gs.restart_timeout)
86
a6810074
DL
/* Bookkeeping for one background (start/stop/restart) shell job. */
struct restart_info {
	/* Name substituted into the command and used in log messages. */
	const char *name;
	/* Kind of command last launched (the cmdtype given to run_job()). */
	const char *what;
	/* Pid of the running job, or 0 when no job is outstanding. */
	pid_t pid;
	/* Set when a job is launched and again when it is reaped. */
	struct timeval time;
	/* Minimum number of seconds between two unforced jobs. */
	long interval;
	/* Timer that fires restart_kill() while the job is running. */
	struct thread *t_kill;
	/* Signals sent so far: SIGTERM is used first, SIGKILL afterwards. */
	int kills;
};
96
a6810074 97static struct global_state {
a6810074
DL
98 restart_phase_t phase;
99 struct thread *t_phase_hanging;
100 const char *vtydir;
101 long period;
102 long timeout;
103 long restart_timeout;
104 long min_restart_interval;
105 long max_restart_interval;
a6810074
DL
106 struct daemon *daemons;
107 const char *restart_command;
108 const char *start_command;
109 const char *stop_command;
110 struct restart_info restart;
a6810074 111 int loglevel;
d62a17ae 112 struct daemon *special; /* points to zebra when doing phased restart */
a6810074
DL
113 int numdaemons;
114 int numpids;
d62a17ae 115 int numdown; /* # of daemons that are not UP or UNRESPONSIVE */
8b886ca7 116} gs = {
c0e5cb52 117 .phase = PHASE_INIT,
64a249ad 118 .vtydir = frr_vtydir,
d62a17ae 119 .period = 1000 * DEFAULT_PERIOD,
120 .timeout = DEFAULT_TIMEOUT,
121 .restart_timeout = DEFAULT_RESTART_TIMEOUT,
122 .loglevel = DEFAULT_LOGLEVEL,
123 .min_restart_interval = DEFAULT_MIN_RESTART,
124 .max_restart_interval = DEFAULT_MAX_RESTART,
d62a17ae 125};
a6810074
DL
126
/* Connection state of a monitored daemon. */
typedef enum {
	DAEMON_INIT,
	DAEMON_DOWN,
	DAEMON_CONNECTING,
	DAEMON_UP,
	DAEMON_UNRESPONSIVE
} daemon_state_t;

/* True when the daemon is connected, whether or not it answers pings. */
#define IS_UP(DMN)                                                             \
	(((DMN)->state == DAEMON_UP) || ((DMN)->state == DAEMON_UNRESPONSIVE))

/* Human-readable state names, indexed by daemon_state_t. */
static const char *state_str[] = {
	[DAEMON_INIT] = "Init",
	[DAEMON_DOWN] = "Down",
	[DAEMON_CONNECTING] = "Connecting",
	[DAEMON_UP] = "Up",
	[DAEMON_UNRESPONSIVE] = "Unresponsive",
};
141
142struct daemon {
a6810074
DL
143 const char *name;
144 daemon_state_t state;
145 int fd;
146 struct timeval echo_sent;
d7c0a89a 147 unsigned int connect_tries;
a6810074
DL
148 struct thread *t_wakeup;
149 struct thread *t_read;
150 struct thread *t_write;
151 struct daemon *next;
152 struct restart_info restart;
8b886ca7 153};
154
9272302b
DL
/* getopt_long() return values for options that have no short form. */
#define OPTION_MINRESTART 2000
#define OPTION_MAXRESTART 2001
#define OPTION_DRY 2002

/* Long command-line options; the table ends with an all-zero entry. */
static const struct option longopts[] = {
	{"daemon", no_argument, NULL, 'd'},
	{"statedir", required_argument, NULL, 'S'},
	{"loglevel", required_argument, NULL, 'l'},
	{"interval", required_argument, NULL, 'i'},
	{"timeout", required_argument, NULL, 't'},
	{"restart-timeout", required_argument, NULL, 'T'},
	{"restart", required_argument, NULL, 'r'},
	{"start-command", required_argument, NULL, 's'},
	{"kill-command", required_argument, NULL, 'k'},
	{"dry", no_argument, NULL, OPTION_DRY},
	{"min-restart-interval", required_argument, NULL, OPTION_MINRESTART},
	{"max-restart-interval", required_argument, NULL, OPTION_MAXRESTART},
	{"pid-file", required_argument, NULL, 'p'},
	{"blank-string", required_argument, NULL, 'b'},
	{"help", no_argument, NULL, 'h'},
	{"version", no_argument, NULL, 'v'},
	{NULL, 0, NULL, 0},
};
8b886ca7 177
178static int try_connect(struct daemon *dmn);
179static int wakeup_send_echo(struct thread *t_wakeup);
180static void try_restart(struct daemon *dmn);
181static void phase_check(void);
75f8b0e4 182static void restart_done(struct daemon *dmn);
8b886ca7 183
4f04a76b
DL
184static const char *progname;
185static void printhelp(FILE *target)
8b886ca7 186{
d62a17ae 187 fprintf(target,
188 "Usage : %s [OPTION...] <daemon name> ...\n\n\
9473e340 189Watchdog program to monitor status of frr daemons and try to restart\n\
8b886ca7 190them if they are down or unresponsive. It determines whether a daemon is\n\
191up based on whether it can connect to the daemon's vty unix stream socket.\n\
192It then repeatedly sends echo commands over that socket to determine whether\n\
193the daemon is responsive. If the daemon crashes, we will receive an EOF\n\
194on the socket connection and know immediately that the daemon is down.\n\n\
195The daemons to be monitored should be listed on the command line.\n\n\
8b886ca7 196In order to avoid attempting to restart the daemons in a fast loop,\n\
197the -m and -M options allow you to control the minimum delay between\n\
198restart commands. The minimum restart delay is recalculated each time\n\
199a restart is attempted: if the time since the last restart attempt exceeds\n\
200twice the -M value, then the restart delay is set to the -m value.\n\
d62a17ae 201Otherwise, the interval is doubled (but capped at the -M value).\n\n",
f168b713 202 progname);
e757c940 203
d62a17ae 204 fprintf(target,
205 "Options:\n\
8b886ca7 206-d, --daemon Run in daemon mode. In this mode, error messages are sent\n\
207 to syslog instead of stdout.\n\
208-S, --statedir Set the vty socket directory (default is %s)\n\
8b886ca7 209-l, --loglevel Set the logging level (default is %d).\n\
210 The value should range from %d (LOG_EMERG) to %d (LOG_DEBUG),\n\
211 but it can be set higher than %d if extra-verbose debugging\n\
212 messages are desired.\n\
9272302b 213 --min-restart-interval\n\
8b886ca7 214 Set the minimum seconds to wait between invocations of daemon\n\
215 restart commands (default is %d).\n\
9272302b 216 --max-restart-interval\n\
8b886ca7 217 Set the maximum seconds to wait between invocations of daemon\n\
218 restart commands (default is %d).\n\
219-i, --interval Set the status polling interval in seconds (default is %d)\n\
220-t, --timeout Set the unresponsiveness timeout in seconds (default is %d)\n\
221-T, --restart-timeout\n\
222 Set the restart (kill) timeout in seconds (default is %d).\n\
223 If any background jobs are still running after this much\n\
224 time has elapsed, they will be killed.\n\
225-r, --restart Supply a Bourne shell command to use to restart a single\n\
226 daemon. The command string should include '%%s' where the\n\
227 name of the daemon should be substituted.\n\
8b886ca7 228-s, --start-command\n\
229 Supply a Bourne shell to command to use to start a single\n\
230 daemon. The command string should include '%%s' where the\n\
231 name of the daemon should be substituted.\n\
232-k, --kill-command\n\
233 Supply a Bourne shell to command to use to stop a single\n\
234 daemon. The command string should include '%%s' where the\n\
235 name of the daemon should be substituted.\n\
f168b713 236 --dry Do not start or restart anything, just log.\n\
8b886ca7 237-p, --pid-file Set process identifier file name\n\
0a7c7856 238 (default is %s/watchfrr.pid).\n\
c8b40f86 239-b, --blank-string\n\
240 When the supplied argument string is found in any of the\n\
f168b713 241 various shell command arguments (-r, -s, or -k), replace\n\
c8b40f86 242 it with a space. This is an ugly hack to circumvent problems\n\
243 passing command-line arguments with embedded spaces.\n\
8b886ca7 244-v, --version Print program version\n\
d62a17ae 245-h, --help Display this help and exit\n",
64a249ad 246 frr_vtydir, DEFAULT_LOGLEVEL, LOG_EMERG, LOG_DEBUG, LOG_DEBUG,
d62a17ae 247 DEFAULT_MIN_RESTART, DEFAULT_MAX_RESTART, DEFAULT_PERIOD,
0a7c7856 248 DEFAULT_TIMEOUT, DEFAULT_RESTART_TIMEOUT, frr_vtydir);
8b886ca7 249}
250
a6810074 251static pid_t run_background(char *shell_cmd)
8b886ca7 252{
a6810074
DL
253 pid_t child;
254
255 switch (child = fork()) {
256 case -1:
450971aa 257 flog_err_sys(EC_LIB_SYSTEM_CALL,
09c866e3
QY
258 "fork failed, cannot run command [%s]: %s",
259 shell_cmd, safe_strerror(errno));
a6810074
DL
260 return -1;
261 case 0:
262 /* Child process. */
d62a17ae 263 /* Use separate process group so child processes can be killed
264 * easily. */
a6810074
DL
265 if (setpgid(0, 0) < 0)
266 zlog_warn("warning: setpgid(0,0) failed: %s",
267 safe_strerror(errno));
268 {
269 char shell[] = "sh";
270 char dashc[] = "-c";
d62a17ae 271 char *const argv[4] = {shell, dashc, shell_cmd, NULL};
a6810074 272 execv("/bin/sh", argv);
450971aa 273 flog_err_sys(EC_LIB_SYSTEM_CALL,
09c866e3
QY
274 "execv(/bin/sh -c '%s') failed: %s",
275 shell_cmd, safe_strerror(errno));
a6810074
DL
276 _exit(127);
277 }
278 default:
279 /* Parent process: we will reap the child later. */
450971aa 280 flog_err_sys(EC_LIB_SYSTEM_CALL,
09c866e3
QY
281 "Forked background command [pid %d]: %s",
282 (int)child, shell_cmd);
a6810074
DL
283 return child;
284 }
8b886ca7 285}
286
a6810074
DL
/*
 * Store in *result the wall-clock time elapsed since *start_time.
 *
 * Returns result, so the call can be used directly inside an expression.
 */
static struct timeval *time_elapsed(struct timeval *result,
				    const struct timeval *start_time)
{
	gettimeofday(result, NULL);
	result->tv_usec -= start_time->tv_usec;
	result->tv_sec -= start_time->tv_sec;

	/* Borrow whole seconds until the microsecond part is non-negative. */
	for (; result->tv_usec < 0; result->tv_sec--)
		result->tv_usec += 1000000L;

	return result;
}
299
a6810074 300static int restart_kill(struct thread *t_kill)
8b886ca7 301{
a6810074
DL
302 struct restart_info *restart = THREAD_ARG(t_kill);
303 struct timeval delay;
304
305 time_elapsed(&delay, &restart->time);
d62a17ae 306 zlog_warn(
307 "Warning: %s %s child process %d still running after "
308 "%ld seconds, sending signal %d",
309 restart->what, restart->name, (int)restart->pid,
310 (long)delay.tv_sec, (restart->kills ? SIGKILL : SIGTERM));
a6810074
DL
311 kill(-restart->pid, (restart->kills ? SIGKILL : SIGTERM));
312 restart->kills++;
66e78ae6
QY
313 restart->t_kill = NULL;
314 thread_add_timer(master, restart_kill, restart, gs.restart_timeout,
315 &restart->t_kill);
a6810074 316 return 0;
8b886ca7 317}
318
a6810074 319static struct restart_info *find_child(pid_t child)
8b886ca7 320{
f168b713 321 struct daemon *dmn;
7c265f7d
CF
322 if (gs.restart.pid == child)
323 return &gs.restart;
324
f168b713
DL
325 for (dmn = gs.daemons; dmn; dmn = dmn->next) {
326 if (dmn->restart.pid == child)
327 return &dmn->restart;
a6810074
DL
328 }
329 return NULL;
8b886ca7 330}
331
a6810074 332static void sigchild(void)
8b886ca7 333{
a6810074
DL
334 pid_t child;
335 int status;
336 const char *name;
337 const char *what;
338 struct restart_info *restart;
75f8b0e4 339 struct daemon *dmn;
a6810074
DL
340
341 switch (child = waitpid(-1, &status, WNOHANG)) {
342 case -1:
450971aa 343 flog_err_sys(EC_LIB_SYSTEM_CALL, "waitpid failed: %s",
09c866e3 344 safe_strerror(errno));
a6810074
DL
345 return;
346 case 0:
347 zlog_warn("SIGCHLD received, but waitpid did not reap a child");
348 return;
349 }
350
351 if (child == integrated_write_pid) {
352 integrated_write_sigchld(status);
353 return;
354 }
355
356 if ((restart = find_child(child)) != NULL) {
357 name = restart->name;
358 what = restart->what;
359 restart->pid = 0;
360 gs.numpids--;
361 thread_cancel(restart->t_kill);
362 restart->t_kill = NULL;
d62a17ae 363 /* Update restart time to reflect the time the command
364 * completed. */
a6810074
DL
365 gettimeofday(&restart->time, NULL);
366 } else {
09c866e3 367 flog_err_sys(
450971aa 368 EC_LIB_SYSTEM_CALL,
09c866e3
QY
369 "waitpid returned status for an unknown child process %d",
370 (int)child);
a6810074
DL
371 name = "(unknown)";
372 what = "background";
373 }
374 if (WIFSTOPPED(status))
d62a17ae 375 zlog_warn("warning: %s %s process %d is stopped", what, name,
376 (int)child);
a6810074 377 else if (WIFSIGNALED(status))
d62a17ae 378 zlog_warn("%s %s process %d terminated due to signal %d", what,
379 name, (int)child, WTERMSIG(status));
a6810074
DL
380 else if (WIFEXITED(status)) {
381 if (WEXITSTATUS(status) != 0)
d62a17ae 382 zlog_warn(
383 "%s %s process %d exited with non-zero status %d",
384 what, name, (int)child, WEXITSTATUS(status));
75f8b0e4 385 else {
a6810074
DL
386 zlog_debug("%s %s process %d exited normally", what,
387 name, (int)child);
75f8b0e4
DL
388
389 if (restart && restart != &gs.restart) {
390 dmn = container_of(restart, struct daemon,
391 restart);
392 restart_done(dmn);
393 } else if (restart)
394 for (dmn = gs.daemons; dmn; dmn = dmn->next)
395 restart_done(dmn);
396 }
a6810074 397 } else
09c866e3 398 flog_err_sys(
450971aa 399 EC_LIB_SYSTEM_CALL,
09c866e3
QY
400 "cannot interpret %s %s process %d wait status 0x%x",
401 what, name, (int)child, status);
a6810074 402 phase_check();
8b886ca7 403}
404
d62a17ae 405static int run_job(struct restart_info *restart, const char *cmdtype,
406 const char *command, int force, int update_interval)
8b886ca7 407{
a6810074
DL
408 struct timeval delay;
409
410 if (gs.loglevel > LOG_DEBUG + 1)
411 zlog_debug("attempting to %s %s", cmdtype, restart->name);
412
413 if (restart->pid) {
414 if (gs.loglevel > LOG_DEBUG + 1)
d62a17ae 415 zlog_debug(
416 "cannot %s %s, previous pid %d still running",
417 cmdtype, restart->name, (int)restart->pid);
a6810074
DL
418 return -1;
419 }
420
d62a17ae 421 /* Note: time_elapsed test must come before the force test, since we
422 need
a6810074
DL
423 to make sure that delay is initialized for use below in updating the
424 restart interval. */
425 if ((time_elapsed(&delay, &restart->time)->tv_sec < restart->interval)
426 && !force) {
427 if (gs.loglevel > LOG_DEBUG + 1)
d62a17ae 428 zlog_debug(
429 "postponing %s %s: "
430 "elapsed time %ld < retry interval %ld",
431 cmdtype, restart->name, (long)delay.tv_sec,
432 restart->interval);
a6810074
DL
433 return -1;
434 }
435
436 gettimeofday(&restart->time, NULL);
437 restart->kills = 0;
438 {
439 char cmd[strlen(command) + strlen(restart->name) + 1];
440 snprintf(cmd, sizeof(cmd), command, restart->name);
441 if ((restart->pid = run_background(cmd)) > 0) {
66e78ae6 442 restart->t_kill = NULL;
d62a17ae 443 thread_add_timer(master, restart_kill, restart,
444 gs.restart_timeout, &restart->t_kill);
a6810074
DL
445 restart->what = cmdtype;
446 gs.numpids++;
447 } else
448 restart->pid = 0;
449 }
450
451 /* Calculate the new restart interval. */
452 if (update_interval) {
453 if (delay.tv_sec > 2 * gs.max_restart_interval)
454 restart->interval = gs.min_restart_interval;
455 else if ((restart->interval *= 2) > gs.max_restart_interval)
456 restart->interval = gs.max_restart_interval;
457 if (gs.loglevel > LOG_DEBUG + 1)
458 zlog_debug("restart %s interval is now %ld",
459 restart->name, restart->interval);
460 }
461 return restart->pid;
8b886ca7 462}
463
/*
 * Scheduling helpers for a daemon entry.  Each clears the stored thread
 * pointer and then registers the new handler or timer.
 *
 * The expansions deliberately do not end in a semicolon: the caller supplies
 * it, so the macros behave like a single statement, including in front of an
 * "else".
 */

/* Watch the daemon's socket for input with handle_read(). */
#define SET_READ_HANDLER(DMN)                                                  \
	do {                                                                   \
		(DMN)->t_read = NULL;                                          \
		thread_add_read(master, handle_read, (DMN), (DMN)->fd,         \
				&(DMN)->t_read);                               \
	} while (0)

/* Run wakeup_down() after one fuzzed polling period. */
#define SET_WAKEUP_DOWN(DMN)                                                   \
	do {                                                                   \
		(DMN)->t_wakeup = NULL;                                        \
		thread_add_timer_msec(master, wakeup_down, (DMN),              \
				      FUZZY(gs.period), &(DMN)->t_wakeup);     \
	} while (0)

/* Run wakeup_unresponsive() after one fuzzed polling period. */
#define SET_WAKEUP_UNRESPONSIVE(DMN)                                           \
	do {                                                                   \
		(DMN)->t_wakeup = NULL;                                        \
		thread_add_timer_msec(master, wakeup_unresponsive, (DMN),      \
				      FUZZY(gs.period), &(DMN)->t_wakeup);     \
	} while (0)

/* Run wakeup_send_echo() after one fuzzed polling period. */
#define SET_WAKEUP_ECHO(DMN)                                                   \
	do {                                                                   \
		(DMN)->t_wakeup = NULL;                                        \
		thread_add_timer_msec(master, wakeup_send_echo, (DMN),         \
				      FUZZY(gs.period), &(DMN)->t_wakeup);     \
	} while (0)
8b886ca7 491
a6810074 492static int wakeup_down(struct thread *t_wakeup)
8b886ca7 493{
a6810074
DL
494 struct daemon *dmn = THREAD_ARG(t_wakeup);
495
496 dmn->t_wakeup = NULL;
497 if (try_connect(dmn) < 0)
498 SET_WAKEUP_DOWN(dmn);
499 if ((dmn->connect_tries > 1) && (dmn->state != DAEMON_UP))
500 try_restart(dmn);
501 return 0;
8b886ca7 502}
503
a6810074 504static int wakeup_init(struct thread *t_wakeup)
8b886ca7 505{
a6810074
DL
506 struct daemon *dmn = THREAD_ARG(t_wakeup);
507
508 dmn->t_wakeup = NULL;
509 if (try_connect(dmn) < 0) {
f74ae2bb 510 flog_err(EC_WATCHFRR_CONNECTION,
1c50c1c0
QY
511 "%s state -> down : initial connection attempt failed",
512 dmn->name);
a6810074
DL
513 dmn->state = DAEMON_DOWN;
514 }
c0e5cb52 515 phase_check();
a6810074 516 return 0;
8b886ca7 517}
518
75f8b0e4
DL
519static void restart_done(struct daemon *dmn)
520{
521 if (dmn->state != DAEMON_DOWN) {
522 zlog_warn("wtf?");
523 return;
524 }
525 if (dmn->t_wakeup)
526 THREAD_OFF(dmn->t_wakeup);
527 if (try_connect(dmn) < 0)
528 SET_WAKEUP_DOWN(dmn);
529}
530
a6810074 531static void daemon_down(struct daemon *dmn, const char *why)
8b886ca7 532{
a6810074 533 if (IS_UP(dmn) || (dmn->state == DAEMON_INIT))
1c50c1c0
QY
534 flog_err(EC_WATCHFRR_CONNECTION, "%s state -> down : %s",
535 dmn->name, why);
a6810074
DL
536 else if (gs.loglevel > LOG_DEBUG)
537 zlog_debug("%s still down : %s", dmn->name, why);
538 if (IS_UP(dmn))
539 gs.numdown++;
540 dmn->state = DAEMON_DOWN;
541 if (dmn->fd >= 0) {
542 close(dmn->fd);
543 dmn->fd = -1;
544 }
545 THREAD_OFF(dmn->t_read);
546 THREAD_OFF(dmn->t_write);
547 THREAD_OFF(dmn->t_wakeup);
548 if (try_connect(dmn) < 0)
549 SET_WAKEUP_DOWN(dmn);
550 phase_check();
8b886ca7 551}
552
a6810074 553static int handle_read(struct thread *t_read)
8b886ca7 554{
a6810074
DL
555 struct daemon *dmn = THREAD_ARG(t_read);
556 static const char resp[sizeof(PING_TOKEN) + 4] = PING_TOKEN "\n";
557 char buf[sizeof(resp) + 100];
558 ssize_t rc;
559 struct timeval delay;
560
561 dmn->t_read = NULL;
562 if ((rc = read(dmn->fd, buf, sizeof(buf))) < 0) {
563 char why[100];
564
565 if (ERRNO_IO_RETRY(errno)) {
566 /* Pretend it never happened. */
567 SET_READ_HANDLER(dmn);
568 return 0;
569 }
570 snprintf(why, sizeof(why), "unexpected read error: %s",
571 safe_strerror(errno));
572 daemon_down(dmn, why);
573 return 0;
8b886ca7 574 }
a6810074
DL
575 if (rc == 0) {
576 daemon_down(dmn, "read returned EOF");
577 return 0;
578 }
579 if (!dmn->echo_sent.tv_sec) {
580 char why[sizeof(buf) + 100];
581 snprintf(why, sizeof(why),
582 "unexpected read returns %d bytes: %.*s", (int)rc,
583 (int)rc, buf);
584 daemon_down(dmn, why);
585 return 0;
8b886ca7 586 }
a6810074
DL
587
588 /* We are expecting an echo response: is there any chance that the
589 response would not be returned entirely in the first read? That
590 seems inconceivable... */
591 if ((rc != sizeof(resp)) || memcmp(buf, resp, sizeof(resp))) {
592 char why[100 + sizeof(buf)];
593 snprintf(why, sizeof(why),
594 "read returned bad echo response of %d bytes "
d62a17ae 595 "(expecting %u): %.*s",
d7c0a89a 596 (int)rc, (unsigned int)sizeof(resp), (int)rc, buf);
a6810074
DL
597 daemon_down(dmn, why);
598 return 0;
599 }
600
601 time_elapsed(&delay, &dmn->echo_sent);
602 dmn->echo_sent.tv_sec = 0;
603 if (dmn->state == DAEMON_UNRESPONSIVE) {
604 if (delay.tv_sec < gs.timeout) {
605 dmn->state = DAEMON_UP;
d62a17ae 606 zlog_warn(
607 "%s state -> up : echo response received after %ld.%06ld "
608 "seconds",
609 dmn->name, (long)delay.tv_sec,
610 (long)delay.tv_usec);
a6810074 611 } else
d62a17ae 612 zlog_warn(
613 "%s: slow echo response finally received after %ld.%06ld "
614 "seconds",
615 dmn->name, (long)delay.tv_sec,
616 (long)delay.tv_usec);
a6810074
DL
617 } else if (gs.loglevel > LOG_DEBUG + 1)
618 zlog_debug("%s: echo response received after %ld.%06ld seconds",
619 dmn->name, (long)delay.tv_sec, (long)delay.tv_usec);
620
621 SET_READ_HANDLER(dmn);
622 if (dmn->t_wakeup)
623 thread_cancel(dmn->t_wakeup);
624 SET_WAKEUP_ECHO(dmn);
625
626 return 0;
8b886ca7 627}
628
207e0d7a
DS
629/*
630 * Wait till we notice that all daemons are ready before
631 * we send we are ready to systemd
632 */
a6810074 633static void daemon_send_ready(void)
207e0d7a 634{
a6810074
DL
635 static int sent = 0;
636 if (!sent && gs.numdown == 0) {
a6810074 637 FILE *fp;
207e0d7a 638
0a7c7856
DL
639 zlog_notice("all daemons up, doing startup-complete notify");
640 frr_detach();
641
a6810074 642 fp = fopen(DAEMON_VTY_DIR "/watchfrr.started", "w");
f5ba21fc
DS
643 if (fp)
644 fclose(fp);
60bd2534 645#if defined HAVE_SYSTEMD
a6810074 646 systemd_send_started(master, 0);
60bd2534 647#endif
a6810074
DL
648 sent = 1;
649 }
207e0d7a
DS
650}
651
a6810074 652static void daemon_up(struct daemon *dmn, const char *why)
8b886ca7 653{
a6810074
DL
654 dmn->state = DAEMON_UP;
655 gs.numdown--;
656 dmn->connect_tries = 0;
657 zlog_notice("%s state -> up : %s", dmn->name, why);
658 daemon_send_ready();
a8cbb8b3 659 SET_WAKEUP_ECHO(dmn);
a6810074 660 phase_check();
8b886ca7 661}
662
a6810074 663static int check_connect(struct thread *t_write)
8b886ca7 664{
a6810074
DL
665 struct daemon *dmn = THREAD_ARG(t_write);
666 int sockerr;
667 socklen_t reslen = sizeof(sockerr);
668
669 dmn->t_write = NULL;
670 if (getsockopt(dmn->fd, SOL_SOCKET, SO_ERROR, (char *)&sockerr, &reslen)
671 < 0) {
672 zlog_warn("%s: check_connect: getsockopt failed: %s", dmn->name,
673 safe_strerror(errno));
674 daemon_down(dmn,
675 "getsockopt failed checking connection success");
676 return 0;
677 }
678 if ((reslen == sizeof(sockerr)) && sockerr) {
679 char why[100];
d62a17ae 680 snprintf(
681 why, sizeof(why),
682 "getsockopt reports that connection attempt failed: %s",
683 safe_strerror(sockerr));
a6810074
DL
684 daemon_down(dmn, why);
685 return 0;
686 }
687
688 daemon_up(dmn, "delayed connect succeeded");
689 return 0;
8b886ca7 690}
691
a6810074 692static int wakeup_connect_hanging(struct thread *t_wakeup)
8b886ca7 693{
a6810074
DL
694 struct daemon *dmn = THREAD_ARG(t_wakeup);
695 char why[100];
696
697 dmn->t_wakeup = NULL;
698 snprintf(why, sizeof(why),
699 "connection attempt timed out after %ld seconds", gs.timeout);
700 daemon_down(dmn, why);
701 return 0;
8b886ca7 702}
703
704/* Making connection to protocol daemon. */
a6810074 705static int try_connect(struct daemon *dmn)
8b886ca7 706{
a6810074
DL
707 int sock;
708 struct sockaddr_un addr;
709 socklen_t len;
710
711 if (gs.loglevel > LOG_DEBUG + 1)
712 zlog_debug("%s: attempting to connect", dmn->name);
713 dmn->connect_tries++;
714
715 memset(&addr, 0, sizeof(struct sockaddr_un));
716 addr.sun_family = AF_UNIX;
d62a17ae 717 snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/%s.vty", gs.vtydir,
718 dmn->name);
6f0e3f6e 719#ifdef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN
a6810074 720 len = addr.sun_len = SUN_LEN(&addr);
8b886ca7 721#else
a6810074 722 len = sizeof(addr.sun_family) + strlen(addr.sun_path);
d62a17ae 723#endif /* HAVE_STRUCT_SOCKADDR_UN_SUN_LEN */
a6810074
DL
724
725 /* Quick check to see if we might succeed before we go to the trouble
726 of creating a socket. */
727 if (access(addr.sun_path, W_OK) < 0) {
728 if (errno != ENOENT)
450971aa 729 flog_err_sys(EC_LIB_SYSTEM_CALL,
09c866e3
QY
730 "%s: access to socket %s denied: %s",
731 dmn->name, addr.sun_path,
732 safe_strerror(errno));
a6810074
DL
733 return -1;
734 }
735
736 if ((sock = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
450971aa 737 flog_err_sys(EC_LIB_SOCKET, "%s(%s): cannot make socket: %s",
09c866e3 738 __func__, addr.sun_path, safe_strerror(errno));
a6810074
DL
739 return -1;
740 }
741
742 if (set_nonblocking(sock) < 0 || set_cloexec(sock) < 0) {
450971aa 743 flog_err_sys(EC_LIB_SYSTEM_CALL,
09c866e3
QY
744 "%s(%s): set_nonblocking/cloexec(%d) failed",
745 __func__, addr.sun_path, sock);
a6810074
DL
746 close(sock);
747 return -1;
8b886ca7 748 }
a6810074
DL
749
750 if (connect(sock, (struct sockaddr *)&addr, len) < 0) {
751 if ((errno != EINPROGRESS) && (errno != EWOULDBLOCK)) {
752 if (gs.loglevel > LOG_DEBUG)
753 zlog_debug("%s(%s): connect failed: %s",
754 __func__, addr.sun_path,
755 safe_strerror(errno));
756 close(sock);
757 return -1;
758 }
759 if (gs.loglevel > LOG_DEBUG)
760 zlog_debug("%s: connection in progress", dmn->name);
761 dmn->state = DAEMON_CONNECTING;
762 dmn->fd = sock;
66e78ae6
QY
763 dmn->t_write = NULL;
764 thread_add_write(master, check_connect, dmn, dmn->fd,
d62a17ae 765 &dmn->t_write);
766 dmn->t_wakeup = NULL;
767 thread_add_timer(master, wakeup_connect_hanging, dmn,
768 gs.timeout, &dmn->t_wakeup);
a6810074
DL
769 SET_READ_HANDLER(dmn);
770 return 0;
771 }
772
773 dmn->fd = sock;
774 SET_READ_HANDLER(dmn);
775 daemon_up(dmn, "connect succeeded");
776 return 1;
8b886ca7 777}
778
a6810074 779static int phase_hanging(struct thread *t_hanging)
8b886ca7 780{
a6810074 781 gs.t_phase_hanging = NULL;
f74ae2bb 782 flog_err(EC_WATCHFRR_CONNECTION,
1c50c1c0
QY
783 "Phase [%s] hanging for %ld seconds, aborting phased restart",
784 phase_str[gs.phase], PHASE_TIMEOUT);
a6810074
DL
785 gs.phase = PHASE_NONE;
786 return 0;
8b886ca7 787}
788
a6810074 789static void set_phase(restart_phase_t new_phase)
8b886ca7 790{
a6810074
DL
791 gs.phase = new_phase;
792 if (gs.t_phase_hanging)
793 thread_cancel(gs.t_phase_hanging);
66e78ae6
QY
794 gs.t_phase_hanging = NULL;
795 thread_add_timer(master, phase_hanging, NULL, PHASE_TIMEOUT,
796 &gs.t_phase_hanging);
8b886ca7 797}
798
a6810074 799static void phase_check(void)
8b886ca7 800{
c0e5cb52
DL
801 struct daemon *dmn;
802
a6810074
DL
803 switch (gs.phase) {
804 case PHASE_NONE:
805 break;
c0e5cb52
DL
806
807 case PHASE_INIT:
808 for (dmn = gs.daemons; dmn; dmn = dmn->next)
809 if (dmn->state == DAEMON_INIT)
810 return;
811
812 /* startup complete, everything out of INIT */
813 gs.phase = PHASE_NONE;
814 for (dmn = gs.daemons; dmn; dmn = dmn->next)
815 if (dmn->state == DAEMON_DOWN) {
816 SET_WAKEUP_DOWN(dmn);
817 try_restart(dmn);
818 }
819 break;
a6810074
DL
820 case PHASE_STOPS_PENDING:
821 if (gs.numpids)
822 break;
d62a17ae 823 zlog_info(
824 "Phased restart: all routing daemon stop jobs have completed.");
a6810074
DL
825 set_phase(PHASE_WAITING_DOWN);
826
d62a17ae 827 /*FALLTHRU*/
a6810074
DL
828 case PHASE_WAITING_DOWN:
829 if (gs.numdown + IS_UP(gs.special) < gs.numdaemons)
830 break;
831 zlog_info("Phased restart: all routing daemons now down.");
832 run_job(&gs.special->restart, "restart", gs.restart_command, 1,
833 1);
834 set_phase(PHASE_ZEBRA_RESTART_PENDING);
835
d62a17ae 836 /*FALLTHRU*/
a6810074
DL
837 case PHASE_ZEBRA_RESTART_PENDING:
838 if (gs.special->restart.pid)
839 break;
840 zlog_info("Phased restart: %s restart job completed.",
841 gs.special->name);
842 set_phase(PHASE_WAITING_ZEBRA_UP);
843
d62a17ae 844 /*FALLTHRU*/
a6810074
DL
845 case PHASE_WAITING_ZEBRA_UP:
846 if (!IS_UP(gs.special))
847 break;
848 zlog_info("Phased restart: %s is now up.", gs.special->name);
849 {
850 struct daemon *dmn;
851 for (dmn = gs.daemons; dmn; dmn = dmn->next) {
852 if (dmn != gs.special)
853 run_job(&dmn->restart, "start",
854 gs.start_command, 1, 0);
855 }
856 }
857 gs.phase = PHASE_NONE;
858 THREAD_OFF(gs.t_phase_hanging);
859 zlog_notice("Phased global restart has completed.");
860 break;
861 }
8b886ca7 862}
863
a6810074 864static void try_restart(struct daemon *dmn)
8b886ca7 865{
f168b713 866 if (watch_only)
a6810074 867 return;
a6810074 868
f168b713
DL
869 if (dmn != gs.special) {
870 if ((gs.special->state == DAEMON_UP)
871 && (gs.phase == PHASE_NONE))
872 run_job(&dmn->restart, "restart", gs.restart_command, 0,
873 1);
874 else
875 zlog_debug(
876 "%s: postponing restart attempt because master %s daemon "
877 "not up [%s], or phased restart in progress",
878 dmn->name, gs.special->name,
879 state_str[gs.special->state]);
880 return;
881 }
882
883 if ((gs.phase != PHASE_NONE) || gs.numpids) {
884 if (gs.loglevel > LOG_DEBUG + 1)
885 zlog_debug(
886 "postponing phased global restart: restart already in "
887 "progress [%s], or outstanding child processes [%d]",
888 phase_str[gs.phase], gs.numpids);
889 return;
890 }
891 /* Is it too soon for a restart? */
892 {
893 struct timeval delay;
894 if (time_elapsed(&delay, &gs.special->restart.time)->tv_sec
895 < gs.special->restart.interval) {
a6810074 896 if (gs.loglevel > LOG_DEBUG + 1)
d62a17ae 897 zlog_debug(
f168b713
DL
898 "postponing phased global restart: "
899 "elapsed time %ld < retry interval %ld",
900 (long)delay.tv_sec,
901 gs.special->restart.interval);
902 return;
a6810074 903 }
8b886ca7 904 }
f168b713 905 run_job(&gs.restart, "restart", gs.restart_command, 0, 1);
8b886ca7 906}
907
a6810074 908static int wakeup_unresponsive(struct thread *t_wakeup)
8b886ca7 909{
a6810074
DL
910 struct daemon *dmn = THREAD_ARG(t_wakeup);
911
912 dmn->t_wakeup = NULL;
913 if (dmn->state != DAEMON_UNRESPONSIVE)
f74ae2bb 914 flog_err(EC_WATCHFRR_CONNECTION,
1c50c1c0
QY
915 "%s: no longer unresponsive (now %s), "
916 "wakeup should have been cancelled!",
917 dmn->name, state_str[dmn->state]);
a6810074
DL
918 else {
919 SET_WAKEUP_UNRESPONSIVE(dmn);
920 try_restart(dmn);
921 }
922 return 0;
8b886ca7 923}
924
a6810074 925static int wakeup_no_answer(struct thread *t_wakeup)
8b886ca7 926{
a6810074
DL
927 struct daemon *dmn = THREAD_ARG(t_wakeup);
928
929 dmn->t_wakeup = NULL;
930 dmn->state = DAEMON_UNRESPONSIVE;
f74ae2bb 931 flog_err(EC_WATCHFRR_CONNECTION,
1c50c1c0
QY
932 "%s state -> unresponsive : no response yet to ping "
933 "sent %ld seconds ago",
934 dmn->name, gs.timeout);
71e7975a
DL
935 SET_WAKEUP_UNRESPONSIVE(dmn);
936 try_restart(dmn);
a6810074 937 return 0;
8b886ca7 938}
939
a6810074 940static int wakeup_send_echo(struct thread *t_wakeup)
8b886ca7 941{
a6810074
DL
942 static const char echocmd[] = "echo " PING_TOKEN;
943 ssize_t rc;
944 struct daemon *dmn = THREAD_ARG(t_wakeup);
945
946 dmn->t_wakeup = NULL;
d62a17ae 947 if (((rc = write(dmn->fd, echocmd, sizeof(echocmd))) < 0)
948 || ((size_t)rc != sizeof(echocmd))) {
a6810074
DL
949 char why[100 + sizeof(echocmd)];
950 snprintf(why, sizeof(why),
951 "write '%s' returned %d instead of %u", echocmd,
d7c0a89a 952 (int)rc, (unsigned int)sizeof(echocmd));
a6810074
DL
953 daemon_down(dmn, why);
954 } else {
955 gettimeofday(&dmn->echo_sent, NULL);
66e78ae6
QY
956 dmn->t_wakeup = NULL;
957 thread_add_timer(master, wakeup_no_answer, dmn, gs.timeout,
958 &dmn->t_wakeup);
a6810074
DL
959 }
960 return 0;
8b886ca7 961}
962
470bc619
QY
963bool check_all_up(void)
964{
965 struct daemon *dmn;
966
967 for (dmn = gs.daemons; dmn; dmn = dmn->next)
968 if (dmn->state != DAEMON_UP)
969 return false;
970 return true;
971}
972
af568444
DL
973void watchfrr_status(struct vty *vty)
974{
975 struct daemon *dmn;
976 struct timeval delay;
977
978 vty_out(vty, "watchfrr global phase: %s\n", phase_str[gs.phase]);
979 if (gs.restart.pid)
980 vty_out(vty, " global restart running, pid %ld\n",
981 (long)gs.restart.pid);
982
983 for (dmn = gs.daemons; dmn; dmn = dmn->next) {
984 vty_out(vty, " %-20s %s\n", dmn->name, state_str[dmn->state]);
985 if (dmn->restart.pid)
986 vty_out(vty, " restart running, pid %ld\n",
987 (long)dmn->restart.pid);
988 else if (dmn->state == DAEMON_DOWN &&
989 time_elapsed(&delay, &dmn->restart.time)->tv_sec
990 < dmn->restart.interval)
991 vty_out(vty, " restarting in %ld seconds"
992 " (%lds backoff interval)\n",
993 dmn->restart.interval - delay.tv_sec,
994 dmn->restart.interval);
995 }
996}
997
/*
 * Termination handler (registered for SIGINT and SIGTERM in
 * watchfrr_signals): log the shutdown, send the systemd "stopping"
 * notification and exit with status 0.
 */
static void sigint(void)
{
	zlog_notice("Terminating on signal");

	systemd_send_stopping();

	exit(0);
}
1004
/*
 * Check that a command template contains exactly one '%' character and
 * that it is immediately followed by 's' (i.e. a single "%s").
 *
 * Returns 1 when the template is acceptable, 0 otherwise.
 */
static int valid_command(const char *cmd)
{
	const char *pct = strchr(cmd, '%');

	/* No conversion at all. */
	if (pct == NULL)
		return 0;

	/* The only conversion allowed is "%s". */
	if (pct[1] != 's')
		return 0;

	/* Any further '%' disqualifies the template. */
	return strchr(pct + 1, '%') == NULL;
}
1012
/*
 * This is an ugly hack to circumvent problems with passing command-line
 * arguments that contain spaces: every occurrence of blankstr in cmd is
 * replaced by a single space.  The fix is to use a configuration file.
 *
 * cmd      - command string to translate (not modified)
 * blankstr - placeholder that stands for a blank
 *
 * Returns a newly strdup()ed string owned by the caller.  Prints an error
 * and exits with status 1 if the copy cannot be allocated.
 */
static char *translate_blanks(const char *cmd, const char *blankstr)
{
	size_t bslen = strlen(blankstr);
	char *res = strdup(cmd);
	char *p;

	if (!res) {
		perror("strdup");
		exit(1);
	}

	/*
	 * An empty placeholder matches everywhere: each "replacement" would
	 * grow the string past its allocation and the loop would never end.
	 * A placeholder that is itself a single blank is replaced by itself,
	 * so the loop would never end either.  Nothing needs translating in
	 * both cases.
	 */
	if (bslen == 0 || strcmp(blankstr, " ") == 0)
		return res;

	while ((p = strstr(res, blankstr)) != NULL) {
		*p = ' ';
		/* Close the gap left by the rest of the placeholder. */
		if (bslen != 1)
			memmove(p + 1, p + bslen, strlen(p + bslen) + 1);
	}
	return res;
}
1032
0a7c7856
DL
1033static void watchfrr_init(int argc, char **argv)
1034{
1035 const char *special = "zebra";
1036 int i;
1037 struct daemon *dmn, **add = &gs.daemons;
1038 char alldaemons[512] = "", *p = alldaemons;
1039
1040 for (i = optind; i < argc; i++) {
1041 dmn = XCALLOC(MTYPE_WATCHFRR_DAEMON, sizeof(*dmn));
1042
1043 dmn->name = dmn->restart.name = argv[i];
1044 dmn->state = DAEMON_INIT;
1045 gs.numdaemons++;
1046 gs.numdown++;
1047 dmn->fd = -1;
1048 dmn->t_wakeup = NULL;
c0e5cb52 1049 thread_add_timer_msec(master, wakeup_init, dmn, 0,
0a7c7856
DL
1050 &dmn->t_wakeup);
1051 dmn->restart.interval = gs.min_restart_interval;
1052 *add = dmn;
1053 add = &dmn->next;
1054
1055 if (!strcmp(dmn->name, special))
1056 gs.special = dmn;
1057 }
1058
1059 if (!gs.daemons) {
1060 fprintf(stderr,
1061 "Must specify one or more daemons to monitor.\n\n");
1062 frr_help_exit(1);
1063 }
1064 if (!watch_only && !gs.special) {
1065 fprintf(stderr, "\"%s\" daemon must be in daemon lists\n\n",
1066 special);
1067 frr_help_exit(1);
1068 }
1069
1070 for (dmn = gs.daemons; dmn; dmn = dmn->next) {
1071 snprintf(p, alldaemons + sizeof(alldaemons) - p, "%s%s",
1072 (p == alldaemons) ? "" : " ", dmn->name);
1073 p += strlen(p);
1074 }
1075 zlog_notice("%s %s watching [%s]%s", progname, FRR_VERSION, alldaemons,
1076 watch_only ? ", monitor mode" : "");
1077}
1078
a6810074 1079struct zebra_privs_t watchfrr_privs = {
95c4aff2 1080#ifdef VTY_GROUP
a6810074 1081 .vty_group = VTY_GROUP,
95c4aff2
DL
1082#endif
1083};
1084
4f04a76b
DL
1085static struct quagga_signal_t watchfrr_signals[] = {
1086 {
1087 .signal = SIGINT,
1088 .handler = sigint,
1089 },
1090 {
1091 .signal = SIGTERM,
1092 .handler = sigint,
1093 },
1094 {
1095 .signal = SIGCHLD,
1096 .handler = sigchild,
1097 },
1098};
1099
1100FRR_DAEMON_INFO(watchfrr, WATCHFRR,
d62a17ae 1101 .flags = FRR_NO_PRIVSEP | FRR_NO_TCPVTY | FRR_LIMITED_CLI
0a7c7856
DL
1102 | FRR_NO_CFG_PID_DRY | FRR_NO_ZCLIENT
1103 | FRR_DETACH_LATER,
4f04a76b 1104
d62a17ae 1105 .printhelp = printhelp,
1106 .copyright = "Copyright 2004 Andrew J. Schorr",
4f04a76b 1107
d62a17ae 1108 .signals = watchfrr_signals,
1109 .n_signals = array_size(watchfrr_signals),
4f04a76b 1110
d62a17ae 1111 .privs = &watchfrr_privs, )
4f04a76b 1112
999f153e
DL
1113#define DEPRECATED_OPTIONS "aAezR:"
1114
a6810074 1115int main(int argc, char **argv)
8b886ca7 1116{
a6810074 1117 int opt;
a6810074 1118 const char *blankstr = NULL;
a6810074 1119
4f04a76b
DL
1120 frr_preinit(&watchfrr_di, argc, argv);
1121 progname = watchfrr_di.progname;
1122
999f153e 1123 frr_opt_add("b:dk:l:i:p:r:S:s:t:T:" DEPRECATED_OPTIONS, longopts, "");
a6810074
DL
1124
1125 gs.restart.name = "all";
4f04a76b 1126 while ((opt = frr_getopt(argc, argv, NULL)) != EOF) {
999f153e
DL
1127 if (opt && opt < 128 && strchr(DEPRECATED_OPTIONS, opt)) {
1128 fprintf(stderr,
1129 "The -%c option no longer exists.\n"
1130 "Please refer to the watchfrr(8) man page.\n",
1131 opt);
1132 exit(1);
1133 }
1134
a6810074
DL
1135 switch (opt) {
1136 case 0:
1137 break;
a6810074
DL
1138 case 'b':
1139 blankstr = optarg;
1140 break;
f168b713
DL
1141 case OPTION_DRY:
1142 watch_only = true;
a6810074
DL
1143 break;
1144 case 'k':
1145 if (!valid_command(optarg)) {
1146 fprintf(stderr,
1147 "Invalid kill command, must contain '%%s': %s\n",
1148 optarg);
4f04a76b 1149 frr_help_exit(1);
a6810074
DL
1150 }
1151 gs.stop_command = optarg;
1152 break;
d62a17ae 1153 case 'l': {
1154 char garbage[3];
1155 if ((sscanf(optarg, "%d%1s", &gs.loglevel, garbage)
1156 != 1)
1157 || (gs.loglevel < LOG_EMERG)) {
1158 fprintf(stderr,
1159 "Invalid loglevel argument: %s\n",
1160 optarg);
1161 frr_help_exit(1);
a6810074 1162 }
d62a17ae 1163 } break;
1164 case OPTION_MINRESTART: {
1165 char garbage[3];
1166 if ((sscanf(optarg, "%ld%1s", &gs.min_restart_interval,
1167 garbage)
1168 != 1)
1169 || (gs.min_restart_interval < 0)) {
1170 fprintf(stderr,
1171 "Invalid min_restart_interval argument: %s\n",
1172 optarg);
1173 frr_help_exit(1);
a6810074 1174 }
d62a17ae 1175 } break;
1176 case OPTION_MAXRESTART: {
1177 char garbage[3];
1178 if ((sscanf(optarg, "%ld%1s", &gs.max_restart_interval,
1179 garbage)
1180 != 1)
1181 || (gs.max_restart_interval < 0)) {
1182 fprintf(stderr,
1183 "Invalid max_restart_interval argument: %s\n",
1184 optarg);
1185 frr_help_exit(1);
a6810074 1186 }
d62a17ae 1187 } break;
1188 case 'i': {
1189 char garbage[3];
1190 int period;
1191 if ((sscanf(optarg, "%d%1s", &period, garbage) != 1)
1192 || (gs.period < 1)) {
1193 fprintf(stderr,
1194 "Invalid interval argument: %s\n",
1195 optarg);
1196 frr_help_exit(1);
a6810074 1197 }
d62a17ae 1198 gs.period = 1000 * period;
1199 } break;
a6810074 1200 case 'p':
0a7c7856 1201 watchfrr_di.pid_file = optarg;
a6810074
DL
1202 break;
1203 case 'r':
a6810074
DL
1204 if (!valid_command(optarg)) {
1205 fprintf(stderr,
1206 "Invalid restart command, must contain '%%s': %s\n",
1207 optarg);
4f04a76b 1208 frr_help_exit(1);
a6810074
DL
1209 }
1210 gs.restart_command = optarg;
a6810074
DL
1211 break;
1212 case 's':
1213 if (!valid_command(optarg)) {
1214 fprintf(stderr,
1215 "Invalid start command, must contain '%%s': %s\n",
1216 optarg);
4f04a76b 1217 frr_help_exit(1);
a6810074
DL
1218 }
1219 gs.start_command = optarg;
1220 break;
1221 case 'S':
1222 gs.vtydir = optarg;
1223 break;
d62a17ae 1224 case 't': {
1225 char garbage[3];
1226 if ((sscanf(optarg, "%ld%1s", &gs.timeout, garbage)
1227 != 1)
1228 || (gs.timeout < 1)) {
1229 fprintf(stderr,
1230 "Invalid timeout argument: %s\n",
1231 optarg);
1232 frr_help_exit(1);
a6810074 1233 }
d62a17ae 1234 } break;
1235 case 'T': {
1236 char garbage[3];
1237 if ((sscanf(optarg, "%ld%1s", &gs.restart_timeout,
1238 garbage)
1239 != 1)
1240 || (gs.restart_timeout < 1)) {
1241 fprintf(stderr,
1242 "Invalid restart timeout argument: %s\n",
1243 optarg);
1244 frr_help_exit(1);
a6810074 1245 }
d62a17ae 1246 } break;
a6810074
DL
1247 default:
1248 fputs("Invalid option.\n", stderr);
4f04a76b 1249 frr_help_exit(1);
a6810074 1250 }
8b886ca7 1251 }
a6810074 1252
71e7975a
DL
1253 if (watch_only
1254 && (gs.start_command || gs.stop_command || gs.restart_command)) {
d87ae5cc 1255 fputs("Options -r/-s/-k are not used when --dry is active.\n",
a6810074 1256 stderr);
8b886ca7 1257 }
f168b713
DL
1258 if (!watch_only
1259 && (!gs.restart_command || !gs.start_command || !gs.stop_command)) {
1260 fprintf(stderr,
1261 "Options -s (start), -k (kill), and -r (restart) are required.\n");
1262 frr_help_exit(1);
8b886ca7 1263 }
8b886ca7 1264
a6810074
DL
1265 if (blankstr) {
1266 if (gs.restart_command)
1267 gs.restart_command =
d62a17ae 1268 translate_blanks(gs.restart_command, blankstr);
a6810074
DL
1269 if (gs.start_command)
1270 gs.start_command =
d62a17ae 1271 translate_blanks(gs.start_command, blankstr);
a6810074
DL
1272 if (gs.stop_command)
1273 gs.stop_command =
d62a17ae 1274 translate_blanks(gs.stop_command, blankstr);
065de903 1275 }
8b886ca7 1276
a6810074 1277 gs.restart.interval = gs.min_restart_interval;
8b886ca7 1278
4f04a76b 1279 master = frr_init();
b647dc2a 1280 watchfrr_error_init();
0a7c7856
DL
1281 watchfrr_init(argc, argv);
1282 watchfrr_vty_init();
1283
1284 frr_config_fork();
4f04a76b 1285
dd8376fe 1286 zlog_set_level(ZLOG_DEST_MONITOR, ZLOG_DISABLED);
0a7c7856 1287 if (watchfrr_di.daemon_mode)
dd8376fe 1288 zlog_set_level(ZLOG_DEST_SYSLOG, MIN(gs.loglevel, LOG_DEBUG));
0a7c7856 1289 else
dd8376fe 1290 zlog_set_level(ZLOG_DEST_STDOUT, MIN(gs.loglevel, LOG_DEBUG));
8b886ca7 1291
0a7c7856 1292 frr_run(master);
8b886ca7 1293
a6810074
DL
1294 systemd_send_stopping();
1295 /* Not reached. */
1296 return 0;
8b886ca7 1297}