]> git.proxmox.com Git - mirror_frr.git/blame - watchfrr/watchfrr.c
watchfrr: remove abundance of modes
[mirror_frr.git] / watchfrr / watchfrr.c
CommitLineData
8b886ca7 1/*
896014f4
DL
2 * Monitor status of frr daemons and restart if necessary.
3 *
4 * Copyright (C) 2004 Andrew J. Schorr
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
8b886ca7 19 */
20
a365534f 21#include <zebra.h>
8b886ca7 22#include <thread.h>
23#include <log.h>
52e66296 24#include <network.h>
8b886ca7 25#include <sigevent.h>
a365534f 26#include <lib/version.h>
95c4aff2 27#include "command.h"
87f44e2f 28#include "memory_vty.h"
4f04a76b 29#include "libfrr.h"
95c4aff2 30
6f594023 31#include <getopt.h>
a365534f 32#include <sys/un.h>
33#include <sys/wait.h>
837d16cc 34#include <memory.h>
651415bd 35#include <systemd.h>
8b886ca7 36
9473e340 37#include "watchfrr.h"
95c4aff2 38
/* Fallback definition, used only when no earlier header supplied MIN(). */
#ifndef MIN
#define MIN(X,Y) (((X) <= (Y)) ? (X) : (Y))
#endif

/*
 * Macros to help randomize timers.
 * JITTER(X) yields a value between -(X/2) and X-(X/2);
 * FUZZY(X) is X perturbed by JITTER(X/20).
 */
#define JITTER(X) ((random() % ((X)+1))-((X)/2))
#define FUZZY(X) ((X)+JITTER((X)/20))

/* Built-in defaults; each is reported by the help output. */
#define DEFAULT_PERIOD 5
#define DEFAULT_TIMEOUT 10
#define DEFAULT_RESTART_TIMEOUT 20
#define DEFAULT_LOGLEVEL LOG_INFO
#define DEFAULT_MIN_RESTART 60
#define DEFAULT_MAX_RESTART 600

/* Pid file location: build-time override if present, else under STATEDIR. */
#ifdef PATH_WATCHFRR_PID
#define DEFAULT_PIDFILE PATH_WATCHFRR_PID
#else
#define DEFAULT_PIDFILE STATEDIR "/watchfrr.pid"
#endif

/* Directory holding the daemons' "<name>.vty" sockets. */
#ifdef DAEMON_VTY_DIR
#define VTYDIR DAEMON_VTY_DIR
#else
#define VTYDIR STATEDIR
#endif

/* Token sent with the "echo" command and expected back in the reply. */
#define PING_TOKEN "PING"
65
/* Needs to be global, referenced somewhere inside libfrr. */
struct thread_master *master;

/* Set by --dry: try_restart() then returns without running any job. */
static bool watch_only = false;

/* Steps of the phased global restart; advanced by phase_check(). */
typedef enum {
	PHASE_NONE = 0,
	PHASE_STOPS_PENDING,
	PHASE_WAITING_DOWN,
	PHASE_ZEBRA_RESTART_PENDING,
	PHASE_WAITING_ZEBRA_UP
} restart_phase_t;

/*
 * Log label for each phase.  Designated initializers keep the table in
 * step with the enum; the former sixth entry ("Start jobs running") had
 * no matching phase and has been dropped.
 */
static const char *phase_str[] = {
	[PHASE_NONE] = "None",
	[PHASE_STOPS_PENDING] = "Stop jobs running",
	[PHASE_WAITING_DOWN] = "Waiting for other daemons to come down",
	[PHASE_ZEBRA_RESTART_PENDING] = "Zebra restart job running",
	[PHASE_WAITING_ZEBRA_UP] = "Waiting for zebra to come up",
};

/* Seconds a single phase may last before phase_hanging() aborts it. */
#define PHASE_TIMEOUT (3*gs.restart_timeout)
89
a6810074
DL
90struct restart_info {
91 const char *name;
92 const char *what;
93 pid_t pid;
94 struct timeval time;
95 long interval;
96 struct thread *t_kill;
97 int kills;
098e240f 98};
99
a6810074 100static struct global_state {
a6810074
DL
101 restart_phase_t phase;
102 struct thread *t_phase_hanging;
103 const char *vtydir;
104 long period;
105 long timeout;
106 long restart_timeout;
107 long min_restart_interval;
108 long max_restart_interval;
109 int do_ping;
110 struct daemon *daemons;
111 const char *restart_command;
112 const char *start_command;
113 const char *stop_command;
114 struct restart_info restart;
115 int unresponsive_restart;
116 int loglevel;
d62a17ae 117 struct daemon *special; /* points to zebra when doing phased restart */
a6810074
DL
118 int numdaemons;
119 int numpids;
d62a17ae 120 int numdown; /* # of daemons that are not UP or UNRESPONSIVE */
8b886ca7 121} gs = {
d62a17ae 122 .phase = PHASE_NONE,
123 .vtydir = VTYDIR,
124 .period = 1000 * DEFAULT_PERIOD,
125 .timeout = DEFAULT_TIMEOUT,
126 .restart_timeout = DEFAULT_RESTART_TIMEOUT,
127 .loglevel = DEFAULT_LOGLEVEL,
128 .min_restart_interval = DEFAULT_MIN_RESTART,
129 .max_restart_interval = DEFAULT_MAX_RESTART,
130 .do_ping = 1,
131};
a6810074
DL
132
133typedef enum {
134 DAEMON_INIT,
135 DAEMON_DOWN,
136 DAEMON_CONNECTING,
137 DAEMON_UP,
138 DAEMON_UNRESPONSIVE
8b886ca7 139} daemon_state_t;
140
d62a17ae 141#define IS_UP(DMN) \
142 (((DMN)->state == DAEMON_UP) || ((DMN)->state == DAEMON_UNRESPONSIVE))
8b886ca7 143
a6810074 144static const char *state_str[] = {
d62a17ae 145 "Init", "Down", "Connecting", "Up", "Unresponsive",
8b886ca7 146};
147
148struct daemon {
a6810074
DL
149 const char *name;
150 daemon_state_t state;
151 int fd;
152 struct timeval echo_sent;
153 u_int connect_tries;
154 struct thread *t_wakeup;
155 struct thread *t_read;
156 struct thread *t_write;
157 struct daemon *next;
158 struct restart_info restart;
8b886ca7 159};
160
9272302b
DL
/* Codes returned for long options that have no single-letter form. */
#define OPTION_MINRESTART 2000
#define OPTION_MAXRESTART 2001
#define OPTION_DRY 2002

/* Long option table handed to frr_opt_add(). */
static const struct option longopts[] = {
	{"daemon",               no_argument,       NULL, 'd'},
	{"statedir",             required_argument, NULL, 'S'},
	{"no-echo",              no_argument,       NULL, 'e'},
	{"loglevel",             required_argument, NULL, 'l'},
	{"interval",             required_argument, NULL, 'i'},
	{"timeout",              required_argument, NULL, 't'},
	{"restart-timeout",      required_argument, NULL, 'T'},
	{"restart",              required_argument, NULL, 'r'},
	{"start-command",        required_argument, NULL, 's'},
	{"kill-command",         required_argument, NULL, 'k'},
	{"unresponsive-restart", no_argument,       NULL, 'z'},
	{"dry",                  no_argument,       NULL, OPTION_DRY},
	{"min-restart-interval", required_argument, NULL, OPTION_MINRESTART},
	{"max-restart-interval", required_argument, NULL, OPTION_MAXRESTART},
	{"pid-file",             required_argument, NULL, 'p'},
	{"blank-string",         required_argument, NULL, 'b'},
	{"help",                 no_argument,       NULL, 'h'},
	{"version",              no_argument,       NULL, 'v'},
	{NULL, 0, NULL, 0} /* terminator */
};
8b886ca7 185
186static int try_connect(struct daemon *dmn);
187static int wakeup_send_echo(struct thread *t_wakeup);
188static void try_restart(struct daemon *dmn);
189static void phase_check(void);
190
4f04a76b
DL
191static const char *progname;
192static void printhelp(FILE *target)
8b886ca7 193{
d62a17ae 194 fprintf(target,
195 "Usage : %s [OPTION...] <daemon name> ...\n\n\
9473e340 196Watchdog program to monitor status of frr daemons and try to restart\n\
8b886ca7 197them if they are down or unresponsive. It determines whether a daemon is\n\
198up based on whether it can connect to the daemon's vty unix stream socket.\n\
199It then repeatedly sends echo commands over that socket to determine whether\n\
200the daemon is responsive. If the daemon crashes, we will receive an EOF\n\
201on the socket connection and know immediately that the daemon is down.\n\n\
202The daemons to be monitored should be listed on the command line.\n\n\
8b886ca7 203In order to avoid attempting to restart the daemons in a fast loop,\n\
204the -m and -M options allow you to control the minimum delay between\n\
205restart commands. The minimum restart delay is recalculated each time\n\
206a restart is attempted: if the time since the last restart attempt exceeds\n\
207twice the -M value, then the restart delay is set to the -m value.\n\
d62a17ae 208Otherwise, the interval is doubled (but capped at the -M value).\n\n",
f168b713 209 progname);
e757c940 210
d62a17ae 211 fprintf(target,
212 "Options:\n\
8b886ca7 213-d, --daemon Run in daemon mode. In this mode, error messages are sent\n\
214 to syslog instead of stdout.\n\
215-S, --statedir Set the vty socket directory (default is %s)\n\
216-e, --no-echo Do not ping the daemons to test responsiveness (this\n\
217 option is necessary if the daemons do not support the\n\
218 echo command)\n\
219-l, --loglevel Set the logging level (default is %d).\n\
220 The value should range from %d (LOG_EMERG) to %d (LOG_DEBUG),\n\
221 but it can be set higher than %d if extra-verbose debugging\n\
222 messages are desired.\n\
9272302b 223 --min-restart-interval\n\
8b886ca7 224 Set the minimum seconds to wait between invocations of daemon\n\
225 restart commands (default is %d).\n\
9272302b 226 --max-restart-interval\n\
8b886ca7 227 Set the maximum seconds to wait between invocations of daemon\n\
228 restart commands (default is %d).\n\
229-i, --interval Set the status polling interval in seconds (default is %d)\n\
230-t, --timeout Set the unresponsiveness timeout in seconds (default is %d)\n\
231-T, --restart-timeout\n\
232 Set the restart (kill) timeout in seconds (default is %d).\n\
233 If any background jobs are still running after this much\n\
234 time has elapsed, they will be killed.\n\
235-r, --restart Supply a Bourne shell command to use to restart a single\n\
236 daemon. The command string should include '%%s' where the\n\
237 name of the daemon should be substituted.\n\
8b886ca7 238-s, --start-command\n\
239 Supply a Bourne shell to command to use to start a single\n\
240 daemon. The command string should include '%%s' where the\n\
241 name of the daemon should be substituted.\n\
242-k, --kill-command\n\
243 Supply a Bourne shell to command to use to stop a single\n\
244 daemon. The command string should include '%%s' where the\n\
245 name of the daemon should be substituted.\n\
8b886ca7 246-z, --unresponsive-restart\n\
247 When a daemon is unresponsive, treat it as being down for\n\
248 restart purposes.\n\
f168b713 249 --dry Do not start or restart anything, just log.\n\
8b886ca7 250-p, --pid-file Set process identifier file name\n\
251 (default is %s).\n\
c8b40f86 252-b, --blank-string\n\
253 When the supplied argument string is found in any of the\n\
f168b713 254 various shell command arguments (-r, -s, or -k), replace\n\
c8b40f86 255 it with a space. This is an ugly hack to circumvent problems\n\
256 passing command-line arguments with embedded spaces.\n\
8b886ca7 257-v, --version Print program version\n\
d62a17ae 258-h, --help Display this help and exit\n",
259 VTYDIR, DEFAULT_LOGLEVEL, LOG_EMERG, LOG_DEBUG, LOG_DEBUG,
260 DEFAULT_MIN_RESTART, DEFAULT_MAX_RESTART, DEFAULT_PERIOD,
261 DEFAULT_TIMEOUT, DEFAULT_RESTART_TIMEOUT, DEFAULT_PIDFILE);
8b886ca7 262}
263
/*
 * Run 'shell_cmd' through "/bin/sh -c" in a forked child.
 *
 * Returns the child's pid, or -1 if fork() failed.  The child is not
 * waited for here; it is reaped later by the SIGCHLD handler.
 */
static pid_t run_background(char *shell_cmd)
{
	pid_t child = fork();

	if (child < 0) {
		zlog_err("fork failed, cannot run command [%s]: %s", shell_cmd,
			 safe_strerror(errno));
		return -1;
	}

	if (child == 0) {
		/* Child process. */
		char shell[] = "sh";
		char dashc[] = "-c";
		char *const argv[4] = {shell, dashc, shell_cmd, NULL};

		/* Use separate process group so child processes can be killed
		 * easily. */
		if (setpgid(0, 0) < 0)
			zlog_warn("warning: setpgid(0,0) failed: %s",
				  safe_strerror(errno));

		execv("/bin/sh", argv);
		/* Only reached when execv() itself failed. */
		zlog_err("execv(/bin/sh -c '%s') failed: %s", shell_cmd,
			 safe_strerror(errno));
		_exit(127);
	}

	/* Parent process: we will reap the child later.  A successful fork
	 * is routine, so it is reported at info level rather than as an
	 * error. */
	zlog_info("Forked background command [pid %d]: %s", (int)child,
		  shell_cmd);
	return child;
}
296
a6810074
DL
/*
 * Store (current time - *start_time) in *result and return result.
 * tv_usec of the result is normalized to be non-negative.
 */
static struct timeval *time_elapsed(struct timeval *result,
				    const struct timeval *start_time)
{
	gettimeofday(result, NULL);
	result->tv_sec -= start_time->tv_sec;
	result->tv_usec -= start_time->tv_usec;

	/* Borrow whole seconds until the microsecond part is non-negative. */
	for (; result->tv_usec < 0; result->tv_sec--)
		result->tv_usec += 1000000L;

	return result;
}
309
a6810074 310static int restart_kill(struct thread *t_kill)
8b886ca7 311{
a6810074
DL
312 struct restart_info *restart = THREAD_ARG(t_kill);
313 struct timeval delay;
314
315 time_elapsed(&delay, &restart->time);
d62a17ae 316 zlog_warn(
317 "Warning: %s %s child process %d still running after "
318 "%ld seconds, sending signal %d",
319 restart->what, restart->name, (int)restart->pid,
320 (long)delay.tv_sec, (restart->kills ? SIGKILL : SIGTERM));
a6810074
DL
321 kill(-restart->pid, (restart->kills ? SIGKILL : SIGTERM));
322 restart->kills++;
66e78ae6
QY
323 restart->t_kill = NULL;
324 thread_add_timer(master, restart_kill, restart, gs.restart_timeout,
325 &restart->t_kill);
a6810074 326 return 0;
8b886ca7 327}
328
a6810074 329static struct restart_info *find_child(pid_t child)
8b886ca7 330{
f168b713
DL
331 struct daemon *dmn;
332 for (dmn = gs.daemons; dmn; dmn = dmn->next) {
333 if (dmn->restart.pid == child)
334 return &dmn->restart;
a6810074
DL
335 }
336 return NULL;
8b886ca7 337}
338
a6810074 339static void sigchild(void)
8b886ca7 340{
a6810074
DL
341 pid_t child;
342 int status;
343 const char *name;
344 const char *what;
345 struct restart_info *restart;
346
347 switch (child = waitpid(-1, &status, WNOHANG)) {
348 case -1:
349 zlog_err("waitpid failed: %s", safe_strerror(errno));
350 return;
351 case 0:
352 zlog_warn("SIGCHLD received, but waitpid did not reap a child");
353 return;
354 }
355
356 if (child == integrated_write_pid) {
357 integrated_write_sigchld(status);
358 return;
359 }
360
361 if ((restart = find_child(child)) != NULL) {
362 name = restart->name;
363 what = restart->what;
364 restart->pid = 0;
365 gs.numpids--;
366 thread_cancel(restart->t_kill);
367 restart->t_kill = NULL;
d62a17ae 368 /* Update restart time to reflect the time the command
369 * completed. */
a6810074
DL
370 gettimeofday(&restart->time, NULL);
371 } else {
d62a17ae 372 zlog_err(
373 "waitpid returned status for an unknown child process %d",
374 (int)child);
a6810074
DL
375 name = "(unknown)";
376 what = "background";
377 }
378 if (WIFSTOPPED(status))
d62a17ae 379 zlog_warn("warning: %s %s process %d is stopped", what, name,
380 (int)child);
a6810074 381 else if (WIFSIGNALED(status))
d62a17ae 382 zlog_warn("%s %s process %d terminated due to signal %d", what,
383 name, (int)child, WTERMSIG(status));
a6810074
DL
384 else if (WIFEXITED(status)) {
385 if (WEXITSTATUS(status) != 0)
d62a17ae 386 zlog_warn(
387 "%s %s process %d exited with non-zero status %d",
388 what, name, (int)child, WEXITSTATUS(status));
a6810074
DL
389 else
390 zlog_debug("%s %s process %d exited normally", what,
391 name, (int)child);
392 } else
393 zlog_err("cannot interpret %s %s process %d wait status 0x%x",
394 what, name, (int)child, status);
395 phase_check();
8b886ca7 396}
397
d62a17ae 398static int run_job(struct restart_info *restart, const char *cmdtype,
399 const char *command, int force, int update_interval)
8b886ca7 400{
a6810074
DL
401 struct timeval delay;
402
403 if (gs.loglevel > LOG_DEBUG + 1)
404 zlog_debug("attempting to %s %s", cmdtype, restart->name);
405
406 if (restart->pid) {
407 if (gs.loglevel > LOG_DEBUG + 1)
d62a17ae 408 zlog_debug(
409 "cannot %s %s, previous pid %d still running",
410 cmdtype, restart->name, (int)restart->pid);
a6810074
DL
411 return -1;
412 }
413
d62a17ae 414 /* Note: time_elapsed test must come before the force test, since we
415 need
a6810074
DL
416 to make sure that delay is initialized for use below in updating the
417 restart interval. */
418 if ((time_elapsed(&delay, &restart->time)->tv_sec < restart->interval)
419 && !force) {
420 if (gs.loglevel > LOG_DEBUG + 1)
d62a17ae 421 zlog_debug(
422 "postponing %s %s: "
423 "elapsed time %ld < retry interval %ld",
424 cmdtype, restart->name, (long)delay.tv_sec,
425 restart->interval);
a6810074
DL
426 return -1;
427 }
428
429 gettimeofday(&restart->time, NULL);
430 restart->kills = 0;
431 {
432 char cmd[strlen(command) + strlen(restart->name) + 1];
433 snprintf(cmd, sizeof(cmd), command, restart->name);
434 if ((restart->pid = run_background(cmd)) > 0) {
66e78ae6 435 restart->t_kill = NULL;
d62a17ae 436 thread_add_timer(master, restart_kill, restart,
437 gs.restart_timeout, &restart->t_kill);
a6810074
DL
438 restart->what = cmdtype;
439 gs.numpids++;
440 } else
441 restart->pid = 0;
442 }
443
444 /* Calculate the new restart interval. */
445 if (update_interval) {
446 if (delay.tv_sec > 2 * gs.max_restart_interval)
447 restart->interval = gs.min_restart_interval;
448 else if ((restart->interval *= 2) > gs.max_restart_interval)
449 restart->interval = gs.max_restart_interval;
450 if (gs.loglevel > LOG_DEBUG + 1)
451 zlog_debug("restart %s interval is now %ld",
452 restart->name, restart->interval);
453 }
454 return restart->pid;
8b886ca7 455}
456
/*
 * Scheduling helpers.  Each clears the stored thread pointer and then
 * registers a new event through it.  They expand to a single statement
 * with no trailing semicolon, so "MACRO(x);" is safe in an unbraced
 * if/else.
 */

/* Watch the daemon's socket for readable data. */
#define SET_READ_HANDLER(DMN)                                                  \
	do {                                                                   \
		(DMN)->t_read = NULL;                                          \
		thread_add_read(master, handle_read, (DMN), (DMN)->fd,         \
				&(DMN)->t_read);                               \
	} while (0)

/* Retry the connection after roughly one polling period. */
#define SET_WAKEUP_DOWN(DMN)                                                   \
	do {                                                                   \
		(DMN)->t_wakeup = NULL;                                        \
		thread_add_timer_msec(master, wakeup_down, (DMN),              \
				      FUZZY(gs.period), &(DMN)->t_wakeup);     \
	} while (0)

/* Re-check an unresponsive daemon after roughly one polling period. */
#define SET_WAKEUP_UNRESPONSIVE(DMN)                                           \
	do {                                                                   \
		(DMN)->t_wakeup = NULL;                                        \
		thread_add_timer_msec(master, wakeup_unresponsive, (DMN),      \
				      FUZZY(gs.period), &(DMN)->t_wakeup);     \
	} while (0)

/* Send the next echo after roughly one polling period. */
#define SET_WAKEUP_ECHO(DMN)                                                   \
	do {                                                                   \
		(DMN)->t_wakeup = NULL;                                        \
		thread_add_timer_msec(master, wakeup_send_echo, (DMN),         \
				      FUZZY(gs.period), &(DMN)->t_wakeup);     \
	} while (0)
8b886ca7 484
a6810074 485static int wakeup_down(struct thread *t_wakeup)
8b886ca7 486{
a6810074
DL
487 struct daemon *dmn = THREAD_ARG(t_wakeup);
488
489 dmn->t_wakeup = NULL;
490 if (try_connect(dmn) < 0)
491 SET_WAKEUP_DOWN(dmn);
492 if ((dmn->connect_tries > 1) && (dmn->state != DAEMON_UP))
493 try_restart(dmn);
494 return 0;
8b886ca7 495}
496
a6810074 497static int wakeup_init(struct thread *t_wakeup)
8b886ca7 498{
a6810074
DL
499 struct daemon *dmn = THREAD_ARG(t_wakeup);
500
501 dmn->t_wakeup = NULL;
502 if (try_connect(dmn) < 0) {
503 SET_WAKEUP_DOWN(dmn);
504 zlog_err("%s state -> down : initial connection attempt failed",
505 dmn->name);
506 dmn->state = DAEMON_DOWN;
507 }
508 return 0;
8b886ca7 509}
510
a6810074 511static void daemon_down(struct daemon *dmn, const char *why)
8b886ca7 512{
a6810074
DL
513 if (IS_UP(dmn) || (dmn->state == DAEMON_INIT))
514 zlog_err("%s state -> down : %s", dmn->name, why);
515 else if (gs.loglevel > LOG_DEBUG)
516 zlog_debug("%s still down : %s", dmn->name, why);
517 if (IS_UP(dmn))
518 gs.numdown++;
519 dmn->state = DAEMON_DOWN;
520 if (dmn->fd >= 0) {
521 close(dmn->fd);
522 dmn->fd = -1;
523 }
524 THREAD_OFF(dmn->t_read);
525 THREAD_OFF(dmn->t_write);
526 THREAD_OFF(dmn->t_wakeup);
527 if (try_connect(dmn) < 0)
528 SET_WAKEUP_DOWN(dmn);
529 phase_check();
8b886ca7 530}
531
a6810074 532static int handle_read(struct thread *t_read)
8b886ca7 533{
a6810074
DL
534 struct daemon *dmn = THREAD_ARG(t_read);
535 static const char resp[sizeof(PING_TOKEN) + 4] = PING_TOKEN "\n";
536 char buf[sizeof(resp) + 100];
537 ssize_t rc;
538 struct timeval delay;
539
540 dmn->t_read = NULL;
541 if ((rc = read(dmn->fd, buf, sizeof(buf))) < 0) {
542 char why[100];
543
544 if (ERRNO_IO_RETRY(errno)) {
545 /* Pretend it never happened. */
546 SET_READ_HANDLER(dmn);
547 return 0;
548 }
549 snprintf(why, sizeof(why), "unexpected read error: %s",
550 safe_strerror(errno));
551 daemon_down(dmn, why);
552 return 0;
8b886ca7 553 }
a6810074
DL
554 if (rc == 0) {
555 daemon_down(dmn, "read returned EOF");
556 return 0;
557 }
558 if (!dmn->echo_sent.tv_sec) {
559 char why[sizeof(buf) + 100];
560 snprintf(why, sizeof(why),
561 "unexpected read returns %d bytes: %.*s", (int)rc,
562 (int)rc, buf);
563 daemon_down(dmn, why);
564 return 0;
8b886ca7 565 }
a6810074
DL
566
567 /* We are expecting an echo response: is there any chance that the
568 response would not be returned entirely in the first read? That
569 seems inconceivable... */
570 if ((rc != sizeof(resp)) || memcmp(buf, resp, sizeof(resp))) {
571 char why[100 + sizeof(buf)];
572 snprintf(why, sizeof(why),
573 "read returned bad echo response of %d bytes "
d62a17ae 574 "(expecting %u): %.*s",
575 (int)rc, (u_int)sizeof(resp), (int)rc, buf);
a6810074
DL
576 daemon_down(dmn, why);
577 return 0;
578 }
579
580 time_elapsed(&delay, &dmn->echo_sent);
581 dmn->echo_sent.tv_sec = 0;
582 if (dmn->state == DAEMON_UNRESPONSIVE) {
583 if (delay.tv_sec < gs.timeout) {
584 dmn->state = DAEMON_UP;
d62a17ae 585 zlog_warn(
586 "%s state -> up : echo response received after %ld.%06ld "
587 "seconds",
588 dmn->name, (long)delay.tv_sec,
589 (long)delay.tv_usec);
a6810074 590 } else
d62a17ae 591 zlog_warn(
592 "%s: slow echo response finally received after %ld.%06ld "
593 "seconds",
594 dmn->name, (long)delay.tv_sec,
595 (long)delay.tv_usec);
a6810074
DL
596 } else if (gs.loglevel > LOG_DEBUG + 1)
597 zlog_debug("%s: echo response received after %ld.%06ld seconds",
598 dmn->name, (long)delay.tv_sec, (long)delay.tv_usec);
599
600 SET_READ_HANDLER(dmn);
601 if (dmn->t_wakeup)
602 thread_cancel(dmn->t_wakeup);
603 SET_WAKEUP_ECHO(dmn);
604
605 return 0;
8b886ca7 606}
607
207e0d7a
DS
608/*
609 * Wait till we notice that all daemons are ready before
610 * we send we are ready to systemd
611 */
a6810074 612static void daemon_send_ready(void)
207e0d7a 613{
a6810074
DL
614 static int sent = 0;
615 if (!sent && gs.numdown == 0) {
a6810074 616 FILE *fp;
207e0d7a 617
a6810074
DL
618 fp = fopen(DAEMON_VTY_DIR "/watchfrr.started", "w");
619 fclose(fp);
d62a17ae 620 zlog_notice(
621 "Watchfrr: Notifying Systemd we are up and running");
a6810074
DL
622 systemd_send_started(master, 0);
623 sent = 1;
624 }
207e0d7a
DS
625}
626
a6810074 627static void daemon_up(struct daemon *dmn, const char *why)
8b886ca7 628{
a6810074
DL
629 dmn->state = DAEMON_UP;
630 gs.numdown--;
631 dmn->connect_tries = 0;
632 zlog_notice("%s state -> up : %s", dmn->name, why);
633 daemon_send_ready();
634 if (gs.do_ping)
635 SET_WAKEUP_ECHO(dmn);
636 phase_check();
8b886ca7 637}
638
a6810074 639static int check_connect(struct thread *t_write)
8b886ca7 640{
a6810074
DL
641 struct daemon *dmn = THREAD_ARG(t_write);
642 int sockerr;
643 socklen_t reslen = sizeof(sockerr);
644
645 dmn->t_write = NULL;
646 if (getsockopt(dmn->fd, SOL_SOCKET, SO_ERROR, (char *)&sockerr, &reslen)
647 < 0) {
648 zlog_warn("%s: check_connect: getsockopt failed: %s", dmn->name,
649 safe_strerror(errno));
650 daemon_down(dmn,
651 "getsockopt failed checking connection success");
652 return 0;
653 }
654 if ((reslen == sizeof(sockerr)) && sockerr) {
655 char why[100];
d62a17ae 656 snprintf(
657 why, sizeof(why),
658 "getsockopt reports that connection attempt failed: %s",
659 safe_strerror(sockerr));
a6810074
DL
660 daemon_down(dmn, why);
661 return 0;
662 }
663
664 daemon_up(dmn, "delayed connect succeeded");
665 return 0;
8b886ca7 666}
667
a6810074 668static int wakeup_connect_hanging(struct thread *t_wakeup)
8b886ca7 669{
a6810074
DL
670 struct daemon *dmn = THREAD_ARG(t_wakeup);
671 char why[100];
672
673 dmn->t_wakeup = NULL;
674 snprintf(why, sizeof(why),
675 "connection attempt timed out after %ld seconds", gs.timeout);
676 daemon_down(dmn, why);
677 return 0;
8b886ca7 678}
679
680/* Making connection to protocol daemon. */
a6810074 681static int try_connect(struct daemon *dmn)
8b886ca7 682{
a6810074
DL
683 int sock;
684 struct sockaddr_un addr;
685 socklen_t len;
686
687 if (gs.loglevel > LOG_DEBUG + 1)
688 zlog_debug("%s: attempting to connect", dmn->name);
689 dmn->connect_tries++;
690
691 memset(&addr, 0, sizeof(struct sockaddr_un));
692 addr.sun_family = AF_UNIX;
d62a17ae 693 snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/%s.vty", gs.vtydir,
694 dmn->name);
6f0e3f6e 695#ifdef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN
a6810074 696 len = addr.sun_len = SUN_LEN(&addr);
8b886ca7 697#else
a6810074 698 len = sizeof(addr.sun_family) + strlen(addr.sun_path);
d62a17ae 699#endif /* HAVE_STRUCT_SOCKADDR_UN_SUN_LEN */
a6810074
DL
700
701 /* Quick check to see if we might succeed before we go to the trouble
702 of creating a socket. */
703 if (access(addr.sun_path, W_OK) < 0) {
704 if (errno != ENOENT)
705 zlog_err("%s: access to socket %s denied: %s",
706 dmn->name, addr.sun_path,
707 safe_strerror(errno));
708 return -1;
709 }
710
711 if ((sock = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
d62a17ae 712 zlog_err("%s(%s): cannot make socket: %s", __func__,
713 addr.sun_path, safe_strerror(errno));
a6810074
DL
714 return -1;
715 }
716
717 if (set_nonblocking(sock) < 0 || set_cloexec(sock) < 0) {
d62a17ae 718 zlog_err("%s(%s): set_nonblocking/cloexec(%d) failed", __func__,
719 addr.sun_path, sock);
a6810074
DL
720 close(sock);
721 return -1;
8b886ca7 722 }
a6810074
DL
723
724 if (connect(sock, (struct sockaddr *)&addr, len) < 0) {
725 if ((errno != EINPROGRESS) && (errno != EWOULDBLOCK)) {
726 if (gs.loglevel > LOG_DEBUG)
727 zlog_debug("%s(%s): connect failed: %s",
728 __func__, addr.sun_path,
729 safe_strerror(errno));
730 close(sock);
731 return -1;
732 }
733 if (gs.loglevel > LOG_DEBUG)
734 zlog_debug("%s: connection in progress", dmn->name);
735 dmn->state = DAEMON_CONNECTING;
736 dmn->fd = sock;
66e78ae6
QY
737 dmn->t_write = NULL;
738 thread_add_write(master, check_connect, dmn, dmn->fd,
d62a17ae 739 &dmn->t_write);
740 dmn->t_wakeup = NULL;
741 thread_add_timer(master, wakeup_connect_hanging, dmn,
742 gs.timeout, &dmn->t_wakeup);
a6810074
DL
743 SET_READ_HANDLER(dmn);
744 return 0;
745 }
746
747 dmn->fd = sock;
748 SET_READ_HANDLER(dmn);
749 daemon_up(dmn, "connect succeeded");
750 return 1;
8b886ca7 751}
752
a6810074 753static int phase_hanging(struct thread *t_hanging)
8b886ca7 754{
a6810074
DL
755 gs.t_phase_hanging = NULL;
756 zlog_err("Phase [%s] hanging for %ld seconds, aborting phased restart",
757 phase_str[gs.phase], PHASE_TIMEOUT);
758 gs.phase = PHASE_NONE;
759 return 0;
8b886ca7 760}
761
a6810074 762static void set_phase(restart_phase_t new_phase)
8b886ca7 763{
a6810074
DL
764 gs.phase = new_phase;
765 if (gs.t_phase_hanging)
766 thread_cancel(gs.t_phase_hanging);
66e78ae6
QY
767 gs.t_phase_hanging = NULL;
768 thread_add_timer(master, phase_hanging, NULL, PHASE_TIMEOUT,
769 &gs.t_phase_hanging);
8b886ca7 770}
771
a6810074 772static void phase_check(void)
8b886ca7 773{
a6810074
DL
774 switch (gs.phase) {
775 case PHASE_NONE:
776 break;
777 case PHASE_STOPS_PENDING:
778 if (gs.numpids)
779 break;
d62a17ae 780 zlog_info(
781 "Phased restart: all routing daemon stop jobs have completed.");
a6810074
DL
782 set_phase(PHASE_WAITING_DOWN);
783
d62a17ae 784 /*FALLTHRU*/
a6810074
DL
785 case PHASE_WAITING_DOWN:
786 if (gs.numdown + IS_UP(gs.special) < gs.numdaemons)
787 break;
788 zlog_info("Phased restart: all routing daemons now down.");
789 run_job(&gs.special->restart, "restart", gs.restart_command, 1,
790 1);
791 set_phase(PHASE_ZEBRA_RESTART_PENDING);
792
d62a17ae 793 /*FALLTHRU*/
a6810074
DL
794 case PHASE_ZEBRA_RESTART_PENDING:
795 if (gs.special->restart.pid)
796 break;
797 zlog_info("Phased restart: %s restart job completed.",
798 gs.special->name);
799 set_phase(PHASE_WAITING_ZEBRA_UP);
800
d62a17ae 801 /*FALLTHRU*/
a6810074
DL
802 case PHASE_WAITING_ZEBRA_UP:
803 if (!IS_UP(gs.special))
804 break;
805 zlog_info("Phased restart: %s is now up.", gs.special->name);
806 {
807 struct daemon *dmn;
808 for (dmn = gs.daemons; dmn; dmn = dmn->next) {
809 if (dmn != gs.special)
810 run_job(&dmn->restart, "start",
811 gs.start_command, 1, 0);
812 }
813 }
814 gs.phase = PHASE_NONE;
815 THREAD_OFF(gs.t_phase_hanging);
816 zlog_notice("Phased global restart has completed.");
817 break;
818 }
8b886ca7 819}
820
a6810074 821static void try_restart(struct daemon *dmn)
8b886ca7 822{
f168b713 823 if (watch_only)
a6810074 824 return;
a6810074 825
f168b713
DL
826 if (dmn != gs.special) {
827 if ((gs.special->state == DAEMON_UP)
828 && (gs.phase == PHASE_NONE))
829 run_job(&dmn->restart, "restart", gs.restart_command, 0,
830 1);
831 else
832 zlog_debug(
833 "%s: postponing restart attempt because master %s daemon "
834 "not up [%s], or phased restart in progress",
835 dmn->name, gs.special->name,
836 state_str[gs.special->state]);
837 return;
838 }
839
840 if ((gs.phase != PHASE_NONE) || gs.numpids) {
841 if (gs.loglevel > LOG_DEBUG + 1)
842 zlog_debug(
843 "postponing phased global restart: restart already in "
844 "progress [%s], or outstanding child processes [%d]",
845 phase_str[gs.phase], gs.numpids);
846 return;
847 }
848 /* Is it too soon for a restart? */
849 {
850 struct timeval delay;
851 if (time_elapsed(&delay, &gs.special->restart.time)->tv_sec
852 < gs.special->restart.interval) {
a6810074 853 if (gs.loglevel > LOG_DEBUG + 1)
d62a17ae 854 zlog_debug(
f168b713
DL
855 "postponing phased global restart: "
856 "elapsed time %ld < retry interval %ld",
857 (long)delay.tv_sec,
858 gs.special->restart.interval);
859 return;
a6810074 860 }
8b886ca7 861 }
f168b713 862 run_job(&gs.restart, "restart", gs.restart_command, 0, 1);
8b886ca7 863}
864
a6810074 865static int wakeup_unresponsive(struct thread *t_wakeup)
8b886ca7 866{
a6810074
DL
867 struct daemon *dmn = THREAD_ARG(t_wakeup);
868
869 dmn->t_wakeup = NULL;
870 if (dmn->state != DAEMON_UNRESPONSIVE)
d62a17ae 871 zlog_err(
872 "%s: no longer unresponsive (now %s), "
873 "wakeup should have been cancelled!",
874 dmn->name, state_str[dmn->state]);
a6810074
DL
875 else {
876 SET_WAKEUP_UNRESPONSIVE(dmn);
877 try_restart(dmn);
878 }
879 return 0;
8b886ca7 880}
881
a6810074 882static int wakeup_no_answer(struct thread *t_wakeup)
8b886ca7 883{
a6810074
DL
884 struct daemon *dmn = THREAD_ARG(t_wakeup);
885
886 dmn->t_wakeup = NULL;
887 dmn->state = DAEMON_UNRESPONSIVE;
d62a17ae 888 zlog_err(
889 "%s state -> unresponsive : no response yet to ping "
890 "sent %ld seconds ago",
891 dmn->name, gs.timeout);
a6810074
DL
892 if (gs.unresponsive_restart) {
893 SET_WAKEUP_UNRESPONSIVE(dmn);
894 try_restart(dmn);
895 }
896 return 0;
8b886ca7 897}
898
a6810074 899static int wakeup_send_echo(struct thread *t_wakeup)
8b886ca7 900{
a6810074
DL
901 static const char echocmd[] = "echo " PING_TOKEN;
902 ssize_t rc;
903 struct daemon *dmn = THREAD_ARG(t_wakeup);
904
905 dmn->t_wakeup = NULL;
d62a17ae 906 if (((rc = write(dmn->fd, echocmd, sizeof(echocmd))) < 0)
907 || ((size_t)rc != sizeof(echocmd))) {
a6810074
DL
908 char why[100 + sizeof(echocmd)];
909 snprintf(why, sizeof(why),
910 "write '%s' returned %d instead of %u", echocmd,
d62a17ae 911 (int)rc, (u_int)sizeof(echocmd));
a6810074
DL
912 daemon_down(dmn, why);
913 } else {
914 gettimeofday(&dmn->echo_sent, NULL);
66e78ae6
QY
915 dmn->t_wakeup = NULL;
916 thread_add_timer(master, wakeup_no_answer, dmn, gs.timeout,
917 &dmn->t_wakeup);
a6810074
DL
918 }
919 return 0;
8b886ca7 920}
921
/*
 * Handler registered for SIGINT and SIGTERM: log, tell systemd we are
 * stopping, and exit with status 0.
 */
static void sigint(void)
{
	zlog_notice("Terminating on signal");
	systemd_send_stopping();
	exit(0);
}
928
/*
 * Check that 'cmd' is usable as a command template: it must contain
 * exactly one '%' character, and that character must be followed
 * directly by 's'.
 *
 * Returns 1 if valid, 0 otherwise (including when cmd is NULL).
 */
static int valid_command(const char *cmd)
{
	const char *p; /* const: points into the caller's read-only string */

	if (cmd == NULL)
		return 0;

	p = strchr(cmd, '%');
	if (p == NULL || p[1] != 's')
		return 0;

	/* Reject any further '%' after the "%s". */
	return strchr(p + 1, '%') == NULL;
}
936
/* This is an ugly hack to circumvent problems with passing command-line
   arguments that contain spaces.  The fix is to use a configuration file.

   Returns a newly allocated copy of 'cmd' in which every occurrence of
   'blankstr' has been replaced by a single space; exits if the copy
   cannot be allocated.  The caller owns the returned string.

   The scan is a single left-to-right pass that resumes after each
   inserted space.  An empty 'blankstr' therefore leaves the copy
   unchanged, and a 'blankstr' that itself contains a space terminates
   instead of matching its own replacement forever. */
static char *translate_blanks(const char *cmd, const char *blankstr)
{
	char *res;
	char *p;
	size_t bslen = strlen(blankstr);

	if (!(res = strdup(cmd))) {
		perror("strdup");
		exit(1);
	}

	/* An empty token matches at every position; nothing to replace. */
	if (bslen == 0)
		return res;

	p = res;
	while ((p = strstr(p, blankstr)) != NULL) {
		*p = ' ';
		if (bslen != 1)
			/* Close the gap left by the rest of the token,
			 * moving the terminating NUL as well. */
			memmove(p + 1, p + bslen, strlen(p + bslen) + 1);
		p++; /* continue after the space just written */
	}
	return res;
}
956
a6810074 957struct zebra_privs_t watchfrr_privs = {
95c4aff2 958#ifdef VTY_GROUP
a6810074 959 .vty_group = VTY_GROUP,
95c4aff2
DL
960#endif
961};
962
4f04a76b
DL
963static struct quagga_signal_t watchfrr_signals[] = {
964 {
965 .signal = SIGINT,
966 .handler = sigint,
967 },
968 {
969 .signal = SIGTERM,
970 .handler = sigint,
971 },
972 {
973 .signal = SIGCHLD,
974 .handler = sigchild,
975 },
976};
977
978FRR_DAEMON_INFO(watchfrr, WATCHFRR,
d62a17ae 979 .flags = FRR_NO_PRIVSEP | FRR_NO_TCPVTY | FRR_LIMITED_CLI
980 | FRR_NO_CFG_PID_DRY | FRR_NO_ZCLIENT,
4f04a76b 981
d62a17ae 982 .printhelp = printhelp,
983 .copyright = "Copyright 2004 Andrew J. Schorr",
4f04a76b 984
d62a17ae 985 .signals = watchfrr_signals,
986 .n_signals = array_size(watchfrr_signals),
4f04a76b 987
d62a17ae 988 .privs = &watchfrr_privs, )
4f04a76b 989
a6810074 990int main(int argc, char **argv)
8b886ca7 991{
a6810074 992 int opt;
a6810074
DL
993 const char *pidfile = DEFAULT_PIDFILE;
994 const char *special = "zebra";
995 const char *blankstr = NULL;
a6810074 996
4f04a76b
DL
997 frr_preinit(&watchfrr_di, argc, argv);
998 progname = watchfrr_di.progname;
999
f168b713 1000 frr_opt_add("b:dek:l:i:p:r:S:s:t:T:z", longopts, "");
a6810074
DL
1001
1002 gs.restart.name = "all";
4f04a76b 1003 while ((opt = frr_getopt(argc, argv, NULL)) != EOF) {
a6810074
DL
1004 switch (opt) {
1005 case 0:
1006 break;
a6810074
DL
1007 case 'b':
1008 blankstr = optarg;
1009 break;
f168b713
DL
1010 case OPTION_DRY:
1011 watch_only = true;
1012 break;
a6810074
DL
1013 case 'e':
1014 gs.do_ping = 0;
1015 break;
1016 case 'k':
1017 if (!valid_command(optarg)) {
1018 fprintf(stderr,
1019 "Invalid kill command, must contain '%%s': %s\n",
1020 optarg);
4f04a76b 1021 frr_help_exit(1);
a6810074
DL
1022 }
1023 gs.stop_command = optarg;
1024 break;
d62a17ae 1025 case 'l': {
1026 char garbage[3];
1027 if ((sscanf(optarg, "%d%1s", &gs.loglevel, garbage)
1028 != 1)
1029 || (gs.loglevel < LOG_EMERG)) {
1030 fprintf(stderr,
1031 "Invalid loglevel argument: %s\n",
1032 optarg);
1033 frr_help_exit(1);
a6810074 1034 }
d62a17ae 1035 } break;
1036 case OPTION_MINRESTART: {
1037 char garbage[3];
1038 if ((sscanf(optarg, "%ld%1s", &gs.min_restart_interval,
1039 garbage)
1040 != 1)
1041 || (gs.min_restart_interval < 0)) {
1042 fprintf(stderr,
1043 "Invalid min_restart_interval argument: %s\n",
1044 optarg);
1045 frr_help_exit(1);
a6810074 1046 }
d62a17ae 1047 } break;
1048 case OPTION_MAXRESTART: {
1049 char garbage[3];
1050 if ((sscanf(optarg, "%ld%1s", &gs.max_restart_interval,
1051 garbage)
1052 != 1)
1053 || (gs.max_restart_interval < 0)) {
1054 fprintf(stderr,
1055 "Invalid max_restart_interval argument: %s\n",
1056 optarg);
1057 frr_help_exit(1);
a6810074 1058 }
d62a17ae 1059 } break;
1060 case 'i': {
1061 char garbage[3];
1062 int period;
1063 if ((sscanf(optarg, "%d%1s", &period, garbage) != 1)
1064 || (gs.period < 1)) {
1065 fprintf(stderr,
1066 "Invalid interval argument: %s\n",
1067 optarg);
1068 frr_help_exit(1);
a6810074 1069 }
d62a17ae 1070 gs.period = 1000 * period;
1071 } break;
a6810074
DL
1072 case 'p':
1073 pidfile = optarg;
1074 break;
1075 case 'r':
a6810074
DL
1076 if (!valid_command(optarg)) {
1077 fprintf(stderr,
1078 "Invalid restart command, must contain '%%s': %s\n",
1079 optarg);
4f04a76b 1080 frr_help_exit(1);
a6810074
DL
1081 }
1082 gs.restart_command = optarg;
a6810074
DL
1083 break;
1084 case 's':
1085 if (!valid_command(optarg)) {
1086 fprintf(stderr,
1087 "Invalid start command, must contain '%%s': %s\n",
1088 optarg);
4f04a76b 1089 frr_help_exit(1);
a6810074
DL
1090 }
1091 gs.start_command = optarg;
1092 break;
1093 case 'S':
1094 gs.vtydir = optarg;
1095 break;
d62a17ae 1096 case 't': {
1097 char garbage[3];
1098 if ((sscanf(optarg, "%ld%1s", &gs.timeout, garbage)
1099 != 1)
1100 || (gs.timeout < 1)) {
1101 fprintf(stderr,
1102 "Invalid timeout argument: %s\n",
1103 optarg);
1104 frr_help_exit(1);
a6810074 1105 }
d62a17ae 1106 } break;
1107 case 'T': {
1108 char garbage[3];
1109 if ((sscanf(optarg, "%ld%1s", &gs.restart_timeout,
1110 garbage)
1111 != 1)
1112 || (gs.restart_timeout < 1)) {
1113 fprintf(stderr,
1114 "Invalid restart timeout argument: %s\n",
1115 optarg);
1116 frr_help_exit(1);
a6810074 1117 }
d62a17ae 1118 } break;
a6810074
DL
1119 case 'z':
1120 gs.unresponsive_restart = 1;
1121 break;
a6810074
DL
1122 default:
1123 fputs("Invalid option.\n", stderr);
4f04a76b 1124 frr_help_exit(1);
a6810074 1125 }
8b886ca7 1126 }
a6810074 1127
f168b713
DL
1128 if (watch_only && (gs.unresponsive_restart || gs.start_command
1129 || gs.stop_command || gs.restart_command)) {
1130 fputs("Options -z/-r/-s/-k make no sense combined with -D.\n",
a6810074 1131 stderr);
4f04a76b 1132 frr_help_exit(1);
8b886ca7 1133 }
f168b713
DL
1134 if (!watch_only
1135 && (!gs.restart_command || !gs.start_command || !gs.stop_command)) {
1136 fprintf(stderr,
1137 "Options -s (start), -k (kill), and -r (restart) are required.\n");
1138 frr_help_exit(1);
8b886ca7 1139 }
8b886ca7 1140
a6810074
DL
1141 if (blankstr) {
1142 if (gs.restart_command)
1143 gs.restart_command =
d62a17ae 1144 translate_blanks(gs.restart_command, blankstr);
a6810074
DL
1145 if (gs.start_command)
1146 gs.start_command =
d62a17ae 1147 translate_blanks(gs.start_command, blankstr);
a6810074
DL
1148 if (gs.stop_command)
1149 gs.stop_command =
d62a17ae 1150 translate_blanks(gs.stop_command, blankstr);
065de903 1151 }
8b886ca7 1152
a6810074 1153 gs.restart.interval = gs.min_restart_interval;
8b886ca7 1154
4f04a76b
DL
1155 master = frr_init();
1156
dd8376fe 1157 zlog_set_level(ZLOG_DEST_MONITOR, ZLOG_DISABLED);
eb05883f 1158 if (watchfrr_di.daemon_mode) {
dd8376fe 1159 zlog_set_level(ZLOG_DEST_SYSLOG, MIN(gs.loglevel, LOG_DEBUG));
d62a17ae 1160 if (daemon(0, 0) < 0) {
2f4f11fa 1161 fprintf(stderr, "Watchfrr daemon failed: %s",
d62a17ae 1162 strerror(errno));
1163 exit(1);
4f04a76b
DL
1164 }
1165 } else
dd8376fe 1166 zlog_set_level(ZLOG_DEST_STDOUT, MIN(gs.loglevel, LOG_DEBUG));
8b886ca7 1167
a6810074 1168 watchfrr_vty_init();
8b886ca7 1169
eb05883f 1170 frr_vty_serv();
8b886ca7 1171
8b886ca7 1172 {
a6810074
DL
1173 int i;
1174 struct daemon *tail = NULL;
1175
1176 for (i = optind; i < argc; i++) {
1177 struct daemon *dmn;
1178
1179 if (!(dmn = (struct daemon *)calloc(1, sizeof(*dmn)))) {
1180 fprintf(stderr, "calloc(1,%u) failed: %s\n",
d62a17ae 1181 (u_int)sizeof(*dmn),
a6810074
DL
1182 safe_strerror(errno));
1183 return 1;
1184 }
1185 dmn->name = dmn->restart.name = argv[i];
1186 dmn->state = DAEMON_INIT;
1187 gs.numdaemons++;
1188 gs.numdown++;
1189 dmn->fd = -1;
66e78ae6 1190 dmn->t_wakeup = NULL;
d62a17ae 1191 thread_add_timer_msec(master, wakeup_init, dmn,
1192 100 + (random() % 900),
66e78ae6 1193 &dmn->t_wakeup);
a6810074
DL
1194 dmn->restart.interval = gs.min_restart_interval;
1195 if (tail)
1196 tail->next = dmn;
1197 else
1198 gs.daemons = dmn;
1199 tail = dmn;
1200
f168b713 1201 if (!strcmp(dmn->name, special))
a6810074
DL
1202 gs.special = dmn;
1203 }
1204 }
1205 if (!gs.daemons) {
1206 fputs("Must specify one or more daemons to monitor.\n", stderr);
4f04a76b 1207 frr_help_exit(1);
a6810074 1208 }
f168b713
DL
1209 if (!watch_only && !gs.special) {
1210 fprintf(stderr, "\"%s\" daemon must be in daemon list\n",
1211 special);
4f04a76b 1212 frr_help_exit(1);
8b886ca7 1213 }
8b886ca7 1214
a6810074
DL
1215 /* Make sure we're not already running. */
1216 pid_output(pidfile);
1217
1218 /* Announce which daemons are being monitored. */
1219 {
1220 struct daemon *dmn;
1221 size_t len = 0;
1222
1223 for (dmn = gs.daemons; dmn; dmn = dmn->next)
1224 len += strlen(dmn->name) + 1;
1225
1226 {
1227 char buf[len + 1];
1228 char *p = buf;
1229
1230 for (dmn = gs.daemons; dmn; dmn = dmn->next) {
1231 if (p != buf)
1232 *p++ = ' ';
1233 strcpy(p, dmn->name);
1234 p += strlen(p);
1235 }
f168b713
DL
1236 zlog_notice("%s %s watching [%s]%s", progname,
1237 FRR_VERSION, buf,
1238 watch_only ? ", monitor mode" : "");
a6810074
DL
1239 }
1240 }
8b886ca7 1241
a6810074
DL
1242 {
1243 struct thread thread;
1244
1245 while (thread_fetch(master, &thread))
1246 thread_call(&thread);
1247 }
8b886ca7 1248
a6810074
DL
1249 systemd_send_stopping();
1250 /* Not reached. */
1251 return 0;
8b886ca7 1252}