]> git.proxmox.com Git - mirror_frr.git/blame - watchfrr/watchfrr.c
Merge pull request #2304 from ppmathis/enhancement/bgp-pg-overrides
[mirror_frr.git] / watchfrr / watchfrr.c
CommitLineData
8b886ca7 1/*
896014f4
DL
2 * Monitor status of frr daemons and restart if necessary.
3 *
4 * Copyright (C) 2004 Andrew J. Schorr
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
8b886ca7 19 */
20
a365534f 21#include <zebra.h>
8b886ca7 22#include <thread.h>
23#include <log.h>
52e66296 24#include <network.h>
8b886ca7 25#include <sigevent.h>
a365534f 26#include <lib/version.h>
95c4aff2 27#include "command.h"
87f44e2f 28#include "memory_vty.h"
4f04a76b 29#include "libfrr.h"
95c4aff2 30
6f594023 31#include <getopt.h>
a365534f 32#include <sys/un.h>
33#include <sys/wait.h>
837d16cc 34#include <memory.h>
651415bd 35#include <systemd.h>
8b886ca7 36
9473e340 37#include "watchfrr.h"
95c4aff2 38
8b886ca7 39#ifndef MIN
40#define MIN(X,Y) (((X) <= (Y)) ? (X) : (Y))
41#endif
42
43/* Macros to help randomize timers. */
44#define JITTER(X) ((random() % ((X)+1))-((X)/2))
45#define FUZZY(X) ((X)+JITTER((X)/20))
46
47#define DEFAULT_PERIOD 5
0a64aff6 48#define DEFAULT_TIMEOUT 90
8b886ca7 49#define DEFAULT_RESTART_TIMEOUT 20
50#define DEFAULT_LOGLEVEL LOG_INFO
51#define DEFAULT_MIN_RESTART 60
52#define DEFAULT_MAX_RESTART 600
8b886ca7 53
54#define PING_TOKEN "PING"
55
55c72803 56/* Needs to be global, referenced somewhere inside libfrr. */
8b886ca7 57struct thread_master *master;
64a249ad 58static char pidfile_default[256];
8b886ca7 59
f168b713 60static bool watch_only = false;
8b886ca7 61
a6810074
DL
62typedef enum {
63 PHASE_NONE = 0,
64 PHASE_STOPS_PENDING,
65 PHASE_WAITING_DOWN,
66 PHASE_ZEBRA_RESTART_PENDING,
67 PHASE_WAITING_ZEBRA_UP
8b886ca7 68} restart_phase_t;
69
a6810074
DL
70static const char *phase_str[] = {
71 "None",
72 "Stop jobs running",
73 "Waiting for other daemons to come down",
74 "Zebra restart job running",
75 "Waiting for zebra to come up",
76 "Start jobs running",
8b886ca7 77};
78
79#define PHASE_TIMEOUT (3*gs.restart_timeout)
80
a6810074
DL
81struct restart_info {
82 const char *name;
83 const char *what;
84 pid_t pid;
85 struct timeval time;
86 long interval;
87 struct thread *t_kill;
88 int kills;
098e240f 89};
90
a6810074 91static struct global_state {
a6810074
DL
92 restart_phase_t phase;
93 struct thread *t_phase_hanging;
94 const char *vtydir;
95 long period;
96 long timeout;
97 long restart_timeout;
98 long min_restart_interval;
99 long max_restart_interval;
a6810074
DL
100 struct daemon *daemons;
101 const char *restart_command;
102 const char *start_command;
103 const char *stop_command;
104 struct restart_info restart;
a6810074 105 int loglevel;
d62a17ae 106 struct daemon *special; /* points to zebra when doing phased restart */
a6810074
DL
107 int numdaemons;
108 int numpids;
d62a17ae 109 int numdown; /* # of daemons that are not UP or UNRESPONSIVE */
8b886ca7 110} gs = {
d62a17ae 111 .phase = PHASE_NONE,
64a249ad 112 .vtydir = frr_vtydir,
d62a17ae 113 .period = 1000 * DEFAULT_PERIOD,
114 .timeout = DEFAULT_TIMEOUT,
115 .restart_timeout = DEFAULT_RESTART_TIMEOUT,
116 .loglevel = DEFAULT_LOGLEVEL,
117 .min_restart_interval = DEFAULT_MIN_RESTART,
118 .max_restart_interval = DEFAULT_MAX_RESTART,
d62a17ae 119};
a6810074
DL
120
121typedef enum {
122 DAEMON_INIT,
123 DAEMON_DOWN,
124 DAEMON_CONNECTING,
125 DAEMON_UP,
126 DAEMON_UNRESPONSIVE
8b886ca7 127} daemon_state_t;
128
d62a17ae 129#define IS_UP(DMN) \
130 (((DMN)->state == DAEMON_UP) || ((DMN)->state == DAEMON_UNRESPONSIVE))
8b886ca7 131
a6810074 132static const char *state_str[] = {
d62a17ae 133 "Init", "Down", "Connecting", "Up", "Unresponsive",
8b886ca7 134};
135
136struct daemon {
a6810074
DL
137 const char *name;
138 daemon_state_t state;
139 int fd;
140 struct timeval echo_sent;
d7c0a89a 141 unsigned int connect_tries;
a6810074
DL
142 struct thread *t_wakeup;
143 struct thread *t_read;
144 struct thread *t_write;
145 struct daemon *next;
146 struct restart_info restart;
8b886ca7 147};
148
9272302b
DL
149#define OPTION_MINRESTART 2000
150#define OPTION_MAXRESTART 2001
f168b713 151#define OPTION_DRY 2002
9272302b 152
a6810074
DL
153static const struct option longopts[] = {
154 {"daemon", no_argument, NULL, 'd'},
155 {"statedir", required_argument, NULL, 'S'},
a6810074
DL
156 {"loglevel", required_argument, NULL, 'l'},
157 {"interval", required_argument, NULL, 'i'},
158 {"timeout", required_argument, NULL, 't'},
159 {"restart-timeout", required_argument, NULL, 'T'},
160 {"restart", required_argument, NULL, 'r'},
161 {"start-command", required_argument, NULL, 's'},
162 {"kill-command", required_argument, NULL, 'k'},
f168b713 163 {"dry", no_argument, NULL, OPTION_DRY},
d62a17ae 164 {"min-restart-interval", required_argument, NULL, OPTION_MINRESTART},
165 {"max-restart-interval", required_argument, NULL, OPTION_MAXRESTART},
a6810074
DL
166 {"pid-file", required_argument, NULL, 'p'},
167 {"blank-string", required_argument, NULL, 'b'},
168 {"help", no_argument, NULL, 'h'},
169 {"version", no_argument, NULL, 'v'},
d62a17ae 170 {NULL, 0, NULL, 0}};
8b886ca7 171
172static int try_connect(struct daemon *dmn);
173static int wakeup_send_echo(struct thread *t_wakeup);
174static void try_restart(struct daemon *dmn);
175static void phase_check(void);
176
4f04a76b
DL
177static const char *progname;
178static void printhelp(FILE *target)
8b886ca7 179{
d62a17ae 180 fprintf(target,
181 "Usage : %s [OPTION...] <daemon name> ...\n\n\
9473e340 182Watchdog program to monitor status of frr daemons and try to restart\n\
8b886ca7 183them if they are down or unresponsive. It determines whether a daemon is\n\
184up based on whether it can connect to the daemon's vty unix stream socket.\n\
185It then repeatedly sends echo commands over that socket to determine whether\n\
186the daemon is responsive. If the daemon crashes, we will receive an EOF\n\
187on the socket connection and know immediately that the daemon is down.\n\n\
188The daemons to be monitored should be listed on the command line.\n\n\
8b886ca7 189In order to avoid attempting to restart the daemons in a fast loop,\n\
190the -m and -M options allow you to control the minimum delay between\n\
191restart commands. The minimum restart delay is recalculated each time\n\
192a restart is attempted: if the time since the last restart attempt exceeds\n\
193twice the -M value, then the restart delay is set to the -m value.\n\
d62a17ae 194Otherwise, the interval is doubled (but capped at the -M value).\n\n",
f168b713 195 progname);
e757c940 196
d62a17ae 197 fprintf(target,
198 "Options:\n\
8b886ca7 199-d, --daemon Run in daemon mode. In this mode, error messages are sent\n\
200 to syslog instead of stdout.\n\
201-S, --statedir Set the vty socket directory (default is %s)\n\
8b886ca7 202-l, --loglevel Set the logging level (default is %d).\n\
203 The value should range from %d (LOG_EMERG) to %d (LOG_DEBUG),\n\
204 but it can be set higher than %d if extra-verbose debugging\n\
205 messages are desired.\n\
9272302b 206 --min-restart-interval\n\
8b886ca7 207 Set the minimum seconds to wait between invocations of daemon\n\
208 restart commands (default is %d).\n\
9272302b 209 --max-restart-interval\n\
8b886ca7 210 Set the maximum seconds to wait between invocations of daemon\n\
211 restart commands (default is %d).\n\
212-i, --interval Set the status polling interval in seconds (default is %d)\n\
213-t, --timeout Set the unresponsiveness timeout in seconds (default is %d)\n\
214-T, --restart-timeout\n\
215 Set the restart (kill) timeout in seconds (default is %d).\n\
216 If any background jobs are still running after this much\n\
217 time has elapsed, they will be killed.\n\
218-r, --restart Supply a Bourne shell command to use to restart a single\n\
219 daemon. The command string should include '%%s' where the\n\
220 name of the daemon should be substituted.\n\
8b886ca7 221-s, --start-command\n\
222 Supply a Bourne shell to command to use to start a single\n\
223 daemon. The command string should include '%%s' where the\n\
224 name of the daemon should be substituted.\n\
225-k, --kill-command\n\
226 Supply a Bourne shell to command to use to stop a single\n\
227 daemon. The command string should include '%%s' where the\n\
228 name of the daemon should be substituted.\n\
f168b713 229 --dry Do not start or restart anything, just log.\n\
8b886ca7 230-p, --pid-file Set process identifier file name\n\
231 (default is %s).\n\
c8b40f86 232-b, --blank-string\n\
233 When the supplied argument string is found in any of the\n\
f168b713 234 various shell command arguments (-r, -s, or -k), replace\n\
c8b40f86 235 it with a space. This is an ugly hack to circumvent problems\n\
236 passing command-line arguments with embedded spaces.\n\
8b886ca7 237-v, --version Print program version\n\
d62a17ae 238-h, --help Display this help and exit\n",
64a249ad 239 frr_vtydir, DEFAULT_LOGLEVEL, LOG_EMERG, LOG_DEBUG, LOG_DEBUG,
d62a17ae 240 DEFAULT_MIN_RESTART, DEFAULT_MAX_RESTART, DEFAULT_PERIOD,
64a249ad 241 DEFAULT_TIMEOUT, DEFAULT_RESTART_TIMEOUT, pidfile_default);
8b886ca7 242}
243
/*
 * Run shell_cmd asynchronously through "/bin/sh -c".
 *
 * Returns the child's pid in the parent, or -1 if fork() failed.  The child
 * is not waited for here; it is reaped later by the SIGCHLD handler.
 */
static pid_t run_background(char *shell_cmd)
{
	pid_t child = fork();

	if (child == -1) {
		zlog_err("fork failed, cannot run command [%s]: %s", shell_cmd,
			 safe_strerror(errno));
		return -1;
	}

	if (child == 0) {
		/* Child process. */
		char shell[] = "sh";
		char dashc[] = "-c";
		char *const argv[4] = {shell, dashc, shell_cmd, NULL};

		/* Put the job in its own process group so that it can be
		 * signalled as a group (restart_kill() uses kill(-pid)). */
		if (setpgid(0, 0) < 0)
			zlog_warn("warning: setpgid(0,0) failed: %s",
				  safe_strerror(errno));

		execv("/bin/sh", argv);
		/* Only reached when execv() itself failed. */
		zlog_err("execv(/bin/sh -c '%s') failed: %s", shell_cmd,
			 safe_strerror(errno));
		_exit(127);
	}

	/* Parent process: we will reap the child later. */
	zlog_err("Forked background command [pid %d]: %s", (int)child,
		 shell_cmd);
	return child;
}
276
a6810074
DL
/*
 * Store in *result the wall-clock time that has passed since *start_time
 * and return result.  tv_usec is normalised to be non-negative by borrowing
 * from tv_sec.
 */
static struct timeval *time_elapsed(struct timeval *result,
				    const struct timeval *start_time)
{
	gettimeofday(result, NULL);
	result->tv_sec -= start_time->tv_sec;
	result->tv_usec -= start_time->tv_usec;

	/* Borrow whole seconds until the microsecond field is in range. */
	for (; result->tv_usec < 0; result->tv_sec--)
		result->tv_usec += 1000000L;

	return result;
}
289
a6810074 290static int restart_kill(struct thread *t_kill)
8b886ca7 291{
a6810074
DL
292 struct restart_info *restart = THREAD_ARG(t_kill);
293 struct timeval delay;
294
295 time_elapsed(&delay, &restart->time);
d62a17ae 296 zlog_warn(
297 "Warning: %s %s child process %d still running after "
298 "%ld seconds, sending signal %d",
299 restart->what, restart->name, (int)restart->pid,
300 (long)delay.tv_sec, (restart->kills ? SIGKILL : SIGTERM));
a6810074
DL
301 kill(-restart->pid, (restart->kills ? SIGKILL : SIGTERM));
302 restart->kills++;
66e78ae6
QY
303 restart->t_kill = NULL;
304 thread_add_timer(master, restart_kill, restart, gs.restart_timeout,
305 &restart->t_kill);
a6810074 306 return 0;
8b886ca7 307}
308
a6810074 309static struct restart_info *find_child(pid_t child)
8b886ca7 310{
f168b713
DL
311 struct daemon *dmn;
312 for (dmn = gs.daemons; dmn; dmn = dmn->next) {
313 if (dmn->restart.pid == child)
314 return &dmn->restart;
a6810074
DL
315 }
316 return NULL;
8b886ca7 317}
318
a6810074 319static void sigchild(void)
8b886ca7 320{
a6810074
DL
321 pid_t child;
322 int status;
323 const char *name;
324 const char *what;
325 struct restart_info *restart;
326
327 switch (child = waitpid(-1, &status, WNOHANG)) {
328 case -1:
329 zlog_err("waitpid failed: %s", safe_strerror(errno));
330 return;
331 case 0:
332 zlog_warn("SIGCHLD received, but waitpid did not reap a child");
333 return;
334 }
335
336 if (child == integrated_write_pid) {
337 integrated_write_sigchld(status);
338 return;
339 }
340
341 if ((restart = find_child(child)) != NULL) {
342 name = restart->name;
343 what = restart->what;
344 restart->pid = 0;
345 gs.numpids--;
346 thread_cancel(restart->t_kill);
347 restart->t_kill = NULL;
d62a17ae 348 /* Update restart time to reflect the time the command
349 * completed. */
a6810074
DL
350 gettimeofday(&restart->time, NULL);
351 } else {
d62a17ae 352 zlog_err(
353 "waitpid returned status for an unknown child process %d",
354 (int)child);
a6810074
DL
355 name = "(unknown)";
356 what = "background";
357 }
358 if (WIFSTOPPED(status))
d62a17ae 359 zlog_warn("warning: %s %s process %d is stopped", what, name,
360 (int)child);
a6810074 361 else if (WIFSIGNALED(status))
d62a17ae 362 zlog_warn("%s %s process %d terminated due to signal %d", what,
363 name, (int)child, WTERMSIG(status));
a6810074
DL
364 else if (WIFEXITED(status)) {
365 if (WEXITSTATUS(status) != 0)
d62a17ae 366 zlog_warn(
367 "%s %s process %d exited with non-zero status %d",
368 what, name, (int)child, WEXITSTATUS(status));
a6810074
DL
369 else
370 zlog_debug("%s %s process %d exited normally", what,
371 name, (int)child);
372 } else
373 zlog_err("cannot interpret %s %s process %d wait status 0x%x",
374 what, name, (int)child, status);
375 phase_check();
8b886ca7 376}
377
d62a17ae 378static int run_job(struct restart_info *restart, const char *cmdtype,
379 const char *command, int force, int update_interval)
8b886ca7 380{
a6810074
DL
381 struct timeval delay;
382
383 if (gs.loglevel > LOG_DEBUG + 1)
384 zlog_debug("attempting to %s %s", cmdtype, restart->name);
385
386 if (restart->pid) {
387 if (gs.loglevel > LOG_DEBUG + 1)
d62a17ae 388 zlog_debug(
389 "cannot %s %s, previous pid %d still running",
390 cmdtype, restart->name, (int)restart->pid);
a6810074
DL
391 return -1;
392 }
393
d62a17ae 394 /* Note: time_elapsed test must come before the force test, since we
395 need
a6810074
DL
396 to make sure that delay is initialized for use below in updating the
397 restart interval. */
398 if ((time_elapsed(&delay, &restart->time)->tv_sec < restart->interval)
399 && !force) {
400 if (gs.loglevel > LOG_DEBUG + 1)
d62a17ae 401 zlog_debug(
402 "postponing %s %s: "
403 "elapsed time %ld < retry interval %ld",
404 cmdtype, restart->name, (long)delay.tv_sec,
405 restart->interval);
a6810074
DL
406 return -1;
407 }
408
409 gettimeofday(&restart->time, NULL);
410 restart->kills = 0;
411 {
412 char cmd[strlen(command) + strlen(restart->name) + 1];
413 snprintf(cmd, sizeof(cmd), command, restart->name);
414 if ((restart->pid = run_background(cmd)) > 0) {
66e78ae6 415 restart->t_kill = NULL;
d62a17ae 416 thread_add_timer(master, restart_kill, restart,
417 gs.restart_timeout, &restart->t_kill);
a6810074
DL
418 restart->what = cmdtype;
419 gs.numpids++;
420 } else
421 restart->pid = 0;
422 }
423
424 /* Calculate the new restart interval. */
425 if (update_interval) {
426 if (delay.tv_sec > 2 * gs.max_restart_interval)
427 restart->interval = gs.min_restart_interval;
428 else if ((restart->interval *= 2) > gs.max_restart_interval)
429 restart->interval = gs.max_restart_interval;
430 if (gs.loglevel > LOG_DEBUG + 1)
431 zlog_debug("restart %s interval is now %ld",
432 restart->name, restart->interval);
433 }
434 return restart->pid;
8b886ca7 435}
436
/*
 * Helpers that (re)arm a daemon's read handler or wakeup timer.  Each one
 * clears the stored thread reference before registering the new event.
 *
 * They expand to a single statement WITHOUT a trailing semicolon; the caller
 * supplies the ';'.  This keeps them safe inside an unbraced if/else.
 */
#define SET_READ_HANDLER(DMN)                                                  \
	do {                                                                   \
		(DMN)->t_read = NULL;                                          \
		thread_add_read(master, handle_read, (DMN), (DMN)->fd,         \
				&(DMN)->t_read);                               \
	} while (0)

#define SET_WAKEUP_DOWN(DMN)                                                   \
	do {                                                                   \
		(DMN)->t_wakeup = NULL;                                        \
		thread_add_timer_msec(master, wakeup_down, (DMN),              \
				      FUZZY(gs.period), &(DMN)->t_wakeup);     \
	} while (0)

#define SET_WAKEUP_UNRESPONSIVE(DMN)                                           \
	do {                                                                   \
		(DMN)->t_wakeup = NULL;                                        \
		thread_add_timer_msec(master, wakeup_unresponsive, (DMN),      \
				      FUZZY(gs.period), &(DMN)->t_wakeup);     \
	} while (0)

#define SET_WAKEUP_ECHO(DMN)                                                   \
	do {                                                                   \
		(DMN)->t_wakeup = NULL;                                        \
		thread_add_timer_msec(master, wakeup_send_echo, (DMN),         \
				      FUZZY(gs.period), &(DMN)->t_wakeup);     \
	} while (0)
8b886ca7 464
a6810074 465static int wakeup_down(struct thread *t_wakeup)
8b886ca7 466{
a6810074
DL
467 struct daemon *dmn = THREAD_ARG(t_wakeup);
468
469 dmn->t_wakeup = NULL;
470 if (try_connect(dmn) < 0)
471 SET_WAKEUP_DOWN(dmn);
472 if ((dmn->connect_tries > 1) && (dmn->state != DAEMON_UP))
473 try_restart(dmn);
474 return 0;
8b886ca7 475}
476
a6810074 477static int wakeup_init(struct thread *t_wakeup)
8b886ca7 478{
a6810074
DL
479 struct daemon *dmn = THREAD_ARG(t_wakeup);
480
481 dmn->t_wakeup = NULL;
482 if (try_connect(dmn) < 0) {
483 SET_WAKEUP_DOWN(dmn);
484 zlog_err("%s state -> down : initial connection attempt failed",
485 dmn->name);
486 dmn->state = DAEMON_DOWN;
487 }
488 return 0;
8b886ca7 489}
490
a6810074 491static void daemon_down(struct daemon *dmn, const char *why)
8b886ca7 492{
a6810074
DL
493 if (IS_UP(dmn) || (dmn->state == DAEMON_INIT))
494 zlog_err("%s state -> down : %s", dmn->name, why);
495 else if (gs.loglevel > LOG_DEBUG)
496 zlog_debug("%s still down : %s", dmn->name, why);
497 if (IS_UP(dmn))
498 gs.numdown++;
499 dmn->state = DAEMON_DOWN;
500 if (dmn->fd >= 0) {
501 close(dmn->fd);
502 dmn->fd = -1;
503 }
504 THREAD_OFF(dmn->t_read);
505 THREAD_OFF(dmn->t_write);
506 THREAD_OFF(dmn->t_wakeup);
507 if (try_connect(dmn) < 0)
508 SET_WAKEUP_DOWN(dmn);
509 phase_check();
8b886ca7 510}
511
a6810074 512static int handle_read(struct thread *t_read)
8b886ca7 513{
a6810074
DL
514 struct daemon *dmn = THREAD_ARG(t_read);
515 static const char resp[sizeof(PING_TOKEN) + 4] = PING_TOKEN "\n";
516 char buf[sizeof(resp) + 100];
517 ssize_t rc;
518 struct timeval delay;
519
520 dmn->t_read = NULL;
521 if ((rc = read(dmn->fd, buf, sizeof(buf))) < 0) {
522 char why[100];
523
524 if (ERRNO_IO_RETRY(errno)) {
525 /* Pretend it never happened. */
526 SET_READ_HANDLER(dmn);
527 return 0;
528 }
529 snprintf(why, sizeof(why), "unexpected read error: %s",
530 safe_strerror(errno));
531 daemon_down(dmn, why);
532 return 0;
8b886ca7 533 }
a6810074
DL
534 if (rc == 0) {
535 daemon_down(dmn, "read returned EOF");
536 return 0;
537 }
538 if (!dmn->echo_sent.tv_sec) {
539 char why[sizeof(buf) + 100];
540 snprintf(why, sizeof(why),
541 "unexpected read returns %d bytes: %.*s", (int)rc,
542 (int)rc, buf);
543 daemon_down(dmn, why);
544 return 0;
8b886ca7 545 }
a6810074
DL
546
547 /* We are expecting an echo response: is there any chance that the
548 response would not be returned entirely in the first read? That
549 seems inconceivable... */
550 if ((rc != sizeof(resp)) || memcmp(buf, resp, sizeof(resp))) {
551 char why[100 + sizeof(buf)];
552 snprintf(why, sizeof(why),
553 "read returned bad echo response of %d bytes "
d62a17ae 554 "(expecting %u): %.*s",
d7c0a89a 555 (int)rc, (unsigned int)sizeof(resp), (int)rc, buf);
a6810074
DL
556 daemon_down(dmn, why);
557 return 0;
558 }
559
560 time_elapsed(&delay, &dmn->echo_sent);
561 dmn->echo_sent.tv_sec = 0;
562 if (dmn->state == DAEMON_UNRESPONSIVE) {
563 if (delay.tv_sec < gs.timeout) {
564 dmn->state = DAEMON_UP;
d62a17ae 565 zlog_warn(
566 "%s state -> up : echo response received after %ld.%06ld "
567 "seconds",
568 dmn->name, (long)delay.tv_sec,
569 (long)delay.tv_usec);
a6810074 570 } else
d62a17ae 571 zlog_warn(
572 "%s: slow echo response finally received after %ld.%06ld "
573 "seconds",
574 dmn->name, (long)delay.tv_sec,
575 (long)delay.tv_usec);
a6810074
DL
576 } else if (gs.loglevel > LOG_DEBUG + 1)
577 zlog_debug("%s: echo response received after %ld.%06ld seconds",
578 dmn->name, (long)delay.tv_sec, (long)delay.tv_usec);
579
580 SET_READ_HANDLER(dmn);
581 if (dmn->t_wakeup)
582 thread_cancel(dmn->t_wakeup);
583 SET_WAKEUP_ECHO(dmn);
584
585 return 0;
8b886ca7 586}
587
207e0d7a
DS
588/*
589 * Wait till we notice that all daemons are ready before
590 * we send we are ready to systemd
591 */
a6810074 592static void daemon_send_ready(void)
207e0d7a 593{
a6810074
DL
594 static int sent = 0;
595 if (!sent && gs.numdown == 0) {
a6810074 596 FILE *fp;
207e0d7a 597
a6810074 598 fp = fopen(DAEMON_VTY_DIR "/watchfrr.started", "w");
f5ba21fc
DS
599 if (fp)
600 fclose(fp);
60bd2534 601#if defined HAVE_SYSTEMD
d62a17ae 602 zlog_notice(
603 "Watchfrr: Notifying Systemd we are up and running");
a6810074 604 systemd_send_started(master, 0);
60bd2534 605#endif
a6810074
DL
606 sent = 1;
607 }
207e0d7a
DS
608}
609
a6810074 610static void daemon_up(struct daemon *dmn, const char *why)
8b886ca7 611{
a6810074
DL
612 dmn->state = DAEMON_UP;
613 gs.numdown--;
614 dmn->connect_tries = 0;
615 zlog_notice("%s state -> up : %s", dmn->name, why);
616 daemon_send_ready();
a8cbb8b3 617 SET_WAKEUP_ECHO(dmn);
a6810074 618 phase_check();
8b886ca7 619}
620
a6810074 621static int check_connect(struct thread *t_write)
8b886ca7 622{
a6810074
DL
623 struct daemon *dmn = THREAD_ARG(t_write);
624 int sockerr;
625 socklen_t reslen = sizeof(sockerr);
626
627 dmn->t_write = NULL;
628 if (getsockopt(dmn->fd, SOL_SOCKET, SO_ERROR, (char *)&sockerr, &reslen)
629 < 0) {
630 zlog_warn("%s: check_connect: getsockopt failed: %s", dmn->name,
631 safe_strerror(errno));
632 daemon_down(dmn,
633 "getsockopt failed checking connection success");
634 return 0;
635 }
636 if ((reslen == sizeof(sockerr)) && sockerr) {
637 char why[100];
d62a17ae 638 snprintf(
639 why, sizeof(why),
640 "getsockopt reports that connection attempt failed: %s",
641 safe_strerror(sockerr));
a6810074
DL
642 daemon_down(dmn, why);
643 return 0;
644 }
645
646 daemon_up(dmn, "delayed connect succeeded");
647 return 0;
8b886ca7 648}
649
a6810074 650static int wakeup_connect_hanging(struct thread *t_wakeup)
8b886ca7 651{
a6810074
DL
652 struct daemon *dmn = THREAD_ARG(t_wakeup);
653 char why[100];
654
655 dmn->t_wakeup = NULL;
656 snprintf(why, sizeof(why),
657 "connection attempt timed out after %ld seconds", gs.timeout);
658 daemon_down(dmn, why);
659 return 0;
8b886ca7 660}
661
662/* Making connection to protocol daemon. */
a6810074 663static int try_connect(struct daemon *dmn)
8b886ca7 664{
a6810074
DL
665 int sock;
666 struct sockaddr_un addr;
667 socklen_t len;
668
669 if (gs.loglevel > LOG_DEBUG + 1)
670 zlog_debug("%s: attempting to connect", dmn->name);
671 dmn->connect_tries++;
672
673 memset(&addr, 0, sizeof(struct sockaddr_un));
674 addr.sun_family = AF_UNIX;
d62a17ae 675 snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/%s.vty", gs.vtydir,
676 dmn->name);
6f0e3f6e 677#ifdef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN
a6810074 678 len = addr.sun_len = SUN_LEN(&addr);
8b886ca7 679#else
a6810074 680 len = sizeof(addr.sun_family) + strlen(addr.sun_path);
d62a17ae 681#endif /* HAVE_STRUCT_SOCKADDR_UN_SUN_LEN */
a6810074
DL
682
683 /* Quick check to see if we might succeed before we go to the trouble
684 of creating a socket. */
685 if (access(addr.sun_path, W_OK) < 0) {
686 if (errno != ENOENT)
687 zlog_err("%s: access to socket %s denied: %s",
688 dmn->name, addr.sun_path,
689 safe_strerror(errno));
690 return -1;
691 }
692
693 if ((sock = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
d62a17ae 694 zlog_err("%s(%s): cannot make socket: %s", __func__,
695 addr.sun_path, safe_strerror(errno));
a6810074
DL
696 return -1;
697 }
698
699 if (set_nonblocking(sock) < 0 || set_cloexec(sock) < 0) {
d62a17ae 700 zlog_err("%s(%s): set_nonblocking/cloexec(%d) failed", __func__,
701 addr.sun_path, sock);
a6810074
DL
702 close(sock);
703 return -1;
8b886ca7 704 }
a6810074
DL
705
706 if (connect(sock, (struct sockaddr *)&addr, len) < 0) {
707 if ((errno != EINPROGRESS) && (errno != EWOULDBLOCK)) {
708 if (gs.loglevel > LOG_DEBUG)
709 zlog_debug("%s(%s): connect failed: %s",
710 __func__, addr.sun_path,
711 safe_strerror(errno));
712 close(sock);
713 return -1;
714 }
715 if (gs.loglevel > LOG_DEBUG)
716 zlog_debug("%s: connection in progress", dmn->name);
717 dmn->state = DAEMON_CONNECTING;
718 dmn->fd = sock;
66e78ae6
QY
719 dmn->t_write = NULL;
720 thread_add_write(master, check_connect, dmn, dmn->fd,
d62a17ae 721 &dmn->t_write);
722 dmn->t_wakeup = NULL;
723 thread_add_timer(master, wakeup_connect_hanging, dmn,
724 gs.timeout, &dmn->t_wakeup);
a6810074
DL
725 SET_READ_HANDLER(dmn);
726 return 0;
727 }
728
729 dmn->fd = sock;
730 SET_READ_HANDLER(dmn);
731 daemon_up(dmn, "connect succeeded");
732 return 1;
8b886ca7 733}
734
a6810074 735static int phase_hanging(struct thread *t_hanging)
8b886ca7 736{
a6810074
DL
737 gs.t_phase_hanging = NULL;
738 zlog_err("Phase [%s] hanging for %ld seconds, aborting phased restart",
739 phase_str[gs.phase], PHASE_TIMEOUT);
740 gs.phase = PHASE_NONE;
741 return 0;
8b886ca7 742}
743
a6810074 744static void set_phase(restart_phase_t new_phase)
8b886ca7 745{
a6810074
DL
746 gs.phase = new_phase;
747 if (gs.t_phase_hanging)
748 thread_cancel(gs.t_phase_hanging);
66e78ae6
QY
749 gs.t_phase_hanging = NULL;
750 thread_add_timer(master, phase_hanging, NULL, PHASE_TIMEOUT,
751 &gs.t_phase_hanging);
8b886ca7 752}
753
a6810074 754static void phase_check(void)
8b886ca7 755{
a6810074
DL
756 switch (gs.phase) {
757 case PHASE_NONE:
758 break;
759 case PHASE_STOPS_PENDING:
760 if (gs.numpids)
761 break;
d62a17ae 762 zlog_info(
763 "Phased restart: all routing daemon stop jobs have completed.");
a6810074
DL
764 set_phase(PHASE_WAITING_DOWN);
765
d62a17ae 766 /*FALLTHRU*/
a6810074
DL
767 case PHASE_WAITING_DOWN:
768 if (gs.numdown + IS_UP(gs.special) < gs.numdaemons)
769 break;
770 zlog_info("Phased restart: all routing daemons now down.");
771 run_job(&gs.special->restart, "restart", gs.restart_command, 1,
772 1);
773 set_phase(PHASE_ZEBRA_RESTART_PENDING);
774
d62a17ae 775 /*FALLTHRU*/
a6810074
DL
776 case PHASE_ZEBRA_RESTART_PENDING:
777 if (gs.special->restart.pid)
778 break;
779 zlog_info("Phased restart: %s restart job completed.",
780 gs.special->name);
781 set_phase(PHASE_WAITING_ZEBRA_UP);
782
d62a17ae 783 /*FALLTHRU*/
a6810074
DL
784 case PHASE_WAITING_ZEBRA_UP:
785 if (!IS_UP(gs.special))
786 break;
787 zlog_info("Phased restart: %s is now up.", gs.special->name);
788 {
789 struct daemon *dmn;
790 for (dmn = gs.daemons; dmn; dmn = dmn->next) {
791 if (dmn != gs.special)
792 run_job(&dmn->restart, "start",
793 gs.start_command, 1, 0);
794 }
795 }
796 gs.phase = PHASE_NONE;
797 THREAD_OFF(gs.t_phase_hanging);
798 zlog_notice("Phased global restart has completed.");
799 break;
800 }
8b886ca7 801}
802
a6810074 803static void try_restart(struct daemon *dmn)
8b886ca7 804{
f168b713 805 if (watch_only)
a6810074 806 return;
a6810074 807
f168b713
DL
808 if (dmn != gs.special) {
809 if ((gs.special->state == DAEMON_UP)
810 && (gs.phase == PHASE_NONE))
811 run_job(&dmn->restart, "restart", gs.restart_command, 0,
812 1);
813 else
814 zlog_debug(
815 "%s: postponing restart attempt because master %s daemon "
816 "not up [%s], or phased restart in progress",
817 dmn->name, gs.special->name,
818 state_str[gs.special->state]);
819 return;
820 }
821
822 if ((gs.phase != PHASE_NONE) || gs.numpids) {
823 if (gs.loglevel > LOG_DEBUG + 1)
824 zlog_debug(
825 "postponing phased global restart: restart already in "
826 "progress [%s], or outstanding child processes [%d]",
827 phase_str[gs.phase], gs.numpids);
828 return;
829 }
830 /* Is it too soon for a restart? */
831 {
832 struct timeval delay;
833 if (time_elapsed(&delay, &gs.special->restart.time)->tv_sec
834 < gs.special->restart.interval) {
a6810074 835 if (gs.loglevel > LOG_DEBUG + 1)
d62a17ae 836 zlog_debug(
f168b713
DL
837 "postponing phased global restart: "
838 "elapsed time %ld < retry interval %ld",
839 (long)delay.tv_sec,
840 gs.special->restart.interval);
841 return;
a6810074 842 }
8b886ca7 843 }
f168b713 844 run_job(&gs.restart, "restart", gs.restart_command, 0, 1);
8b886ca7 845}
846
a6810074 847static int wakeup_unresponsive(struct thread *t_wakeup)
8b886ca7 848{
a6810074
DL
849 struct daemon *dmn = THREAD_ARG(t_wakeup);
850
851 dmn->t_wakeup = NULL;
852 if (dmn->state != DAEMON_UNRESPONSIVE)
d62a17ae 853 zlog_err(
854 "%s: no longer unresponsive (now %s), "
855 "wakeup should have been cancelled!",
856 dmn->name, state_str[dmn->state]);
a6810074
DL
857 else {
858 SET_WAKEUP_UNRESPONSIVE(dmn);
859 try_restart(dmn);
860 }
861 return 0;
8b886ca7 862}
863
a6810074 864static int wakeup_no_answer(struct thread *t_wakeup)
8b886ca7 865{
a6810074
DL
866 struct daemon *dmn = THREAD_ARG(t_wakeup);
867
868 dmn->t_wakeup = NULL;
869 dmn->state = DAEMON_UNRESPONSIVE;
d62a17ae 870 zlog_err(
871 "%s state -> unresponsive : no response yet to ping "
872 "sent %ld seconds ago",
873 dmn->name, gs.timeout);
71e7975a
DL
874 SET_WAKEUP_UNRESPONSIVE(dmn);
875 try_restart(dmn);
a6810074 876 return 0;
8b886ca7 877}
878
a6810074 879static int wakeup_send_echo(struct thread *t_wakeup)
8b886ca7 880{
a6810074
DL
881 static const char echocmd[] = "echo " PING_TOKEN;
882 ssize_t rc;
883 struct daemon *dmn = THREAD_ARG(t_wakeup);
884
885 dmn->t_wakeup = NULL;
d62a17ae 886 if (((rc = write(dmn->fd, echocmd, sizeof(echocmd))) < 0)
887 || ((size_t)rc != sizeof(echocmd))) {
a6810074
DL
888 char why[100 + sizeof(echocmd)];
889 snprintf(why, sizeof(why),
890 "write '%s' returned %d instead of %u", echocmd,
d7c0a89a 891 (int)rc, (unsigned int)sizeof(echocmd));
a6810074
DL
892 daemon_down(dmn, why);
893 } else {
894 gettimeofday(&dmn->echo_sent, NULL);
66e78ae6
QY
895 dmn->t_wakeup = NULL;
896 thread_add_timer(master, wakeup_no_answer, dmn, gs.timeout,
897 &dmn->t_wakeup);
a6810074
DL
898 }
899 return 0;
8b886ca7 900}
901
470bc619
QY
902bool check_all_up(void)
903{
904 struct daemon *dmn;
905
906 for (dmn = gs.daemons; dmn; dmn = dmn->next)
907 if (dmn->state != DAEMON_UP)
908 return false;
909 return true;
910}
911
/*
 * Handler installed for SIGINT and SIGTERM: log the shutdown, send the
 * systemd "stopping" notification and exit with status 0.
 */
static void sigint(void)
{
	zlog_notice("Terminating on signal");

	systemd_send_stopping();

	exit(0);
}
918
/*
 * Check that a command template is safe to use as a printf format with a
 * single string argument: it must contain exactly one '%', and that '%' must
 * be immediately followed by 's'.  Returns 1 if valid, 0 otherwise.
 */
static int valid_command(const char *cmd)
{
	const char *pct = strchr(cmd, '%');

	if (pct == NULL)
		return 0;
	if (pct[1] != 's')
		return 0;

	/* No second conversion (or stray '%') may follow. */
	return strchr(pct + 1, '%') == NULL;
}
926
/*
 * This is an ugly hack to circumvent problems with passing command-line
 * arguments that contain spaces.  The fix is to use a configuration file.
 *
 * Returns a newly allocated copy of cmd in which every occurrence of
 * blankstr has been replaced by a single space.  The process exits if the
 * copy cannot be allocated.
 *
 * An empty blankstr, or a blankstr that is itself a single space, leaves the
 * command unchanged.  Both would otherwise never terminate: the empty string
 * matches at every position (and the memmove below would grow the string past
 * its allocation), and replacing " " with " " makes no progress.
 */
static char *translate_blanks(const char *cmd, const char *blankstr)
{
	char *res;
	char *p;
	size_t bslen = strlen(blankstr);

	if (!(res = strdup(cmd))) {
		perror("strdup");
		exit(1);
	}

	if (bslen == 0 || (bslen == 1 && blankstr[0] == ' '))
		return res;

	while ((p = strstr(res, blankstr)) != NULL) {
		*p = ' ';
		/* Close the gap left by the rest of the marker, moving the
		 * terminating NUL along with the tail. */
		if (bslen != 1)
			memmove(p + 1, p + bslen, strlen(p + bslen) + 1);
	}
	return res;
}
946
a6810074 947struct zebra_privs_t watchfrr_privs = {
95c4aff2 948#ifdef VTY_GROUP
a6810074 949 .vty_group = VTY_GROUP,
95c4aff2
DL
950#endif
951};
952
4f04a76b
DL
953static struct quagga_signal_t watchfrr_signals[] = {
954 {
955 .signal = SIGINT,
956 .handler = sigint,
957 },
958 {
959 .signal = SIGTERM,
960 .handler = sigint,
961 },
962 {
963 .signal = SIGCHLD,
964 .handler = sigchild,
965 },
966};
967
968FRR_DAEMON_INFO(watchfrr, WATCHFRR,
d62a17ae 969 .flags = FRR_NO_PRIVSEP | FRR_NO_TCPVTY | FRR_LIMITED_CLI
970 | FRR_NO_CFG_PID_DRY | FRR_NO_ZCLIENT,
4f04a76b 971
d62a17ae 972 .printhelp = printhelp,
973 .copyright = "Copyright 2004 Andrew J. Schorr",
4f04a76b 974
d62a17ae 975 .signals = watchfrr_signals,
976 .n_signals = array_size(watchfrr_signals),
4f04a76b 977
d62a17ae 978 .privs = &watchfrr_privs, )
4f04a76b 979
999f153e
DL
980#define DEPRECATED_OPTIONS "aAezR:"
981
a6810074 982int main(int argc, char **argv)
8b886ca7 983{
a6810074 984 int opt;
64a249ad 985 const char *pidfile = pidfile_default;
a6810074
DL
986 const char *special = "zebra";
987 const char *blankstr = NULL;
a6810074 988
64a249ad
DL
989 snprintf(pidfile_default, sizeof(pidfile_default), "%s/watchfrr.pid",
990 frr_vtydir);
991
4f04a76b
DL
992 frr_preinit(&watchfrr_di, argc, argv);
993 progname = watchfrr_di.progname;
994
999f153e 995 frr_opt_add("b:dk:l:i:p:r:S:s:t:T:" DEPRECATED_OPTIONS, longopts, "");
a6810074
DL
996
997 gs.restart.name = "all";
4f04a76b 998 while ((opt = frr_getopt(argc, argv, NULL)) != EOF) {
999f153e
DL
999 if (opt && opt < 128 && strchr(DEPRECATED_OPTIONS, opt)) {
1000 fprintf(stderr,
1001 "The -%c option no longer exists.\n"
1002 "Please refer to the watchfrr(8) man page.\n",
1003 opt);
1004 exit(1);
1005 }
1006
a6810074
DL
1007 switch (opt) {
1008 case 0:
1009 break;
a6810074
DL
1010 case 'b':
1011 blankstr = optarg;
1012 break;
f168b713
DL
1013 case OPTION_DRY:
1014 watch_only = true;
a6810074
DL
1015 break;
1016 case 'k':
1017 if (!valid_command(optarg)) {
1018 fprintf(stderr,
1019 "Invalid kill command, must contain '%%s': %s\n",
1020 optarg);
4f04a76b 1021 frr_help_exit(1);
a6810074
DL
1022 }
1023 gs.stop_command = optarg;
1024 break;
d62a17ae 1025 case 'l': {
1026 char garbage[3];
1027 if ((sscanf(optarg, "%d%1s", &gs.loglevel, garbage)
1028 != 1)
1029 || (gs.loglevel < LOG_EMERG)) {
1030 fprintf(stderr,
1031 "Invalid loglevel argument: %s\n",
1032 optarg);
1033 frr_help_exit(1);
a6810074 1034 }
d62a17ae 1035 } break;
1036 case OPTION_MINRESTART: {
1037 char garbage[3];
1038 if ((sscanf(optarg, "%ld%1s", &gs.min_restart_interval,
1039 garbage)
1040 != 1)
1041 || (gs.min_restart_interval < 0)) {
1042 fprintf(stderr,
1043 "Invalid min_restart_interval argument: %s\n",
1044 optarg);
1045 frr_help_exit(1);
a6810074 1046 }
d62a17ae 1047 } break;
1048 case OPTION_MAXRESTART: {
1049 char garbage[3];
1050 if ((sscanf(optarg, "%ld%1s", &gs.max_restart_interval,
1051 garbage)
1052 != 1)
1053 || (gs.max_restart_interval < 0)) {
1054 fprintf(stderr,
1055 "Invalid max_restart_interval argument: %s\n",
1056 optarg);
1057 frr_help_exit(1);
a6810074 1058 }
d62a17ae 1059 } break;
1060 case 'i': {
1061 char garbage[3];
1062 int period;
1063 if ((sscanf(optarg, "%d%1s", &period, garbage) != 1)
1064 || (gs.period < 1)) {
1065 fprintf(stderr,
1066 "Invalid interval argument: %s\n",
1067 optarg);
1068 frr_help_exit(1);
a6810074 1069 }
d62a17ae 1070 gs.period = 1000 * period;
1071 } break;
a6810074
DL
1072 case 'p':
1073 pidfile = optarg;
1074 break;
1075 case 'r':
a6810074
DL
1076 if (!valid_command(optarg)) {
1077 fprintf(stderr,
1078 "Invalid restart command, must contain '%%s': %s\n",
1079 optarg);
4f04a76b 1080 frr_help_exit(1);
a6810074
DL
1081 }
1082 gs.restart_command = optarg;
a6810074
DL
1083 break;
1084 case 's':
1085 if (!valid_command(optarg)) {
1086 fprintf(stderr,
1087 "Invalid start command, must contain '%%s': %s\n",
1088 optarg);
4f04a76b 1089 frr_help_exit(1);
a6810074
DL
1090 }
1091 gs.start_command = optarg;
1092 break;
1093 case 'S':
1094 gs.vtydir = optarg;
1095 break;
d62a17ae 1096 case 't': {
1097 char garbage[3];
1098 if ((sscanf(optarg, "%ld%1s", &gs.timeout, garbage)
1099 != 1)
1100 || (gs.timeout < 1)) {
1101 fprintf(stderr,
1102 "Invalid timeout argument: %s\n",
1103 optarg);
1104 frr_help_exit(1);
a6810074 1105 }
d62a17ae 1106 } break;
1107 case 'T': {
1108 char garbage[3];
1109 if ((sscanf(optarg, "%ld%1s", &gs.restart_timeout,
1110 garbage)
1111 != 1)
1112 || (gs.restart_timeout < 1)) {
1113 fprintf(stderr,
1114 "Invalid restart timeout argument: %s\n",
1115 optarg);
1116 frr_help_exit(1);
a6810074 1117 }
d62a17ae 1118 } break;
a6810074
DL
1119 default:
1120 fputs("Invalid option.\n", stderr);
4f04a76b 1121 frr_help_exit(1);
a6810074 1122 }
8b886ca7 1123 }
a6810074 1124
71e7975a
DL
1125 if (watch_only
1126 && (gs.start_command || gs.stop_command || gs.restart_command)) {
d87ae5cc 1127 fputs("Options -r/-s/-k are not used when --dry is active.\n",
a6810074 1128 stderr);
8b886ca7 1129 }
f168b713
DL
1130 if (!watch_only
1131 && (!gs.restart_command || !gs.start_command || !gs.stop_command)) {
1132 fprintf(stderr,
1133 "Options -s (start), -k (kill), and -r (restart) are required.\n");
1134 frr_help_exit(1);
8b886ca7 1135 }
8b886ca7 1136
a6810074
DL
1137 if (blankstr) {
1138 if (gs.restart_command)
1139 gs.restart_command =
d62a17ae 1140 translate_blanks(gs.restart_command, blankstr);
a6810074
DL
1141 if (gs.start_command)
1142 gs.start_command =
d62a17ae 1143 translate_blanks(gs.start_command, blankstr);
a6810074
DL
1144 if (gs.stop_command)
1145 gs.stop_command =
d62a17ae 1146 translate_blanks(gs.stop_command, blankstr);
065de903 1147 }
8b886ca7 1148
a6810074 1149 gs.restart.interval = gs.min_restart_interval;
8b886ca7 1150
4f04a76b
DL
1151 master = frr_init();
1152
dd8376fe 1153 zlog_set_level(ZLOG_DEST_MONITOR, ZLOG_DISABLED);
eb05883f 1154 if (watchfrr_di.daemon_mode) {
dd8376fe 1155 zlog_set_level(ZLOG_DEST_SYSLOG, MIN(gs.loglevel, LOG_DEBUG));
d62a17ae 1156 if (daemon(0, 0) < 0) {
2f4f11fa 1157 fprintf(stderr, "Watchfrr daemon failed: %s",
d62a17ae 1158 strerror(errno));
1159 exit(1);
4f04a76b
DL
1160 }
1161 } else
dd8376fe 1162 zlog_set_level(ZLOG_DEST_STDOUT, MIN(gs.loglevel, LOG_DEBUG));
8b886ca7 1163
a6810074 1164 watchfrr_vty_init();
8b886ca7 1165
eb05883f 1166 frr_vty_serv();
8b886ca7 1167
8b886ca7 1168 {
a6810074
DL
1169 int i;
1170 struct daemon *tail = NULL;
1171
1172 for (i = optind; i < argc; i++) {
1173 struct daemon *dmn;
1174
1175 if (!(dmn = (struct daemon *)calloc(1, sizeof(*dmn)))) {
1176 fprintf(stderr, "calloc(1,%u) failed: %s\n",
d7c0a89a 1177 (unsigned int)sizeof(*dmn),
a6810074
DL
1178 safe_strerror(errno));
1179 return 1;
1180 }
1181 dmn->name = dmn->restart.name = argv[i];
1182 dmn->state = DAEMON_INIT;
1183 gs.numdaemons++;
1184 gs.numdown++;
1185 dmn->fd = -1;
66e78ae6 1186 dmn->t_wakeup = NULL;
d62a17ae 1187 thread_add_timer_msec(master, wakeup_init, dmn,
1188 100 + (random() % 900),
66e78ae6 1189 &dmn->t_wakeup);
a6810074
DL
1190 dmn->restart.interval = gs.min_restart_interval;
1191 if (tail)
1192 tail->next = dmn;
1193 else
1194 gs.daemons = dmn;
1195 tail = dmn;
1196
f168b713 1197 if (!strcmp(dmn->name, special))
a6810074
DL
1198 gs.special = dmn;
1199 }
1200 }
1201 if (!gs.daemons) {
1202 fputs("Must specify one or more daemons to monitor.\n", stderr);
4f04a76b 1203 frr_help_exit(1);
a6810074 1204 }
f168b713
DL
1205 if (!watch_only && !gs.special) {
1206 fprintf(stderr, "\"%s\" daemon must be in daemon list\n",
1207 special);
4f04a76b 1208 frr_help_exit(1);
8b886ca7 1209 }
8b886ca7 1210
a6810074
DL
1211 /* Make sure we're not already running. */
1212 pid_output(pidfile);
1213
1214 /* Announce which daemons are being monitored. */
1215 {
1216 struct daemon *dmn;
1217 size_t len = 0;
1218
1219 for (dmn = gs.daemons; dmn; dmn = dmn->next)
1220 len += strlen(dmn->name) + 1;
1221
1222 {
1223 char buf[len + 1];
1224 char *p = buf;
1225
1226 for (dmn = gs.daemons; dmn; dmn = dmn->next) {
1227 if (p != buf)
1228 *p++ = ' ';
1229 strcpy(p, dmn->name);
1230 p += strlen(p);
1231 }
f168b713
DL
1232 zlog_notice("%s %s watching [%s]%s", progname,
1233 FRR_VERSION, buf,
1234 watch_only ? ", monitor mode" : "");
a6810074
DL
1235 }
1236 }
8b886ca7 1237
a6810074
DL
1238 {
1239 struct thread thread;
1240
1241 while (thread_fetch(master, &thread))
1242 thread_call(&thread);
1243 }
8b886ca7 1244
a6810074
DL
1245 systemd_send_stopping();
1246 /* Not reached. */
1247 return 0;
8b886ca7 1248}