]> git.proxmox.com Git - mirror_frr.git/blame - watchquagga/watchquagga.c
Merge branch 'cmaster' of ssh://stash.cumulusnetworks.com:7999/quag/quagga into cmaster
[mirror_frr.git] / watchquagga / watchquagga.c
CommitLineData
8b886ca7 1/*
8b886ca7 2 Monitor status of quagga daemons and restart if necessary.
3
4 Copyright (C) 2004 Andrew J. Schorr
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 */
20
a365534f 21#include <zebra.h>
8b886ca7 22#include <thread.h>
23#include <log.h>
52e66296 24#include <network.h>
8b886ca7 25#include <sigevent.h>
a365534f 26#include <lib/version.h>
6f594023 27#include <getopt.h>
a365534f 28#include <sys/un.h>
29#include <sys/wait.h>
837d16cc 30#include <memory.h>
8b886ca7 31
/* Smaller of two values.  Each argument may be evaluated twice, so do not
   pass expressions with side effects. */
#ifndef MIN
#define MIN(X,Y) (((X) <= (Y)) ? (X) : (Y))
#endif

/* Macros to help randomize timers.
   JITTER(X) is a pseudo-random offset in [-(X)/2, (X)-(X)/2].
   FUZZY(X) is X perturbed by the jitter of one twentieth of X. */
#define JITTER(X) ((random() % ((X)+1))-((X)/2))
#define FUZZY(X) ((X)+JITTER((X)/20))

/* Compiled-in defaults.  The period, timeout, restart timeout and the
   min/max restart intervals are expressed in seconds. */
#define DEFAULT_PERIOD 5
#define DEFAULT_TIMEOUT 10
#define DEFAULT_RESTART_TIMEOUT 20
#define DEFAULT_LOGLEVEL LOG_INFO
#define DEFAULT_MIN_RESTART 60
#define DEFAULT_MAX_RESTART 600

/* Pid file: a build-supplied path wins over the STATEDIR fallback. */
#ifdef PATH_WATCHQUAGGA_PID
#define DEFAULT_PIDFILE PATH_WATCHQUAGGA_PID
#else
#define DEFAULT_PIDFILE STATEDIR "/watchquagga.pid"
#endif

/* Directory holding the daemons' "<name>.vty" sockets. */
#ifdef DAEMON_VTY_DIR
#define VTYDIR DAEMON_VTY_DIR
#else
#define VTYDIR STATEDIR
#endif

/* Token sent with the "echo" command and expected back in the reply. */
#define PING_TOKEN "PING"
58
/* Needs to be global, referenced somewhere inside libzebra. */
struct thread_master *master;

/* Operating mode, selected from the command line. */
typedef enum
{
  MODE_MONITOR = 0,             /* only watch and report */
  MODE_GLOBAL_RESTART,          /* one command restarts everything */
  MODE_SEPARATE_RESTART,        /* restart only the failing daemon */
  MODE_PHASED_ZEBRA_RESTART,    /* phased restart when zebra fails */
  MODE_PHASED_ALL_RESTART       /* phased restart on any failure */
} watch_mode_t;

/* Human-readable description of each watch_mode_t value. */
static const char *mode_str[] =
{
  [MODE_MONITOR]              = "monitor",
  [MODE_GLOBAL_RESTART]       = "global restart",
  [MODE_SEPARATE_RESTART]     = "individual daemon restart",
  [MODE_PHASED_ZEBRA_RESTART] = "phased zebra restart",
  [MODE_PHASED_ALL_RESTART]   = "phased global restart for any failure",
};
79
/* Steps of a phased restart; PHASE_NONE means no phased restart is
   in progress. */
typedef enum
{
  PHASE_NONE = 0,
  PHASE_STOPS_PENDING,
  PHASE_WAITING_DOWN,
  PHASE_ZEBRA_RESTART_PENDING,
  PHASE_WAITING_ZEBRA_UP
} restart_phase_t;

/* Descriptions indexed by restart_phase_t.  The last string has no
   matching enumerator in the enum above. */
static const char *phase_str[] =
{
  [PHASE_NONE]                  = "None",
  [PHASE_STOPS_PENDING]         = "Stop jobs running",
  [PHASE_WAITING_DOWN]          = "Waiting for other daemons to come down",
  [PHASE_ZEBRA_RESTART_PENDING] = "Zebra restart job running",
  [PHASE_WAITING_ZEBRA_UP]      = "Waiting for zebra to come up",
  "Start jobs running",
};

/* Seconds a single phase may last before the phased restart is aborted. */
#define PHASE_TIMEOUT (3*gs.restart_timeout)
100
098e240f 101struct restart_info
102{
103 const char *name;
104 const char *what;
105 pid_t pid;
106 struct timeval time;
107 long interval;
108 struct thread *t_kill;
109 int kills;
110};
111
112static struct global_state
113{
8b886ca7 114 watch_mode_t mode;
115 restart_phase_t phase;
116 struct thread *t_phase_hanging;
117 const char *vtydir;
118 long period;
119 long timeout;
120 long restart_timeout;
121 long min_restart_interval;
122 long max_restart_interval;
123 int do_ping;
124 struct daemon *daemons;
125 const char *restart_command;
126 const char *start_command;
127 const char *stop_command;
098e240f 128 struct restart_info restart;
8b886ca7 129 int unresponsive_restart;
130 int loglevel;
131 struct daemon *special; /* points to zebra when doing phased restart */
132 int numdaemons;
133 int numpids;
134 int numdown; /* # of daemons that are not UP or UNRESPONSIVE */
135} gs = {
136 .mode = MODE_MONITOR,
137 .phase = PHASE_NONE,
16f6511e 138 .vtydir = VTYDIR,
8b886ca7 139 .period = 1000*DEFAULT_PERIOD,
140 .timeout = DEFAULT_TIMEOUT,
141 .restart_timeout = DEFAULT_RESTART_TIMEOUT,
142 .loglevel = DEFAULT_LOGLEVEL,
143 .min_restart_interval = DEFAULT_MIN_RESTART,
144 .max_restart_interval = DEFAULT_MAX_RESTART,
145 .do_ping = 1,
8b886ca7 146};
147
148typedef enum
149{
150 DAEMON_INIT,
151 DAEMON_DOWN,
152 DAEMON_CONNECTING,
153 DAEMON_UP,
154 DAEMON_UNRESPONSIVE
155} daemon_state_t;
156
157#define IS_UP(DMN) \
158 (((DMN)->state == DAEMON_UP) || ((DMN)->state == DAEMON_UNRESPONSIVE))
159
160static const char *state_str[] =
161{
162 "Init",
163 "Down",
164 "Connecting",
165 "Up",
166 "Unresponsive",
167};
168
169struct daemon {
170 const char *name;
171 daemon_state_t state;
172 int fd;
173 struct timeval echo_sent;
174 u_int connect_tries;
175 struct thread *t_wakeup;
176 struct thread *t_read;
177 struct thread *t_write;
178 struct daemon *next;
179 struct restart_info restart;
180};
181
/* Long option table for getopt_long(); every entry maps to the
   single-letter option handled in main(). */
static const struct option longopts[] =
{
  /* general behaviour */
  { "daemon",               no_argument,       NULL, 'd'},
  { "statedir",             required_argument, NULL, 'S'},
  { "no-echo",              no_argument,       NULL, 'e'},
  { "loglevel",             required_argument, NULL, 'l'},
  /* timing */
  { "interval",             required_argument, NULL, 'i'},
  { "timeout",              required_argument, NULL, 't'},
  { "restart-timeout",      required_argument, NULL, 'T'},
  /* commands and restart policy */
  { "restart",              required_argument, NULL, 'r'},
  { "start-command",        required_argument, NULL, 's'},
  { "kill-command",         required_argument, NULL, 'k'},
  { "restart-all",          required_argument, NULL, 'R'},
  { "all-restart",          no_argument,       NULL, 'a'},
  { "always-all-restart",   no_argument,       NULL, 'A'},
  { "unresponsive-restart", no_argument,       NULL, 'z'},
  { "min-restart-interval", required_argument, NULL, 'm'},
  { "max-restart-interval", required_argument, NULL, 'M'},
  /* miscellaneous */
  { "pid-file",             required_argument, NULL, 'p'},
  { "blank-string",         required_argument, NULL, 'b'},
  { "help",                 no_argument,       NULL, 'h'},
  { "version",              no_argument,       NULL, 'v'},
  { NULL, 0, NULL, 0 }
};
206
/* Forward declarations for functions used before their definitions. */
static int  try_connect(struct daemon *dmn);
static int  wakeup_send_echo(struct thread *t_wakeup);
static void try_restart(struct daemon *dmn);
static void phase_check(void);
211
/* Print command-line help and return STATUS.
   When STATUS is non-zero, only a one-line hint pointing at --help is
   written to stderr.  When STATUS is zero, the full usage text is written
   to stdout, filled in with PROGNAME, the mode_str[] descriptions and the
   compiled-in defaults.  STATUS is returned unchanged so that callers can
   write `return usage(progname,1);'. */
212static int
213usage(const char *progname, int status)
214{
215 if (status != 0)
216 fprintf(stderr, "Try `%s --help' for more information.\n", progname);
217 else
218 printf("Usage : %s [OPTION...] <daemon name> ...\n\n\
219Watchdog program to monitor status of quagga daemons and try to restart\n\
220them if they are down or unresponsive. It determines whether a daemon is\n\
221up based on whether it can connect to the daemon's vty unix stream socket.\n\
222It then repeatedly sends echo commands over that socket to determine whether\n\
223the daemon is responsive. If the daemon crashes, we will receive an EOF\n\
224on the socket connection and know immediately that the daemon is down.\n\n\
225The daemons to be monitored should be listed on the command line.\n\n\
226This program can run in one of 5 modes:\n\n\
2270. Mode: %s.\n\
228 Just monitor and report on status changes. Example:\n\
229 %s -d zebra ospfd bgpd\n\n\
2301. Mode: %s.\n\
231 Whenever any daemon hangs or crashes, use the given command to restart\n\
232 them all. Example:\n\
233 %s -dz \\\n\
234 -R '/sbin/service zebra restart; /sbin/service ospfd restart' \\\n\
235 zebra ospfd\n\n\
2362. Mode: %s.\n\
237 When any single daemon hangs or crashes, restart only the daemon that's\n\
238 in trouble using the supplied restart command. Example:\n\
239 %s -dz -r '/sbin/service %%s restart' zebra ospfd bgpd\n\n\
2403. Mode: %s.\n\
241 The same as the previous mode, except that there is special treatment when\n\
242 the zebra daemon is in trouble. In that case, a phased restart approach\n\
243 is used: 1. stop all other daemons; 2. restart zebra; 3. start the other\n\
244 daemons. Example:\n\
245 %s -adz -r '/sbin/service %%s restart' \\\n\
246 -s '/sbin/service %%s start' \\\n\
247 -k '/sbin/service %%s stop' zebra ospfd bgpd\n\n\
2484. Mode: %s.\n\
249 This is the same as the previous mode, except that the phased restart\n\
250 procedure is used whenever any of the daemons hangs or crashes. Example:\n\
251 %s -Adz -r '/sbin/service %%s restart' \\\n\
252 -s '/sbin/service %%s start' \\\n\
253 -k '/sbin/service %%s stop' zebra ospfd bgpd\n\n\
254As of this writing, it is believed that mode 2 [%s]\n\
255is not safe, and mode 3 [%s] may not be safe with some of the\n\
256routing daemons.\n\n\
257In order to avoid attempting to restart the daemons in a fast loop,\n\
258the -m and -M options allow you to control the minimum delay between\n\
259restart commands. The minimum restart delay is recalculated each time\n\
260a restart is attempted: if the time since the last restart attempt exceeds\n\
261twice the -M value, then the restart delay is set to the -m value.\n\
262Otherwise, the interval is doubled (but capped at the -M value).\n\n\
263Options:\n\
264-d, --daemon Run in daemon mode. In this mode, error messages are sent\n\
265 to syslog instead of stdout.\n\
266-S, --statedir Set the vty socket directory (default is %s)\n\
267-e, --no-echo Do not ping the daemons to test responsiveness (this\n\
268 option is necessary if the daemons do not support the\n\
269 echo command)\n\
270-l, --loglevel Set the logging level (default is %d).\n\
271 The value should range from %d (LOG_EMERG) to %d (LOG_DEBUG),\n\
272 but it can be set higher than %d if extra-verbose debugging\n\
273 messages are desired.\n\
274-m, --min-restart-interval\n\
275 Set the minimum seconds to wait between invocations of daemon\n\
276 restart commands (default is %d).\n\
277-M, --max-restart-interval\n\
278 Set the maximum seconds to wait between invocations of daemon\n\
279 restart commands (default is %d).\n\
280-i, --interval Set the status polling interval in seconds (default is %d)\n\
281-t, --timeout Set the unresponsiveness timeout in seconds (default is %d)\n\
282-T, --restart-timeout\n\
283 Set the restart (kill) timeout in seconds (default is %d).\n\
284 If any background jobs are still running after this much\n\
285 time has elapsed, they will be killed.\n\
286-r, --restart Supply a Bourne shell command to use to restart a single\n\
287 daemon. The command string should include '%%s' where the\n\
288 name of the daemon should be substituted.\n\
289 Note that -r and -R are incompatible.\n\
290-s, --start-command\n\
291 Supply a Bourne shell to command to use to start a single\n\
292 daemon. The command string should include '%%s' where the\n\
293 name of the daemon should be substituted.\n\
294-k, --kill-command\n\
295 Supply a Bourne shell to command to use to stop a single\n\
296 daemon. The command string should include '%%s' where the\n\
297 name of the daemon should be substituted.\n\
298-R, --restart-all\n\
299 When one or more daemons is down, try to restart everything\n\
300 using the Bourne shell command supplied as the argument.\n\
301 Note that -r and -R are incompatible.\n\
302-z, --unresponsive-restart\n\
303 When a daemon is unresponsive, treat it as being down for\n\
304 restart purposes.\n\
305-a, --all-restart\n\
306 When zebra hangs or crashes, restart all daemons using\n\
307 this phased approach: 1. stop all other daemons; 2. restart\n\
308 zebra; 3. start other daemons. Requires -r, -s, and -k.\n\
309-A, --always-all-restart\n\
310 When any daemon (not just zebra) hangs or crashes, use the\n\
311 same phased restart mechanism described above for -a.\n\
312 Requires -r, -s, and -k.\n\
313-p, --pid-file Set process identifier file name\n\
314 (default is %s).\n\
c8b40f86 315-b, --blank-string\n\
316 When the supplied argument string is found in any of the\n\
317 various shell command arguments (-r, -s, -k, or -R), replace\n\
318 it with a space. This is an ugly hack to circumvent problems\n\
319 passing command-line arguments with embedded spaces.\n\
8b886ca7 320-v, --version Print program version\n\
321-h, --help Display this help and exit\n\
322", progname,mode_str[0],progname,mode_str[1],progname,mode_str[2],
323progname,mode_str[3],progname,mode_str[4],progname,mode_str[2],mode_str[3],
16f6511e 324VTYDIR,DEFAULT_LOGLEVEL,LOG_EMERG,LOG_DEBUG,LOG_DEBUG,
8b886ca7 325DEFAULT_MIN_RESTART,DEFAULT_MAX_RESTART,
326DEFAULT_PERIOD,DEFAULT_TIMEOUT,DEFAULT_RESTART_TIMEOUT,DEFAULT_PIDFILE);
327
328 return status;
329}
330
/* Launch SHELL_CMD through "/bin/sh -c" in a forked child.
   The child first tries to move into its own process group so that the
   whole job can later be signalled as a group.  Returns the child's pid,
   or -1 if fork() failed.  The child is not waited for here. */
static pid_t
run_background(char *shell_cmd)
{
  pid_t child = fork();

  if (child == -1)
    {
      zlog_err("fork failed, cannot run command [%s]: %s",
               shell_cmd,safe_strerror(errno));
      return -1;
    }

  if (child == 0)
    {
      /* Child process. */
      char shell[] = "sh";
      char dashc[] = "-c";
      char * const argv[4] = { shell, dashc, shell_cmd, NULL};

      /* Use separate process group so child processes can be killed easily. */
      if (setpgid(0,0) < 0)
        zlog_warn("warning: setpgid(0,0) failed: %s",safe_strerror(errno));

      execv("/bin/sh", argv);
      /* Only reached when the exec itself failed. */
      zlog_err("execv(/bin/sh -c '%s') failed: %s",
               shell_cmd,safe_strerror(errno));
      _exit(127);
    }

  /* Parent process: we will reap the child later. */
  zlog_err("Forked background command [pid %d]: %s",(int)child,shell_cmd);
  return child;
}
362
/* Store in *RESULT the wall-clock time that has passed since *START_TIME
   and return RESULT.  The microsecond field is normalised to be
   non-negative by borrowing from the seconds field. */
static struct timeval *
time_elapsed(struct timeval *result, const struct timeval *start_time)
{
  gettimeofday(result,NULL);
  result->tv_sec = result->tv_sec - start_time->tv_sec;
  result->tv_usec = result->tv_usec - start_time->tv_usec;
  for (; result->tv_usec < 0; result->tv_sec--)
    result->tv_usec += 1000000L;
  return result;
}
376
377static int
378restart_kill(struct thread *t_kill)
379{
380 struct restart_info *restart = THREAD_ARG(t_kill);
381 struct timeval delay;
382
383 time_elapsed(&delay,&restart->time);
384 zlog_warn("Warning: %s %s child process %d still running after "
385 "%ld seconds, sending signal %d",
f2d8257f 386 restart->what,restart->name,(int)restart->pid,delay.tv_sec,
8b886ca7 387 (restart->kills ? SIGKILL : SIGTERM));
388 kill(-restart->pid,(restart->kills ? SIGKILL : SIGTERM));
389 restart->kills++;
390 restart->t_kill = thread_add_timer(master,restart_kill,restart,
391 gs.restart_timeout);
392 return 0;
393}
394
395static struct restart_info *
396find_child(pid_t child)
397{
398 if (gs.mode == MODE_GLOBAL_RESTART)
399 {
400 if (gs.restart.pid == child)
401 return &gs.restart;
402 }
403 else
404 {
405 struct daemon *dmn;
406 for (dmn = gs.daemons; dmn; dmn = dmn->next)
407 {
408 if (dmn->restart.pid == child)
409 return &dmn->restart;
410 }
411 }
412 return NULL;
413}
414
415static void
416sigchild(void)
417{
418 pid_t child;
419 int status;
420 const char *name;
421 const char *what;
422 struct restart_info *restart;
423
424 switch (child = waitpid(-1,&status,WNOHANG))
425 {
426 case -1:
427 zlog_err("waitpid failed: %s",safe_strerror(errno));
428 return;
429 case 0:
430 zlog_warn("SIGCHLD received, but waitpid did not reap a child");
431 return;
432 }
433
434 if ((restart = find_child(child)) != NULL)
435 {
436 name = restart->name;
437 what = restart->what;
438 restart->pid = 0;
439 gs.numpids--;
440 thread_cancel(restart->t_kill);
441 restart->t_kill = NULL;
442 /* Update restart time to reflect the time the command completed. */
443 gettimeofday(&restart->time,NULL);
444 }
445 else
446 {
447 zlog_err("waitpid returned status for an unknown child process %d",
f2d8257f 448 (int)child);
8b886ca7 449 name = "(unknown)";
450 what = "background";
451 }
452 if (WIFSTOPPED(status))
453 zlog_warn("warning: %s %s process %d is stopped",
f2d8257f 454 what,name,(int)child);
8b886ca7 455 else if (WIFSIGNALED(status))
456 zlog_warn("%s %s process %d terminated due to signal %d",
f2d8257f 457 what,name,(int)child,WTERMSIG(status));
8b886ca7 458 else if (WIFEXITED(status))
459 {
460 if (WEXITSTATUS(status) != 0)
461 zlog_warn("%s %s process %d exited with non-zero status %d",
f2d8257f 462 what,name,(int)child,WEXITSTATUS(status));
8b886ca7 463 else
f2d8257f 464 zlog_debug("%s %s process %d exited normally",what,name,(int)child);
8b886ca7 465 }
466 else
467 zlog_err("cannot interpret %s %s process %d wait status 0x%x",
f2d8257f 468 what,name,(int)child,status);
8b886ca7 469 phase_check();
470}
471
472static int
473run_job(struct restart_info *restart, const char *cmdtype, const char *command,
474 int force, int update_interval)
475{
476 struct timeval delay;
477
478 if (gs.loglevel > LOG_DEBUG+1)
479 zlog_debug("attempting to %s %s",cmdtype,restart->name);
480
481 if (restart->pid)
482 {
483 if (gs.loglevel > LOG_DEBUG+1)
484 zlog_debug("cannot %s %s, previous pid %d still running",
f2d8257f 485 cmdtype,restart->name,(int)restart->pid);
8b886ca7 486 return -1;
487 }
488
a8a8ddcd 489 /* Note: time_elapsed test must come before the force test, since we need
490 to make sure that delay is initialized for use below in updating the
491 restart interval. */
492 if ((time_elapsed(&delay,&restart->time)->tv_sec < restart->interval) &&
493 !force)
8b886ca7 494 {
495 if (gs.loglevel > LOG_DEBUG+1)
496 zlog_debug("postponing %s %s: "
497 "elapsed time %ld < retry interval %ld",
498 cmdtype,restart->name,(long)delay.tv_sec,restart->interval);
499 return -1;
500 }
501
502 gettimeofday(&restart->time,NULL);
503 restart->kills = 0;
504 {
505 char cmd[strlen(command)+strlen(restart->name)+1];
506 snprintf(cmd,sizeof(cmd),command,restart->name);
507 if ((restart->pid = run_background(cmd)) > 0)
508 {
509 restart->t_kill = thread_add_timer(master,restart_kill,restart,
510 gs.restart_timeout);
511 restart->what = cmdtype;
512 gs.numpids++;
513 }
514 else
515 restart->pid = 0;
516 }
517
518 /* Calculate the new restart interval. */
519 if (update_interval)
520 {
521 if (delay.tv_sec > 2*gs.max_restart_interval)
522 restart->interval = gs.min_restart_interval;
523 else if ((restart->interval *= 2) > gs.max_restart_interval)
524 restart->interval = gs.max_restart_interval;
525 if (gs.loglevel > LOG_DEBUG+1)
526 zlog_debug("restart %s interval is now %ld",
527 restart->name,restart->interval);
528 }
529 return restart->pid;
530}
531
/* Scheduling shorthands for a struct daemon.  Each one stores the new
   thread in the daemon without cancelling what was stored there before,
   so the caller must have cleared or cancelled the old thread first. */

/* Watch the daemon's socket for incoming data. */
#define SET_READ_HANDLER(DMN) \
  (DMN)->t_read = thread_add_read(master,handle_read,(DMN),(DMN)->fd)

/* Retry the connection after roughly one polling period. */
#define SET_WAKEUP_DOWN(DMN) \
  (DMN)->t_wakeup = thread_add_timer_msec(master,wakeup_down,(DMN), \
                                          FUZZY(gs.period))

/* Re-examine an unresponsive daemon after roughly one polling period. */
#define SET_WAKEUP_UNRESPONSIVE(DMN) \
  (DMN)->t_wakeup = thread_add_timer_msec(master,wakeup_unresponsive,(DMN), \
                                          FUZZY(gs.period))

/* Send the next echo request after roughly one polling period. */
#define SET_WAKEUP_ECHO(DMN) \
  (DMN)->t_wakeup = thread_add_timer_msec(master,wakeup_send_echo,(DMN), \
                                          FUZZY(gs.period))
546
547static int
548wakeup_down(struct thread *t_wakeup)
549{
550 struct daemon *dmn = THREAD_ARG(t_wakeup);
551
552 dmn->t_wakeup = NULL;
553 if (try_connect(dmn) < 0)
554 SET_WAKEUP_DOWN(dmn);
555 if ((dmn->connect_tries > 1) && (dmn->state != DAEMON_UP))
556 try_restart(dmn);
557 return 0;
558}
559
560static int
561wakeup_init(struct thread *t_wakeup)
562{
563 struct daemon *dmn = THREAD_ARG(t_wakeup);
564
565 dmn->t_wakeup = NULL;
566 if (try_connect(dmn) < 0)
567 {
568 SET_WAKEUP_DOWN(dmn);
569 zlog_err("%s state -> down : initial connection attempt failed",
570 dmn->name);
571 dmn->state = DAEMON_DOWN;
572 }
573 return 0;
574}
575
576static void
577daemon_down(struct daemon *dmn, const char *why)
578{
579 if (IS_UP(dmn) || (dmn->state == DAEMON_INIT))
580 zlog_err("%s state -> down : %s",dmn->name,why);
581 else if (gs.loglevel > LOG_DEBUG)
582 zlog_debug("%s still down : %s",dmn->name,why);
583 if (IS_UP(dmn))
584 gs.numdown++;
585 dmn->state = DAEMON_DOWN;
586 if (dmn->fd >= 0)
587 {
588 close(dmn->fd);
589 dmn->fd = -1;
590 }
591 THREAD_OFF(dmn->t_read);
592 THREAD_OFF(dmn->t_write);
593 THREAD_OFF(dmn->t_wakeup);
594 if (try_connect(dmn) < 0)
595 SET_WAKEUP_DOWN(dmn);
596 phase_check();
597}
598
599static int
600handle_read(struct thread *t_read)
601{
602 struct daemon *dmn = THREAD_ARG(t_read);
603 static const char resp[sizeof(PING_TOKEN)+4] = PING_TOKEN "\n";
604 char buf[sizeof(resp)+100];
605 ssize_t rc;
606 struct timeval delay;
607
608 dmn->t_read = NULL;
609 if ((rc = read(dmn->fd,buf,sizeof(buf))) < 0)
610 {
611 char why[100];
612
518cde8d 613 if (ERRNO_IO_RETRY(errno))
8b886ca7 614 {
615 /* Pretend it never happened. */
616 SET_READ_HANDLER(dmn);
617 return 0;
618 }
619 snprintf(why,sizeof(why),"unexpected read error: %s",
620 safe_strerror(errno));
621 daemon_down(dmn,why);
622 return 0;
623 }
624 if (rc == 0)
625 {
626 daemon_down(dmn,"read returned EOF");
627 return 0;
628 }
629 if (!dmn->echo_sent.tv_sec)
630 {
631 char why[sizeof(buf)+100];
098e240f 632 snprintf(why,sizeof(why),"unexpected read returns %d bytes: %.*s",
633 (int)rc,(int)rc,buf);
8b886ca7 634 daemon_down(dmn,why);
635 return 0;
636 }
637
638 /* We are expecting an echo response: is there any chance that the
639 response would not be returned entirely in the first read? That
640 seems inconceivable... */
641 if ((rc != sizeof(resp)) || memcmp(buf,resp,sizeof(resp)))
642 {
643 char why[100+sizeof(buf)];
098e240f 644 snprintf(why,sizeof(why),"read returned bad echo response of %d bytes "
645 "(expecting %u): %.*s",
646 (int)rc,(u_int)sizeof(resp),(int)rc,buf);
8b886ca7 647 daemon_down(dmn,why);
648 return 0;
649 }
650
651 time_elapsed(&delay,&dmn->echo_sent);
652 dmn->echo_sent.tv_sec = 0;
653 if (dmn->state == DAEMON_UNRESPONSIVE)
654 {
655 if (delay.tv_sec < gs.timeout)
656 {
657 dmn->state = DAEMON_UP;
658 zlog_warn("%s state -> up : echo response received after %ld.%06ld "
659 "seconds", dmn->name,delay.tv_sec,delay.tv_usec);
660 }
661 else
662 zlog_warn("%s: slow echo response finally received after %ld.%06ld "
663 "seconds", dmn->name,delay.tv_sec,delay.tv_usec);
664 }
665 else if (gs.loglevel > LOG_DEBUG+1)
666 zlog_debug("%s: echo response received after %ld.%06ld seconds",
667 dmn->name,delay.tv_sec,delay.tv_usec);
668
669 SET_READ_HANDLER(dmn);
670 if (dmn->t_wakeup)
671 thread_cancel(dmn->t_wakeup);
672 SET_WAKEUP_ECHO(dmn);
673
674 return 0;
675}
676
677static void
678daemon_up(struct daemon *dmn, const char *why)
679{
680 dmn->state = DAEMON_UP;
681 gs.numdown--;
682 dmn->connect_tries = 0;
683 zlog_notice("%s state -> up : %s",dmn->name,why);
684 if (gs.do_ping)
685 SET_WAKEUP_ECHO(dmn);
686 phase_check();
687}
688
689static int
690check_connect(struct thread *t_write)
691{
692 struct daemon *dmn = THREAD_ARG(t_write);
693 int sockerr;
694 socklen_t reslen = sizeof(sockerr);
695
696 dmn->t_write = NULL;
697 if (getsockopt(dmn->fd,SOL_SOCKET,SO_ERROR,(char *)&sockerr,&reslen) < 0)
698 {
699 zlog_warn("%s: check_connect: getsockopt failed: %s",
700 dmn->name,safe_strerror(errno));
701 daemon_down(dmn,"getsockopt failed checking connection success");
702 return 0;
703 }
704 if ((reslen == sizeof(sockerr)) && sockerr)
705 {
706 char why[100];
707 snprintf(why,sizeof(why),
708 "getsockopt reports that connection attempt failed: %s",
709 safe_strerror(sockerr));
710 daemon_down(dmn,why);
711 return 0;
712 }
713
714 daemon_up(dmn,"delayed connect succeeded");
715 return 0;
716}
717
718static int
719wakeup_connect_hanging(struct thread *t_wakeup)
720{
721 struct daemon *dmn = THREAD_ARG(t_wakeup);
722 char why[100];
723
724 dmn->t_wakeup = NULL;
725 snprintf(why,sizeof(why),"connection attempt timed out after %ld seconds",
726 gs.timeout);
727 daemon_down(dmn,why);
728 return 0;
729}
730
731/* Making connection to protocol daemon. */
732static int
733try_connect(struct daemon *dmn)
734{
735 int sock;
736 struct sockaddr_un addr;
737 socklen_t len;
8b886ca7 738
739 if (gs.loglevel > LOG_DEBUG+1)
740 zlog_debug("%s: attempting to connect",dmn->name);
741 dmn->connect_tries++;
742
743 memset (&addr, 0, sizeof (struct sockaddr_un));
744 addr.sun_family = AF_UNIX;
745 snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/%s.vty",
746 gs.vtydir,dmn->name);
6f0e3f6e 747#ifdef HAVE_STRUCT_SOCKADDR_UN_SUN_LEN
8b886ca7 748 len = addr.sun_len = SUN_LEN(&addr);
749#else
750 len = sizeof (addr.sun_family) + strlen (addr.sun_path);
6f0e3f6e 751#endif /* HAVE_STRUCT_SOCKADDR_UN_SUN_LEN */
8b886ca7 752
753 /* Quick check to see if we might succeed before we go to the trouble
754 of creating a socket. */
755 if (access(addr.sun_path, W_OK) < 0)
756 {
757 if (errno != ENOENT)
758 zlog_err("%s: access to socket %s denied: %s",
759 dmn->name,addr.sun_path,safe_strerror(errno));
760 return -1;
761 }
762
763 if ((sock = socket (AF_UNIX, SOCK_STREAM, 0)) < 0)
764 {
765 zlog_err("%s(%s): cannot make socket: %s",
766 __func__,addr.sun_path, safe_strerror(errno));
767 return -1;
768 }
769
52e66296 770 if (set_nonblocking(sock) < 0)
8b886ca7 771 {
52e66296 772 zlog_err("%s(%s): set_nonblocking(%d) failed",
773 __func__, addr.sun_path, sock);
8b886ca7 774 close(sock);
775 return -1;
776 }
777
778 if (connect (sock, (struct sockaddr *) &addr, len) < 0)
779 {
780 if ((errno != EINPROGRESS) && (errno != EWOULDBLOCK))
781 {
782 if (gs.loglevel > LOG_DEBUG)
783 zlog_debug("%s(%s): connect failed: %s",
784 __func__,addr.sun_path, safe_strerror(errno));
785 close (sock);
786 return -1;
787 }
788 if (gs.loglevel > LOG_DEBUG)
789 zlog_debug("%s: connection in progress",dmn->name);
790 dmn->state = DAEMON_CONNECTING;
791 dmn->fd = sock;
792 dmn->t_write = thread_add_write(master,check_connect,dmn,dmn->fd);
793 dmn->t_wakeup = thread_add_timer(master,wakeup_connect_hanging,dmn,
794 gs.timeout);
795 SET_READ_HANDLER(dmn);
796 return 0;
797 }
798
799 dmn->fd = sock;
800 SET_READ_HANDLER(dmn);
801 daemon_up(dmn,"connect succeeded");
802 return 1;
803}
804
805static int
806phase_hanging(struct thread *t_hanging)
807{
808 gs.t_phase_hanging = NULL;
809 zlog_err("Phase [%s] hanging for %ld seconds, aborting phased restart",
810 phase_str[gs.phase],PHASE_TIMEOUT);
811 gs.phase = PHASE_NONE;
812 return 0;
813}
814
815static void
816set_phase(restart_phase_t new_phase)
817{
818 gs.phase = new_phase;
819 if (gs.t_phase_hanging)
820 thread_cancel(gs.t_phase_hanging);
821 gs.t_phase_hanging = thread_add_timer(master,phase_hanging,NULL,
822 PHASE_TIMEOUT);
823}
824
825static void
826phase_check(void)
827{
828 switch (gs.phase)
829 {
830 case PHASE_NONE:
831 break;
832 case PHASE_STOPS_PENDING:
833 if (gs.numpids)
834 break;
835 zlog_info("Phased restart: all routing daemon stop jobs have completed.");
836 set_phase(PHASE_WAITING_DOWN);
837 /*FALLTHRU*/
838 case PHASE_WAITING_DOWN:
839 if (gs.numdown+IS_UP(gs.special) < gs.numdaemons)
840 break;
841 zlog_info("Phased restart: all routing daemons now down.");
842 run_job(&gs.special->restart,"restart",gs.restart_command,1,1);
843 set_phase(PHASE_ZEBRA_RESTART_PENDING);
844 /*FALLTHRU*/
845 case PHASE_ZEBRA_RESTART_PENDING:
846 if (gs.special->restart.pid)
847 break;
848 zlog_info("Phased restart: %s restart job completed.",gs.special->name);
849 set_phase(PHASE_WAITING_ZEBRA_UP);
850 /*FALLTHRU*/
851 case PHASE_WAITING_ZEBRA_UP:
852 if (!IS_UP(gs.special))
853 break;
854 zlog_info("Phased restart: %s is now up.",gs.special->name);
855 {
856 struct daemon *dmn;
857 for (dmn = gs.daemons; dmn; dmn = dmn->next)
858 {
859 if (dmn != gs.special)
a8a8ddcd 860 run_job(&dmn->restart,"start",gs.start_command,1,0);
8b886ca7 861 }
862 }
863 gs.phase = PHASE_NONE;
864 THREAD_OFF(gs.t_phase_hanging);
865 zlog_notice("Phased global restart has completed.");
866 break;
867 }
868}
869
870static void
871try_restart(struct daemon *dmn)
872{
873 switch (gs.mode)
874 {
875 case MODE_MONITOR:
876 return;
877 case MODE_GLOBAL_RESTART:
878 run_job(&gs.restart,"restart",gs.restart_command,0,1);
879 break;
880 case MODE_SEPARATE_RESTART:
881 run_job(&dmn->restart,"restart",gs.restart_command,0,1);
882 break;
883 case MODE_PHASED_ZEBRA_RESTART:
884 if (dmn != gs.special)
885 {
886 if ((gs.special->state == DAEMON_UP) && (gs.phase == PHASE_NONE))
887 run_job(&dmn->restart,"restart",gs.restart_command,0,1);
888 else
889 zlog_debug("%s: postponing restart attempt because master %s daemon "
890 "not up [%s], or phased restart in progress",
891 dmn->name,gs.special->name,state_str[gs.special->state]);
892 break;
893 }
894 /*FALLTHRU*/
895 case MODE_PHASED_ALL_RESTART:
896 if ((gs.phase != PHASE_NONE) || gs.numpids)
897 {
898 if (gs.loglevel > LOG_DEBUG+1)
899 zlog_debug("postponing phased global restart: restart already in "
900 "progress [%s], or outstanding child processes [%d]",
901 phase_str[gs.phase],gs.numpids);
902 break;
903 }
904 /* Is it too soon for a restart? */
905 {
906 struct timeval delay;
907 if (time_elapsed(&delay,&gs.special->restart.time)->tv_sec <
908 gs.special->restart.interval)
909 {
910 if (gs.loglevel > LOG_DEBUG+1)
911 zlog_debug("postponing phased global restart: "
912 "elapsed time %ld < retry interval %ld",
913 (long)delay.tv_sec,gs.special->restart.interval);
914 break;
915 }
916 }
71e7cd63 917 run_job(&gs.restart,"restart",gs.restart_command,0,1);
8b886ca7 918 break;
919 default:
920 zlog_err("error: unknown restart mode %d",gs.mode);
921 break;
922 }
923}
924
925static int
926wakeup_unresponsive(struct thread *t_wakeup)
927{
928 struct daemon *dmn = THREAD_ARG(t_wakeup);
929
930 dmn->t_wakeup = NULL;
931 if (dmn->state != DAEMON_UNRESPONSIVE)
932 zlog_err("%s: no longer unresponsive (now %s), "
933 "wakeup should have been cancelled!",
934 dmn->name,state_str[dmn->state]);
935 else
936 {
937 SET_WAKEUP_UNRESPONSIVE(dmn);
938 try_restart(dmn);
939 }
940 return 0;
941}
942
943static int
944wakeup_no_answer(struct thread *t_wakeup)
945{
946 struct daemon *dmn = THREAD_ARG(t_wakeup);
947
948 dmn->t_wakeup = NULL;
949 dmn->state = DAEMON_UNRESPONSIVE;
950 zlog_err("%s state -> unresponsive : no response yet to ping "
951 "sent %ld seconds ago",dmn->name,gs.timeout);
952 if (gs.unresponsive_restart)
953 {
954 SET_WAKEUP_UNRESPONSIVE(dmn);
955 try_restart(dmn);
956 }
957 return 0;
958}
959
960static int
961wakeup_send_echo(struct thread *t_wakeup)
962{
963 static const char echocmd[] = "echo " PING_TOKEN;
964 ssize_t rc;
965 struct daemon *dmn = THREAD_ARG(t_wakeup);
966
967 dmn->t_wakeup = NULL;
968 if (((rc = write(dmn->fd,echocmd,sizeof(echocmd))) < 0) ||
969 ((size_t)rc != sizeof(echocmd)))
970 {
971 char why[100+sizeof(echocmd)];
098e240f 972 snprintf(why,sizeof(why),"write '%s' returned %d instead of %u",
973 echocmd,(int)rc,(u_int)sizeof(echocmd));
8b886ca7 974 daemon_down(dmn,why);
975 }
976 else
977 {
978 gettimeofday(&dmn->echo_sent,NULL);
979 dmn->t_wakeup = thread_add_timer(master,wakeup_no_answer,dmn,gs.timeout);
980 }
981 return 0;
982}
983
/* Termination handler, installed in main() for SIGINT and SIGTERM:
   log the event and exit successfully. */
static void
sigint(void)
{
  zlog_notice("Terminating on signal");
  exit(0);
}
990
/* Return 1 if CMD can be used as a per-daemon command template, i.e. its
   only '%' is the one that starts a single "%s"; return 0 otherwise. */
static int
valid_command(const char *cmd)
{
  const char *pct = strchr(cmd,'%');

  if (pct == NULL)
    return 0;
  if (pct[1] != 's')
    return 0;
  /* Any further '%' would make the template ambiguous. */
  return strchr(pct+1,'%') == NULL;
}
998
/* This is an ugly hack to circumvent problems with passing command-line
   arguments that contain spaces.  The fix is to use a configuration file.

   Return a newly allocated copy of CMD in which every occurrence of
   BLANKSTR is replaced by a single space.  CMD is scanned once from left
   to right and matches do not overlap.  An empty BLANKSTR, which would
   match everywhere, yields an unmodified copy.  The process exits if
   memory cannot be allocated; the caller owns the returned string. */
static char *
translate_blanks(const char *cmd, const char *blankstr)
{
  const size_t bslen = strlen(blankstr);
  const size_t cmdlen = strlen(cmd);
  const char *src = cmd;
  char *res;
  char *dst;

  /* A replacement never lengthens the text, so the input size suffices. */
  if (!(res = malloc(cmdlen+1)))
    {
      perror("malloc");
      exit(1);
    }

  if (bslen == 0)
    {
      memcpy(res,cmd,cmdlen+1);
      return res;
    }

  /* Because the input is scanned rather than the output, a BLANKSTR that
     itself contains a space cannot be matched again after replacement. */
  dst = res;
  while (*src != '\0')
    {
      if (strncmp(src,blankstr,bslen) == 0)
        {
          *dst++ = ' ';
          src += bslen;
        }
      else
        *dst++ = *src++;
    }
  *dst = '\0';
  return res;
}
1021
8b886ca7 1022int
1023main(int argc, char **argv)
1024{
1025 const char *progname;
1026 int opt;
1027 int daemon_mode = 0;
1028 const char *pidfile = DEFAULT_PIDFILE;
1029 const char *special = "zebra";
c8b40f86 1030 const char *blankstr = NULL;
8b886ca7 1031 static struct quagga_signal_t my_signals[] =
1032 {
1033 {
1034 .signal = SIGINT,
1035 .handler = sigint,
1036 },
1037 {
1038 .signal = SIGTERM,
1039 .handler = sigint,
1040 },
1041 {
1042 .signal = SIGCHLD,
1043 .handler = sigchild,
1044 },
1045 };
1046
1047 if ((progname = strrchr (argv[0], '/')) != NULL)
1048 progname++;
1049 else
1050 progname = argv[0];
1051
098e240f 1052 gs.restart.name = "all";
c8b40f86 1053 while ((opt = getopt_long(argc, argv, "aAb:dek:l:m:M:i:p:r:R:S:s:t:T:zvh",
8b886ca7 1054 longopts, 0)) != EOF)
1055 {
1056 switch (opt)
1057 {
1058 case 0:
1059 break;
1060 case 'a':
1061 if ((gs.mode != MODE_MONITOR) && (gs.mode != MODE_SEPARATE_RESTART))
1062 {
1063 fputs("Ambiguous operating mode selected.\n",stderr);
1064 return usage(progname,1);
1065 }
1066 gs.mode = MODE_PHASED_ZEBRA_RESTART;
1067 break;
1068 case 'A':
1069 if ((gs.mode != MODE_MONITOR) && (gs.mode != MODE_SEPARATE_RESTART))
1070 {
1071 fputs("Ambiguous operating mode selected.\n",stderr);
1072 return usage(progname,1);
1073 }
1074 gs.mode = MODE_PHASED_ALL_RESTART;
1075 break;
c8b40f86 1076 case 'b':
1077 blankstr = optarg;
1078 break;
8b886ca7 1079 case 'd':
1080 daemon_mode = 1;
1081 break;
1082 case 'e':
1083 gs.do_ping = 0;
1084 break;
1085 case 'k':
1086 if (!valid_command(optarg))
1087 {
1088 fprintf(stderr,"Invalid kill command, must contain '%%s': %s\n",
1089 optarg);
1090 return usage(progname,1);
1091 }
1092 gs.stop_command = optarg;
1093 break;
1094 case 'l':
1095 {
1096 char garbage[3];
1097 if ((sscanf(optarg,"%d%1s",&gs.loglevel,garbage) != 1) ||
1098 (gs.loglevel < LOG_EMERG))
1099 {
1100 fprintf(stderr,"Invalid loglevel argument: %s\n",optarg);
1101 return usage(progname,1);
1102 }
1103 }
1104 break;
1105 case 'm':
1106 {
1107 char garbage[3];
1108 if ((sscanf(optarg,"%ld%1s",
1109 &gs.min_restart_interval,garbage) != 1) ||
1110 (gs.min_restart_interval < 0))
1111 {
1112 fprintf(stderr,"Invalid min_restart_interval argument: %s\n",
1113 optarg);
1114 return usage(progname,1);
1115 }
1116 }
1117 break;
1118 case 'M':
1119 {
1120 char garbage[3];
1121 if ((sscanf(optarg,"%ld%1s",
1122 &gs.max_restart_interval,garbage) != 1) ||
1123 (gs.max_restart_interval < 0))
1124 {
1125 fprintf(stderr,"Invalid max_restart_interval argument: %s\n",
1126 optarg);
1127 return usage(progname,1);
1128 }
1129 }
1130 break;
1131 case 'i':
1132 {
1133 char garbage[3];
1134 int period;
1135 if ((sscanf(optarg,"%d%1s",&period,garbage) != 1) ||
1136 (gs.period < 1))
1137 {
1138 fprintf(stderr,"Invalid interval argument: %s\n",optarg);
1139 return usage(progname,1);
1140 }
1141 gs.period = 1000*period;
1142 }
1143 break;
1144 case 'p':
1145 pidfile = optarg;
1146 break;
1147 case 'r':
1148 if ((gs.mode == MODE_GLOBAL_RESTART) ||
1149 (gs.mode == MODE_SEPARATE_RESTART))
1150 {
1151 fputs("Ambiguous operating mode selected.\n",stderr);
1152 return usage(progname,1);
1153 }
1154 if (!valid_command(optarg))
1155 {
1156 fprintf(stderr,
1157 "Invalid restart command, must contain '%%s': %s\n",
1158 optarg);
1159 return usage(progname,1);
1160 }
1161 gs.restart_command = optarg;
1162 if (gs.mode == MODE_MONITOR)
1163 gs.mode = MODE_SEPARATE_RESTART;
1164 break;
1165 case 'R':
1166 if (gs.mode != MODE_MONITOR)
1167 {
1168 fputs("Ambiguous operating mode selected.\n",stderr);
1169 return usage(progname,1);
1170 }
1171 if (strchr(optarg,'%'))
1172 {
1173 fprintf(stderr,
1174 "Invalid restart-all arg, must not contain '%%s': %s\n",
1175 optarg);
1176 return usage(progname,1);
1177 }
1178 gs.restart_command = optarg;
1179 gs.mode = MODE_GLOBAL_RESTART;
1180 break;
1181 case 's':
1182 if (!valid_command(optarg))
1183 {
1184 fprintf(stderr,"Invalid start command, must contain '%%s': %s\n",
1185 optarg);
1186 return usage(progname,1);
1187 }
1188 gs.start_command = optarg;
1189 break;
1190 case 'S':
1191 gs.vtydir = optarg;
1192 break;
1193 case 't':
1194 {
1195 char garbage[3];
1196 if ((sscanf(optarg,"%ld%1s",&gs.timeout,garbage) != 1) ||
1197 (gs.timeout < 1))
1198 {
1199 fprintf(stderr,"Invalid timeout argument: %s\n",optarg);
1200 return usage(progname,1);
1201 }
1202 }
1203 break;
1204 case 'T':
1205 {
1206 char garbage[3];
1207 if ((sscanf(optarg,"%ld%1s",&gs.restart_timeout,garbage) != 1) ||
1208 (gs.restart_timeout < 1))
1209 {
1210 fprintf(stderr,"Invalid restart timeout argument: %s\n",optarg);
1211 return usage(progname,1);
1212 }
1213 }
1214 break;
1215 case 'z':
1216 gs.unresponsive_restart = 1;
1217 break;
1218 case 'v':
1219 printf ("%s version %s\n", progname, QUAGGA_VERSION);
1220 puts("Copyright 2004 Andrew J. Schorr");
1221 return 0;
1222 case 'h':
1223 return usage(progname,0);
1224 default:
1225 fputs("Invalid option.\n",stderr);
1226 return usage(progname,1);
1227 }
1228 }
1229
1230 if (gs.unresponsive_restart && (gs.mode == MODE_MONITOR))
1231 {
1232 fputs("Option -z requires a -r or -R restart option.\n",stderr);
1233 return usage(progname,1);
1234 }
1235 switch (gs.mode)
1236 {
1237 case MODE_MONITOR:
1238 if (gs.restart_command || gs.start_command || gs.stop_command)
1239 {
1240 fprintf(stderr,"No kill/(re)start commands needed for %s mode.\n",
1241 mode_str[gs.mode]);
1242 return usage(progname,1);
1243 }
1244 break;
1245 case MODE_GLOBAL_RESTART:
1246 case MODE_SEPARATE_RESTART:
1247 if (!gs.restart_command || gs.start_command || gs.stop_command)
1248 {
1249 fprintf(stderr,"No start/kill commands needed in [%s] mode.\n",
1250 mode_str[gs.mode]);
1251 return usage(progname,1);
1252 }
1253 break;
1254 case MODE_PHASED_ZEBRA_RESTART:
1255 case MODE_PHASED_ALL_RESTART:
1256 if (!gs.restart_command || !gs.start_command || !gs.stop_command)
1257 {
1258 fprintf(stderr,
1259 "Need start, kill, and restart commands in [%s] mode.\n",
1260 mode_str[gs.mode]);
1261 return usage(progname,1);
1262 }
1263 break;
1264 }
1265
c8b40f86 1266 if (blankstr)
1267 {
1268 if (gs.restart_command)
1269 gs.restart_command = translate_blanks(gs.restart_command,blankstr);
1270 if (gs.start_command)
1271 gs.start_command = translate_blanks(gs.start_command,blankstr);
1272 if (gs.stop_command)
1273 gs.stop_command = translate_blanks(gs.stop_command,blankstr);
1274 }
1275
8b886ca7 1276 gs.restart.interval = gs.min_restart_interval;
1277 master = thread_master_create();
837d16cc 1278 signal_init (master, array_size(my_signals), my_signals);
8b886ca7 1279 srandom(time(NULL));
1280
1281 {
1282 int i;
1283 struct daemon *tail = NULL;
1284
1285 for (i = optind; i < argc; i++)
1286 {
1287 struct daemon *dmn;
1288
1289 if (!(dmn = (struct daemon *)calloc(1,sizeof(*dmn))))
1290 {
098e240f 1291 fprintf(stderr,"calloc(1,%u) failed: %s\n",
1292 (u_int)sizeof(*dmn), safe_strerror(errno));
8b886ca7 1293 return 1;
1294 }
1295 dmn->name = dmn->restart.name = argv[i];
1296 dmn->state = DAEMON_INIT;
1297 gs.numdaemons++;
1298 gs.numdown++;
1299 dmn->fd = -1;
1300 dmn->t_wakeup = thread_add_timer_msec(master,wakeup_init,dmn,
1301 100+(random() % 900));
1302 dmn->restart.interval = gs.min_restart_interval;
1303 if (tail)
1304 tail->next = dmn;
1305 else
1306 gs.daemons = dmn;
1307 tail = dmn;
1308
1309 if (((gs.mode == MODE_PHASED_ZEBRA_RESTART) ||
1310 (gs.mode == MODE_PHASED_ALL_RESTART)) &&
1311 !strcmp(dmn->name,special))
1312 gs.special = dmn;
1313 }
1314 }
1315 if (!gs.daemons)
1316 {
1317 fputs("Must specify one or more daemons to monitor.\n",stderr);
1318 return usage(progname,1);
1319 }
1320 if (((gs.mode == MODE_PHASED_ZEBRA_RESTART) ||
1321 (gs.mode == MODE_PHASED_ALL_RESTART)) && !gs.special)
1322 {
1323 fprintf(stderr,"In mode [%s], but cannot find master daemon %s\n",
1324 mode_str[gs.mode],special);
1325 return usage(progname,1);
1326 }
8b886ca7 1327
7c8ff89e 1328 zlog_default = openzlog(progname, ZLOG_NONE, 0,
8b886ca7 1329 LOG_CONS|LOG_NDELAY|LOG_PID, LOG_DAEMON);
1330 zlog_set_level(NULL, ZLOG_DEST_MONITOR, ZLOG_DISABLED);
1331 if (daemon_mode)
1332 {
1333 zlog_set_level(NULL, ZLOG_DEST_SYSLOG, MIN(gs.loglevel,LOG_DEBUG));
065de903
SH
1334 if (daemon (0, 0) < 0)
1335 {
1336 fprintf(stderr, "Watchquagga daemon failed: %s", strerror(errno));
1337 exit (1);
1338 }
8b886ca7 1339 }
1340 else
1341 zlog_set_level(NULL, ZLOG_DEST_STDOUT, MIN(gs.loglevel,LOG_DEBUG));
1342
1343 /* Make sure we're not already running. */
1344 pid_output (pidfile);
1345
1346 /* Announce which daemons are being monitored. */
1347 {
1348 struct daemon *dmn;
1349 size_t len = 0;
1350
1351 for (dmn = gs.daemons; dmn; dmn = dmn->next)
1352 len += strlen(dmn->name)+1;
1353
1354 {
1355 char buf[len+1];
1356 char *p = buf;
1357
1358 for (dmn = gs.daemons; dmn; dmn = dmn->next)
1359 {
1360 if (p != buf)
1361 *p++ = ' ';
1362 strcpy(p,dmn->name);
1363 p += strlen(p);
1364 }
1365 zlog_notice("%s %s watching [%s], mode [%s]",
1366 progname, QUAGGA_VERSION, buf, mode_str[gs.mode]);
1367 }
1368 }
1369
1370 {
1371 struct thread thread;
1372
1373 while (thread_fetch (master, &thread))
1374 thread_call (&thread);
1375 }
1376
1377 /* Not reached. */
1378 return 0;
1379}