git.proxmox.com Git - mirror_ovs.git/blob - lib/daemon.c
daemon: Don't call a normal exit from the monitor a "crash".
[mirror_ovs.git] / lib / daemon.c
1 /*
2 * Copyright (c) 2008, 2009, 2010 Nicira Networks.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <config.h>
18 #include "daemon.h"
19 #include <errno.h>
20 #include <fcntl.h>
21 #include <signal.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <sys/resource.h>
25 #include <sys/wait.h>
26 #include <sys/stat.h>
27 #include <unistd.h>
28 #include "command-line.h"
29 #include "fatal-signal.h"
30 #include "dirs.h"
31 #include "lockfile.h"
32 #include "process.h"
33 #include "socket-util.h"
34 #include "timeval.h"
35 #include "util.h"
36 #include "vlog.h"
37
38 VLOG_DEFINE_THIS_MODULE(daemon)
39
/* --detach: True if the process should run in the background. */
static bool detach;

/* --pidfile: Name of the pidfile, or null if no pidfile is configured. */
static char *pidfile;

/* Device and inode numbers of the pidfile created by make_pidfile(), kept so
 * that read_pidfile() can recognize our own pidfile without reopening it. */
static dev_t pidfile_dev;
static ino_t pidfile_ino;

/* --overwrite-pidfile: True to create the pidfile even if one already exists
 * and is locked. */
static bool overwrite_pidfile;

/* --no-chdir: False if the chdir to "/" should be skipped when detaching. */
static bool chdir_ = true;

/* File descriptor shared between daemonize_start() and daemonize_complete(),
 * or -1 if there is none. */
static int daemonize_fd = -1;

/* --monitor: True if a supervisory process should monitor the daemon and
 * restart it if it dies due to an error signal. */
static bool monitor;
63
64 /* Returns the file name that would be used for a pidfile if 'name' were
65 * provided to set_pidfile(). The caller must free the returned string. */
66 char *
67 make_pidfile_name(const char *name)
68 {
69 return (!name
70 ? xasprintf("%s/%s.pid", ovs_rundir, program_name)
71 : abs_file_name(ovs_rundir, name));
72 }
73
74 /* Sets up a following call to daemonize() to create a pidfile named 'name'.
75 * If 'name' begins with '/', then it is treated as an absolute path.
76 * Otherwise, it is taken relative to RUNDIR, which is $(prefix)/var/run by
77 * default.
78 *
79 * If 'name' is null, then program_name followed by ".pid" is used. */
80 void
81 set_pidfile(const char *name)
82 {
83 free(pidfile);
84 pidfile = make_pidfile_name(name);
85 }
86
87 /* Returns an absolute path to the configured pidfile, or a null pointer if no
88 * pidfile is configured. The caller must not modify or free the returned
89 * string. */
90 const char *
91 get_pidfile(void)
92 {
93 return pidfile;
94 }
95
96 /* Sets that we do not chdir to "/". */
97 void
98 set_no_chdir(void)
99 {
100 chdir_ = false;
101 }
102
103 /* Will we chdir to "/" as part of daemonizing? */
104 bool
105 is_chdir_enabled(void)
106 {
107 return chdir_;
108 }
109
110 /* Normally, die_if_already_running() will terminate the program with a message
111 * if a locked pidfile already exists. If this function is called,
112 * die_if_already_running() will merely log a warning. */
113 void
114 ignore_existing_pidfile(void)
115 {
116 overwrite_pidfile = true;
117 }
118
119 /* Sets up a following call to daemonize() to detach from the foreground
120 * session, running this process in the background. */
121 void
122 set_detach(void)
123 {
124 detach = true;
125 }
126
127 /* Will daemonize() really detach? */
128 bool
129 get_detach(void)
130 {
131 return detach;
132 }
133
134 /* Sets up a following call to daemonize() to fork a supervisory process to
135 * monitor the daemon and restart it if it dies due to an error signal. */
136 void
137 daemon_set_monitor(void)
138 {
139 monitor = true;
140 }
141
142 /* If a pidfile has been configured and that pidfile already exists and is
143 * locked by a running process, returns the pid of the running process.
144 * Otherwise, returns 0. */
145 static pid_t
146 already_running(void)
147 {
148 pid_t pid = 0;
149 if (pidfile) {
150 int fd = open(pidfile, O_RDWR);
151 if (fd >= 0) {
152 struct flock lck;
153 lck.l_type = F_WRLCK;
154 lck.l_whence = SEEK_SET;
155 lck.l_start = 0;
156 lck.l_len = 0;
157 if (fcntl(fd, F_GETLK, &lck) != -1 && lck.l_type != F_UNLCK) {
158 pid = lck.l_pid;
159 }
160 close(fd);
161 }
162 }
163 return pid;
164 }
165
166 /* If a locked pidfile exists, issue a warning message and, unless
167 * ignore_existing_pidfile() has been called, terminate the program. */
168 void
169 die_if_already_running(void)
170 {
171 pid_t pid = already_running();
172 if (pid) {
173 if (!overwrite_pidfile) {
174 ovs_fatal(0, "%s: already running as pid %ld",
175 get_pidfile(), (long int) pid);
176 } else {
177 VLOG_WARN("%s: %s already running as pid %ld",
178 get_pidfile(), program_name, (long int) pid);
179 }
180 }
181 }
182
183 /* If a pidfile has been configured, creates it and stores the running
184 * process's pid in it. Ensures that the pidfile will be deleted when the
185 * process exits. */
186 static void
187 make_pidfile(void)
188 {
189 if (pidfile) {
190 /* Create pidfile via temporary file, so that observers never see an
191 * empty pidfile or an unlocked pidfile. */
192 long int pid = getpid();
193 char *tmpfile;
194 int fd;
195
196 tmpfile = xasprintf("%s.tmp%ld", pidfile, pid);
197 fatal_signal_add_file_to_unlink(tmpfile);
198 fd = open(tmpfile, O_CREAT | O_WRONLY | O_TRUNC, 0666);
199 if (fd >= 0) {
200 struct flock lck;
201 lck.l_type = F_WRLCK;
202 lck.l_whence = SEEK_SET;
203 lck.l_start = 0;
204 lck.l_len = 0;
205 if (fcntl(fd, F_SETLK, &lck) != -1) {
206 char *text = xasprintf("%ld\n", pid);
207 if (write(fd, text, strlen(text)) == strlen(text)) {
208 fatal_signal_add_file_to_unlink(pidfile);
209 if (rename(tmpfile, pidfile) < 0) {
210 VLOG_ERR("failed to rename \"%s\" to \"%s\": %s",
211 tmpfile, pidfile, strerror(errno));
212 fatal_signal_remove_file_to_unlink(pidfile);
213 close(fd);
214 } else {
215 /* Keep 'fd' open to retain the lock. */
216 struct stat s;
217
218 if (!fstat(fd, &s)) {
219 pidfile_dev = s.st_dev;
220 pidfile_ino = s.st_ino;
221 } else {
222 VLOG_ERR("%s: fstat failed: %s",
223 pidfile, strerror(errno));
224 }
225 }
226 free(text);
227 } else {
228 VLOG_ERR("%s: write failed: %s", tmpfile, strerror(errno));
229 close(fd);
230 }
231 } else {
232 VLOG_ERR("%s: fcntl failed: %s", tmpfile, strerror(errno));
233 close(fd);
234 }
235 } else {
236 VLOG_ERR("%s: create failed: %s", tmpfile, strerror(errno));
237 }
238 fatal_signal_remove_file_to_unlink(tmpfile);
239 free(tmpfile);
240 }
241 free(pidfile);
242 pidfile = NULL;
243 }
244
/* Convenience wrapper that runs daemonize_start() immediately followed by
 * daemonize_complete(): if configured with set_pidfile() or set_detach(),
 * creates the pid file and detaches from the foreground session. */
void
daemonize(void)
{
    daemonize_start();
    daemonize_complete();
}
253
/* Forks, then makes the parent wait until the child reports that it has
 * started up.
 *
 * In the child, returns 0 and stores in '*fdp' the write end of a pipe; the
 * child reports successful startup by passing that descriptor to
 * fork_notify_startup().
 *
 * In the parent, blocks until the child writes a byte to the pipe, then
 * stores -1 in '*fdp' and returns the child's pid.  If the child instead goes
 * away without reporting startup, the parent does not return: it exits with
 * the child's exit code if the child exited with a nonzero code, and
 * otherwise terminates with a fatal error that describes the child's status.
 *
 * Failure of pipe() or fork() is fatal. */
static pid_t
fork_and_wait_for_startup(int *fdp)
{
    int fds[2];
    pid_t pid;

    if (pipe(fds) < 0) {
        ovs_fatal(errno, "pipe failed");
    }

    pid = fork();
    if (pid > 0) {
        /* Running in parent process. */
        ssize_t n;
        char c;

        close(fds[1]);
        fatal_signal_fork();

        /* A signal arriving while we block must not be mistaken for the
         * child failing to start, so retry an interrupted read. */
        do {
            n = read(fds[0], &c, 1);
        } while (n < 0 && errno == EINTR);

        if (n != 1) {
            char *status_msg;
            int retval;
            int status;

            do {
                retval = waitpid(pid, &status, 0);
            } while (retval == -1 && errno == EINTR);

            if (retval != pid) {
                ovs_fatal(errno, "waitpid failed");
            }

            if (WIFEXITED(status) && WEXITSTATUS(status)) {
                /* Child exited with an error.  Convey the same error to
                 * our parent process as a courtesy. */
                exit(WEXITSTATUS(status));
            }

            /* errno carries no useful information here (the read may simply
             * have hit end of file), so report the child's status instead. */
            status_msg = process_status_msg(status);
            ovs_fatal(0, "fork child failed to signal startup (%s)",
                      status_msg);
        }
        close(fds[0]);
        *fdp = -1;
    } else if (!pid) {
        /* Running in child process. */
        close(fds[0]);
        time_postfork();
        lockfile_postfork();
        *fdp = fds[1];
    } else {
        ovs_fatal(errno, "could not fork");
    }

    return pid;
}
303
/* Reports successful startup on 'fd', the descriptor that
 * fork_and_wait_for_startup() handed to the child, by writing a single byte
 * to it and then closing it.  Does nothing if 'fd' is -1.  A write failure is
 * fatal. */
static void
fork_notify_startup(int fd)
{
    size_t n_written;
    int err;

    if (fd == -1) {
        return;
    }

    err = write_fully(fd, "", 1, &n_written);
    if (err) {
        ovs_fatal(err, "could not write to pipe");
    }

    close(fd);
}
319
/* Decides whether a daemon whose wait status is 'status' should be restarted
 * by the monitor.  Returns true only if the daemon was terminated by one of
 * the signals listed below; returns false for any other signal and for any
 * status that is not a termination by signal. */
static bool
should_restart(int status)
{
    if (!WIFSIGNALED(status)) {
        return false;
    }

    switch (WTERMSIG(status)) {
    case SIGABRT:
    case SIGALRM:
    case SIGBUS:
    case SIGFPE:
    case SIGILL:
    case SIGPIPE:
    case SIGSEGV:
    case SIGXCPU:
    case SIGXFSZ:
        return true;

    default:
        return false;
    }
}
339
340 static void
341 monitor_daemon(pid_t daemon_pid)
342 {
343 /* XXX Should log daemon's stderr output at startup time. */
344 const char *saved_program_name;
345 time_t last_restart;
346 char *status_msg;
347 int crashes;
348
349 saved_program_name = program_name;
350 program_name = xasprintf("monitor(%s)", program_name);
351 status_msg = xstrdup("healthy");
352 last_restart = TIME_MIN;
353 crashes = 0;
354 for (;;) {
355 int retval;
356 int status;
357
358 proctitle_set("%s: monitoring pid %lu (%s)",
359 saved_program_name, (unsigned long int) daemon_pid,
360 status_msg);
361
362 do {
363 retval = waitpid(daemon_pid, &status, 0);
364 } while (retval == -1 && errno == EINTR);
365
366 if (retval == -1) {
367 ovs_fatal(errno, "waitpid failed");
368 } else if (retval == daemon_pid) {
369 char *s = process_status_msg(status);
370 if (should_restart(status)) {
371 free(status_msg);
372 status_msg = xasprintf("%d crashes: pid %lu died, %s",
373 ++crashes,
374 (unsigned long int) daemon_pid, s);
375 free(s);
376
377 if (WCOREDUMP(status)) {
378 /* Disable further core dumps to save disk space. */
379 struct rlimit r;
380
381 r.rlim_cur = 0;
382 r.rlim_max = 0;
383 if (setrlimit(RLIMIT_CORE, &r) == -1) {
384 VLOG_WARN("failed to disable core dumps: %s",
385 strerror(errno));
386 }
387 }
388
389 /* Throttle restarts to no more than once every 10 seconds. */
390 if (time(NULL) < last_restart + 10) {
391 VLOG_WARN("%s, waiting until 10 seconds since last "
392 "restart", status_msg);
393 for (;;) {
394 time_t now = time(NULL);
395 time_t wakeup = last_restart + 10;
396 if (now >= wakeup) {
397 break;
398 }
399 sleep(wakeup - now);
400 }
401 }
402 last_restart = time(NULL);
403
404 VLOG_ERR("%s, restarting", status_msg);
405 daemon_pid = fork_and_wait_for_startup(&daemonize_fd);
406 if (!daemon_pid) {
407 break;
408 }
409 } else {
410 VLOG_INFO("pid %lu died, %s, exiting",
411 (unsigned long int) daemon_pid, s);
412 free(s);
413 exit(0);
414 }
415 }
416 }
417 free(status_msg);
418
419 /* Running in new daemon process. */
420 proctitle_restore();
421 free((char *) program_name);
422 program_name = saved_program_name;
423 }
424
/* Redirects stdin, stdout, and stderr to the descriptor returned by
 * get_null_fd(), so that a daemon started from e.g. an SSH session does not
 * hold that session open artificially.  Does nothing if get_null_fd() returns
 * a negative value. */
static void
close_standard_fds(void)
{
    static const int standard_fds[] = {
        STDIN_FILENO, STDOUT_FILENO, STDERR_FILENO
    };
    int null_fd = get_null_fd();
    size_t i;

    if (null_fd < 0) {
        return;
    }

    for (i = 0; i < sizeof standard_fds / sizeof standard_fds[0]; i++) {
        dup2(null_fd, standard_fds[i]);
    }
}
437
438 /* If daemonization is configured, then starts daemonization, by forking and
439 * returning in the child process. The parent process hangs around until the
440 * child lets it know either that it completed startup successfully (by calling
441 * daemon_complete()) or that it failed to start up (by exiting with a nonzero
442 * exit code). */
443 void
444 daemonize_start(void)
445 {
446 daemonize_fd = -1;
447
448 if (detach) {
449 if (fork_and_wait_for_startup(&daemonize_fd) > 0) {
450 /* Running in parent process. */
451 exit(0);
452 }
453 /* Running in daemon or monitor process. */
454 }
455
456 if (monitor) {
457 int saved_daemonize_fd = daemonize_fd;
458 pid_t daemon_pid;
459
460 daemon_pid = fork_and_wait_for_startup(&daemonize_fd);
461 if (daemon_pid > 0) {
462 /* Running in monitor process. */
463 fork_notify_startup(saved_daemonize_fd);
464 close_standard_fds();
465 monitor_daemon(daemon_pid);
466 }
467 /* Running in daemon process. */
468 }
469
470 make_pidfile();
471
472 /* Make sure that the unixctl commands for vlog get registered in a
473 * daemon, even before the first log message. */
474 vlog_init();
475 }
476
477 /* If daemonization is configured, then this function notifies the parent
478 * process that the child process has completed startup successfully. */
479 void
480 daemonize_complete(void)
481 {
482 fork_notify_startup(daemonize_fd);
483
484 if (detach) {
485 setsid();
486 if (chdir_) {
487 ignore(chdir("/"));
488 }
489 close_standard_fds();
490 }
491 }
492
493 void
494 daemon_usage(void)
495 {
496 printf(
497 "\nDaemon options:\n"
498 " --detach run in background as daemon\n"
499 " --no-chdir do not chdir to '/'\n"
500 " --pidfile[=FILE] create pidfile (default: %s/%s.pid)\n"
501 " --overwrite-pidfile with --pidfile, start even if already "
502 "running\n",
503 ovs_rundir, program_name);
504 }
505
506 /* Opens and reads a PID from 'pidfile'. Returns the nonnegative PID if
507 * successful, otherwise a negative errno value. */
508 pid_t
509 read_pidfile(const char *pidfile)
510 {
511 char line[128];
512 struct flock lck;
513 struct stat s;
514 FILE *file;
515 int error;
516
517 if ((pidfile_ino || pidfile_dev)
518 && !stat(pidfile, &s)
519 && s.st_ino == pidfile_ino && s.st_dev == pidfile_dev) {
520 /* It's our own pidfile. We can't afford to open it, because closing
521 * *any* fd for a file that a process has locked also releases all the
522 * locks on that file.
523 *
524 * Fortunately, we know the associated pid anyhow: */
525 return getpid();
526 }
527
528 file = fopen(pidfile, "r");
529 if (!file) {
530 error = errno;
531 VLOG_WARN("%s: open: %s", pidfile, strerror(error));
532 goto error;
533 }
534
535 lck.l_type = F_WRLCK;
536 lck.l_whence = SEEK_SET;
537 lck.l_start = 0;
538 lck.l_len = 0;
539 if (fcntl(fileno(file), F_GETLK, &lck)) {
540 error = errno;
541 VLOG_WARN("%s: fcntl: %s", pidfile, strerror(error));
542 goto error;
543 }
544 if (lck.l_type == F_UNLCK) {
545 error = ESRCH;
546 VLOG_WARN("%s: pid file is not locked", pidfile);
547 goto error;
548 }
549
550 if (!fgets(line, sizeof line, file)) {
551 if (ferror(file)) {
552 error = errno;
553 VLOG_WARN("%s: read: %s", pidfile, strerror(error));
554 } else {
555 error = ESRCH;
556 VLOG_WARN("%s: read: unexpected end of file", pidfile);
557 }
558 goto error;
559 }
560
561 if (lck.l_pid != strtoul(line, NULL, 10)) {
562 error = ESRCH;
563 VLOG_WARN("l_pid (%ld) != %s pid (%s)",
564 (long int) lck.l_pid, pidfile, line);
565 goto error;
566 }
567
568 fclose(file);
569 return lck.l_pid;
570
571 error:
572 if (file) {
573 fclose(file);
574 }
575 return -error;
576 }