git.proxmox.com Git - mirror_ovs.git/blob - lib/daemon.c
daemon: Tolerate EINTR in fork_and_wait_for_startup().
[mirror_ovs.git] / lib / daemon.c
1 /*
2 * Copyright (c) 2008, 2009, 2010, 2011 Nicira Networks.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <config.h>
18 #include "daemon.h"
19 #include <errno.h>
20 #include <fcntl.h>
21 #include <signal.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <sys/resource.h>
25 #include <sys/wait.h>
26 #include <sys/stat.h>
27 #include <unistd.h>
28 #include "command-line.h"
29 #include "fatal-signal.h"
30 #include "dirs.h"
31 #include "lockfile.h"
32 #include "process.h"
33 #include "socket-util.h"
34 #include "timeval.h"
35 #include "util.h"
36 #include "vlog.h"
37
VLOG_DEFINE_THIS_MODULE(daemon);

/* --detach: true if the process should put itself in the background. */
static bool detach = false;

/* --pidfile: name of the pidfile to create, or NULL if none was requested.
 * make_pidfile() frees this and resets it to NULL once it has run. */
static char *pidfile = NULL;

/* Device and inode numbers of the pidfile created by this process, recorded
 * so that read_pidfile__() can recognize it without reopening it. */
static dev_t pidfile_dev;
static ino_t pidfile_ino;

/* --overwrite-pidfile: true if an existing, locked pidfile should only
 * produce a warning instead of terminating the program. */
static bool overwrite_pidfile = false;

/* --no-chdir: false if the daemon should stay in its current directory
 * instead of changing to "/" when it detaches. */
static bool chdir_ = true;

/* Write end of the startup-notification pipe shared by daemonize_start() and
 * daemonize_complete(), or -1 if there is nobody to notify. */
static int daemonize_fd = -1;

/* --monitor: true if a supervisory process should watch the daemon and
 * restart it when it dies due to an error signal. */
static bool monitor = false;
63
64 /* Returns the file name that would be used for a pidfile if 'name' were
65 * provided to set_pidfile(). The caller must free the returned string. */
66 char *
67 make_pidfile_name(const char *name)
68 {
69 return (!name
70 ? xasprintf("%s/%s.pid", ovs_rundir(), program_name)
71 : abs_file_name(ovs_rundir(), name));
72 }
73
74 /* Sets up a following call to daemonize() to create a pidfile named 'name'.
75 * If 'name' begins with '/', then it is treated as an absolute path.
76 * Otherwise, it is taken relative to RUNDIR, which is $(prefix)/var/run by
77 * default.
78 *
79 * If 'name' is null, then program_name followed by ".pid" is used. */
80 void
81 set_pidfile(const char *name)
82 {
83 free(pidfile);
84 pidfile = make_pidfile_name(name);
85 }
86
87 /* Returns an absolute path to the configured pidfile, or a null pointer if no
88 * pidfile is configured. The caller must not modify or free the returned
89 * string. */
90 const char *
91 get_pidfile(void)
92 {
93 return pidfile;
94 }
95
96 /* Sets that we do not chdir to "/". */
97 void
98 set_no_chdir(void)
99 {
100 chdir_ = false;
101 }
102
103 /* Will we chdir to "/" as part of daemonizing? */
104 bool
105 is_chdir_enabled(void)
106 {
107 return chdir_;
108 }
109
110 /* Normally, die_if_already_running() will terminate the program with a message
111 * if a locked pidfile already exists. If this function is called,
112 * die_if_already_running() will merely log a warning. */
113 void
114 ignore_existing_pidfile(void)
115 {
116 overwrite_pidfile = true;
117 }
118
119 /* Sets up a following call to daemonize() to detach from the foreground
120 * session, running this process in the background. */
121 void
122 set_detach(void)
123 {
124 detach = true;
125 }
126
127 /* Will daemonize() really detach? */
128 bool
129 get_detach(void)
130 {
131 return detach;
132 }
133
134 /* Sets up a following call to daemonize() to fork a supervisory process to
135 * monitor the daemon and restart it if it dies due to an error signal. */
136 void
137 daemon_set_monitor(void)
138 {
139 monitor = true;
140 }
141
142 /* If a locked pidfile exists, issue a warning message and, unless
143 * ignore_existing_pidfile() has been called, terminate the program. */
144 void
145 die_if_already_running(void)
146 {
147 pid_t pid;
148 if (!pidfile) {
149 return;
150 }
151 pid = read_pidfile_if_exists(pidfile);
152 if (pid > 0) {
153 if (!overwrite_pidfile) {
154 VLOG_ERR("%s: %s already running as pid %ld, aborting",
155 get_pidfile(), program_name, (long int) pid);
156 ovs_fatal(0, "%s: already running as pid %ld",
157 get_pidfile(), (long int) pid);
158 } else {
159 VLOG_WARN("%s: %s already running as pid %ld",
160 get_pidfile(), program_name, (long int) pid);
161 }
162 }
163 }
164
165 /* If a pidfile has been configured, creates it and stores the running
166 * process's pid in it. Ensures that the pidfile will be deleted when the
167 * process exits. */
168 static void
169 make_pidfile(void)
170 {
171 if (pidfile) {
172 /* Create pidfile via temporary file, so that observers never see an
173 * empty pidfile or an unlocked pidfile. */
174 long int pid = getpid();
175 char *tmpfile;
176 int fd;
177
178 tmpfile = xasprintf("%s.tmp%ld", pidfile, pid);
179 fatal_signal_add_file_to_unlink(tmpfile);
180 fd = open(tmpfile, O_CREAT | O_WRONLY | O_TRUNC, 0666);
181 if (fd >= 0) {
182 struct flock lck;
183 lck.l_type = F_WRLCK;
184 lck.l_whence = SEEK_SET;
185 lck.l_start = 0;
186 lck.l_len = 0;
187 if (fcntl(fd, F_SETLK, &lck) != -1) {
188 char *text = xasprintf("%ld\n", pid);
189 if (write(fd, text, strlen(text)) == strlen(text)) {
190 fatal_signal_add_file_to_unlink(pidfile);
191 if (rename(tmpfile, pidfile) < 0) {
192 VLOG_ERR("failed to rename \"%s\" to \"%s\": %s",
193 tmpfile, pidfile, strerror(errno));
194 fatal_signal_remove_file_to_unlink(pidfile);
195 close(fd);
196 } else {
197 /* Keep 'fd' open to retain the lock. */
198 struct stat s;
199
200 if (!fstat(fd, &s)) {
201 pidfile_dev = s.st_dev;
202 pidfile_ino = s.st_ino;
203 } else {
204 VLOG_ERR("%s: fstat failed: %s",
205 pidfile, strerror(errno));
206 }
207 }
208 } else {
209 VLOG_ERR("%s: write failed: %s", tmpfile, strerror(errno));
210 close(fd);
211 }
212 free(text);
213 } else {
214 VLOG_ERR("%s: fcntl failed: %s", tmpfile, strerror(errno));
215 close(fd);
216 }
217 } else {
218 VLOG_ERR("%s: create failed: %s", tmpfile, strerror(errno));
219 }
220 fatal_signal_remove_file_to_unlink(tmpfile);
221 free(tmpfile);
222 }
223 free(pidfile);
224 pidfile = NULL;
225 }
226
/* One-shot daemonization: runs daemonize_start() immediately followed by
 * daemonize_complete().  Depending on what set_pidfile(), set_detach() and
 * daemon_set_monitor() configured, this creates the pidfile, starts the
 * monitor, and detaches from the foreground session. */
void
daemonize(void)
{
    daemonize_start();
    daemonize_complete();
}
235
/* Forks, and in the parent blocks until the child reports that it has
 * finished starting up.
 *
 * A pipe connects the two processes:
 *
 *   - In the child, this function returns 0 and stores the pipe's write end
 *     in '*fdp'.  The child must later pass that descriptor to
 *     fork_notify_startup().
 *
 *   - In the parent, this function waits for one byte to arrive on the pipe,
 *     then stores -1 in '*fdp' and returns the child's pid.
 *
 * If the pipe closes before a byte arrives, the parent reaps the child.  A
 * child that exited with a nonzero status has that status propagated as the
 * parent's own exit status.  Any other outcome is reported as a fatal error
 * that describes what actually happened to the child, instead of printing an
 * unrelated errno value.
 *
 * A fork() failure is fatal. */
static pid_t
fork_and_wait_for_startup(int *fdp)
{
    int fds[2];
    pid_t pid;

    xpipe(fds);

    pid = fork();
    if (pid > 0) {
        /* Running in parent process. */
        size_t bytes_read;
        char c;

        /* Close our copy of the write end so that the read below sees EOF
         * if the child dies without writing. */
        close(fds[1]);
        fatal_signal_fork();
        if (read_fully(fds[0], &c, 1, &bytes_read) != 0) {
            int retval;
            int status;

            do {
                retval = waitpid(pid, &status, 0);
            } while (retval == -1 && errno == EINTR);

            if (retval == pid) {
                if (WIFEXITED(status) && WEXITSTATUS(status)) {
                    /* Child exited with an error.  Convey the same error to
                     * our parent process as a courtesy. */
                    exit(WEXITSTATUS(status));
                } else {
                    char *status_msg = process_status_msg(status);

                    VLOG_FATAL("fork child died before signaling startup "
                               "(%s)", status_msg);
                }
            } else {
                VLOG_FATAL("waitpid failed (%s)", strerror(errno));
            }
        }
        close(fds[0]);
        *fdp = -1;
    } else if (!pid) {
        /* Running in child process. */
        close(fds[0]);
        time_postfork();
        lockfile_postfork();
        *fdp = fds[1];
    } else {
        VLOG_FATAL("fork failed (%s)", strerror(errno));
    }

    return pid;
}
285
/* Tells the process blocked in fork_and_wait_for_startup() that startup has
 * finished, by writing a single byte to 'fd' and then closing it.
 *
 * Does nothing if 'fd' is -1.  A failed write is a fatal error. */
static void
fork_notify_startup(int fd)
{
    size_t n_sent;
    int err;

    if (fd == -1) {
        return;
    }

    err = write_fully(fd, "", 1, &n_sent);
    if (err) {
        VLOG_FATAL("pipe write failed (%s)", strerror(err));
    }

    close(fd);
}
301
/* Decides whether a daemon that terminated with wait status 'status' should
 * be restarted by the monitor.
 *
 * Returns true only if the process was killed by one of the signals treated
 * here as an error: SIGABRT, SIGALRM, SIGBUS, SIGFPE, SIGILL, SIGPIPE,
 * SIGSEGV, SIGXCPU, or SIGXFSZ.  A normal exit or death by any other signal
 * yields false. */
static bool
should_restart(int status)
{
    if (!WIFSIGNALED(status)) {
        return false;
    }

    switch (WTERMSIG(status)) {
    case SIGABRT:
    case SIGALRM:
    case SIGBUS:
    case SIGFPE:
    case SIGILL:
    case SIGPIPE:
    case SIGSEGV:
    case SIGXCPU:
    case SIGXFSZ:
        return true;

    default:
        return false;
    }
}
321
322 static void
323 monitor_daemon(pid_t daemon_pid)
324 {
325 /* XXX Should log daemon's stderr output at startup time. */
326 const char *saved_program_name;
327 time_t last_restart;
328 char *status_msg;
329 int crashes;
330
331 saved_program_name = program_name;
332 program_name = xasprintf("monitor(%s)", program_name);
333 status_msg = xstrdup("healthy");
334 last_restart = TIME_MIN;
335 crashes = 0;
336 for (;;) {
337 int retval;
338 int status;
339
340 proctitle_set("%s: monitoring pid %lu (%s)",
341 saved_program_name, (unsigned long int) daemon_pid,
342 status_msg);
343
344 do {
345 retval = waitpid(daemon_pid, &status, 0);
346 } while (retval == -1 && errno == EINTR);
347
348 if (retval == -1) {
349 VLOG_FATAL("waitpid failed (%s)", strerror(errno));
350 } else if (retval == daemon_pid) {
351 char *s = process_status_msg(status);
352 if (should_restart(status)) {
353 free(status_msg);
354 status_msg = xasprintf("%d crashes: pid %lu died, %s",
355 ++crashes,
356 (unsigned long int) daemon_pid, s);
357 free(s);
358
359 if (WCOREDUMP(status)) {
360 /* Disable further core dumps to save disk space. */
361 struct rlimit r;
362
363 r.rlim_cur = 0;
364 r.rlim_max = 0;
365 if (setrlimit(RLIMIT_CORE, &r) == -1) {
366 VLOG_WARN("failed to disable core dumps: %s",
367 strerror(errno));
368 }
369 }
370
371 /* Throttle restarts to no more than once every 10 seconds. */
372 if (time(NULL) < last_restart + 10) {
373 VLOG_WARN("%s, waiting until 10 seconds since last "
374 "restart", status_msg);
375 for (;;) {
376 time_t now = time(NULL);
377 time_t wakeup = last_restart + 10;
378 if (now >= wakeup) {
379 break;
380 }
381 sleep(wakeup - now);
382 }
383 }
384 last_restart = time(NULL);
385
386 VLOG_ERR("%s, restarting", status_msg);
387 daemon_pid = fork_and_wait_for_startup(&daemonize_fd);
388 if (!daemon_pid) {
389 break;
390 }
391 } else {
392 VLOG_INFO("pid %lu died, %s, exiting",
393 (unsigned long int) daemon_pid, s);
394 free(s);
395 exit(0);
396 }
397 }
398 }
399 free(status_msg);
400
401 /* Running in new daemon process. */
402 proctitle_restore();
403 free((char *) program_name);
404 program_name = saved_program_name;
405 }
406
/* Points stdin, stdout, and stderr at the descriptor returned by
 * get_null_fd(), so that a daemon started from e.g. an SSH session does not
 * keep that session open artificially.
 *
 * Does nothing if get_null_fd() reports an error (a negative value). */
static void
close_standard_fds(void)
{
    static const int std_fds[] = {
        STDIN_FILENO, STDOUT_FILENO, STDERR_FILENO
    };
    int null_fd = get_null_fd();
    size_t i;

    if (null_fd < 0) {
        return;
    }

    for (i = 0; i < ARRAY_SIZE(std_fds); i++) {
        dup2(null_fd, std_fds[i]);
    }
}
419
420 /* If daemonization is configured, then starts daemonization, by forking and
421 * returning in the child process. The parent process hangs around until the
422 * child lets it know either that it completed startup successfully (by calling
423 * daemon_complete()) or that it failed to start up (by exiting with a nonzero
424 * exit code). */
425 void
426 daemonize_start(void)
427 {
428 daemonize_fd = -1;
429
430 if (detach) {
431 if (fork_and_wait_for_startup(&daemonize_fd) > 0) {
432 /* Running in parent process. */
433 exit(0);
434 }
435 /* Running in daemon or monitor process. */
436 }
437
438 if (monitor) {
439 int saved_daemonize_fd = daemonize_fd;
440 pid_t daemon_pid;
441
442 daemon_pid = fork_and_wait_for_startup(&daemonize_fd);
443 if (daemon_pid > 0) {
444 /* Running in monitor process. */
445 fork_notify_startup(saved_daemonize_fd);
446 close_standard_fds();
447 monitor_daemon(daemon_pid);
448 }
449 /* Running in daemon process. */
450 }
451
452 make_pidfile();
453
454 /* Make sure that the unixctl commands for vlog get registered in a
455 * daemon, even before the first log message. */
456 vlog_init();
457 }
458
459 /* If daemonization is configured, then this function notifies the parent
460 * process that the child process has completed startup successfully.
461 *
462 * Calling this function more than once has no additional effect. */
463 void
464 daemonize_complete(void)
465 {
466 fork_notify_startup(daemonize_fd);
467 daemonize_fd = -1;
468
469 if (detach) {
470 setsid();
471 if (chdir_) {
472 ignore(chdir("/"));
473 }
474 close_standard_fds();
475 detach = false;
476 }
477 }
478
479 void
480 daemon_usage(void)
481 {
482 printf(
483 "\nDaemon options:\n"
484 " --detach run in background as daemon\n"
485 " --no-chdir do not chdir to '/'\n"
486 " --pidfile[=FILE] create pidfile (default: %s/%s.pid)\n"
487 " --overwrite-pidfile with --pidfile, start even if already "
488 "running\n",
489 ovs_rundir(), program_name);
490 }
491
492 static pid_t
493 read_pidfile__(const char *pidfile, bool must_exist)
494 {
495 char line[128];
496 struct flock lck;
497 struct stat s;
498 FILE *file;
499 int error;
500
501 if ((pidfile_ino || pidfile_dev)
502 && !stat(pidfile, &s)
503 && s.st_ino == pidfile_ino && s.st_dev == pidfile_dev) {
504 /* It's our own pidfile. We can't afford to open it, because closing
505 * *any* fd for a file that a process has locked also releases all the
506 * locks on that file.
507 *
508 * Fortunately, we know the associated pid anyhow: */
509 return getpid();
510 }
511
512 file = fopen(pidfile, "r");
513 if (!file) {
514 if (errno == ENOENT && !must_exist) {
515 return 0;
516 }
517 error = errno;
518 VLOG_WARN("%s: open: %s", pidfile, strerror(error));
519 goto error;
520 }
521
522 lck.l_type = F_WRLCK;
523 lck.l_whence = SEEK_SET;
524 lck.l_start = 0;
525 lck.l_len = 0;
526 lck.l_pid = 0;
527 if (fcntl(fileno(file), F_GETLK, &lck)) {
528 error = errno;
529 VLOG_WARN("%s: fcntl: %s", pidfile, strerror(error));
530 goto error;
531 }
532 if (lck.l_type == F_UNLCK) {
533 error = ESRCH;
534 VLOG_WARN("%s: pid file is not locked", pidfile);
535 goto error;
536 }
537
538 if (!fgets(line, sizeof line, file)) {
539 if (ferror(file)) {
540 error = errno;
541 VLOG_WARN("%s: read: %s", pidfile, strerror(error));
542 } else {
543 error = ESRCH;
544 VLOG_WARN("%s: read: unexpected end of file", pidfile);
545 }
546 goto error;
547 }
548
549 if (lck.l_pid != strtoul(line, NULL, 10)) {
550 error = ESRCH;
551 VLOG_WARN("l_pid (%ld) != %s pid (%s)",
552 (long int) lck.l_pid, pidfile, line);
553 goto error;
554 }
555
556 fclose(file);
557 return lck.l_pid;
558
559 error:
560 if (file) {
561 fclose(file);
562 }
563 return -error;
564 }
565
/* Reads the pid recorded in 'pidfile', which is required to exist.
 *
 * Returns the positive pid on success, otherwise a negative errno value. */
pid_t
read_pidfile(const char *pidfile)
{
    const bool must_exist = true;

    return read_pidfile__(pidfile, must_exist);
}
573
574
/* Reads the pid recorded in 'pidfile', tolerating its absence.
 *
 * Returns 0 if 'pidfile' does not exist, the positive pid on success, and
 * otherwise a negative errno value. */
pid_t
read_pidfile_if_exists(const char *pidfile)
{
    const bool must_exist = false;

    return read_pidfile__(pidfile, must_exist);
}