]>
Commit | Line | Data |
---|---|---|
2159de83 | 1 | # Copyright (c) 2010, 2011 Nicira Networks |
99155935 BP |
2 | # |
3 | # Licensed under the Apache License, Version 2.0 (the "License"); | |
4 | # you may not use this file except in compliance with the License. | |
5 | # You may obtain a copy of the License at: | |
6 | # | |
7 | # http://www.apache.org/licenses/LICENSE-2.0 | |
8 | # | |
9 | # Unless required by applicable law or agreed to in writing, software | |
10 | # distributed under the License is distributed on an "AS IS" BASIS, | |
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
12 | # See the License for the specific language governing permissions and | |
13 | # limitations under the License. | |
14 | ||
15 | import errno | |
16 | import fcntl | |
17 | import logging | |
18 | import os | |
19 | import resource | |
20 | import signal | |
21 | import sys | |
22 | import time | |
23 | ||
24 | import ovs.dirs | |
25 | import ovs.fatal_signal | |
26 | #import ovs.lockfile | |
27 | import ovs.process | |
28 | import ovs.socket_util | |
29 | import ovs.timeval | |
30 | import ovs.util | |
31 | ||
# --detach: run in the background?
_detach = False

# --pidfile: name of the pidfile, or None when no pidfile is configured.
_pidfile = None

# Device and inode of our own pidfile, once _make_pidfile() has created it.
_pidfile_dev = _pidfile_ino = None

# --overwrite-pidfile: create the pidfile even if one already exists and is
# locked?
_overwrite_pidfile = False

# --no-chdir clears this flag; when it is set we chdir to "/" on detach.
_chdir = True

# --monitor: should a supervisory process monitor the daemon and restart it
# if it dies due to an error signal?
_monitor = False

# File descriptor shared between daemonize_start() and daemonize_complete().
_daemonize_fd = None

# A daemon that exits with this status is restarted by the monitor process.
RESTART_EXIT_CODE = 5
56 | ||
cadc9871 | 57 | |
99155935 BP |
def make_pidfile_name(name):
    """Returns the pidfile name that set_pidfile() would use for 'name'.

    A None or empty 'name' selects "<RUNDIR>/<PROGRAM_NAME>.pid"; any other
    value is passed through ovs.util.abs_file_name() with ovs.dirs.RUNDIR as
    the base directory."""
    if name is not None and name != "":
        return ovs.util.abs_file_name(ovs.dirs.RUNDIR, name)
    return "%s/%s.pid" % (ovs.dirs.RUNDIR, ovs.util.PROGRAM_NAME)
65 | ||
cadc9871 | 66 | |
99155935 BP |
def set_pidfile(name):
    """Arranges for a later call to daemonize() to create a pidfile named
    'name'.  If 'name' begins with '/', then it is treated as an absolute
    path.  Otherwise, it is taken relative to ovs.dirs.RUNDIR, which is
    $(prefix)/var/run by default.

    If 'name' is None or empty, then ovs.util.PROGRAM_NAME followed by
    ".pid" is used."""
    global _pidfile
    _pidfile = make_pidfile_name(name)
77 | ||
cadc9871 | 78 | |
99155935 BP |
def get_pidfile():
    """Returns the pidfile path configured by set_pidfile(), or None if no
    pidfile has been configured."""
    return _pidfile
83 | ||
cadc9871 | 84 | |
99155935 BP |
def set_no_chdir():
    """Disables the chdir to "/" that daemonize_complete() otherwise performs
    when detaching."""
    global _chdir
    _chdir = False
89 | ||
cadc9871 | 90 | |
99155935 BP |
def is_chdir_enabled():
    """Reports whether daemonizing will chdir to "/"."""
    return _chdir
94 | ||
cadc9871 | 95 | |
def ignore_existing_pidfile():
    """Requests that an existing pidfile be replaced.

    Normally, daemonize() or daemonize_start() will terminate the program
    with a message if a locked pidfile already exists.  After this function
    has been called, the new pidfile is renamed over any existing one
    instead."""
    global _overwrite_pidfile
    _overwrite_pidfile = True
102 | ||
cadc9871 | 103 | |
99155935 BP |
def set_detach():
    """Arranges for a later call to daemonize() to detach from the foreground
    session, so that this process runs in the background."""
    global _detach
    _detach = True
109 | ||
cadc9871 | 110 | |
99155935 BP |
def get_detach():
    """Reports whether daemonize() will detach from the foreground session."""
    return _detach
114 | ||
cadc9871 | 115 | |
99155935 BP |
def set_monitor():
    """Arranges for a later call to daemonize() to fork a supervisory process
    that monitors the daemon and restarts it if it dies due to an error
    signal."""
    global _monitor
    _monitor = True
121 | ||
cadc9871 | 122 | |
aacea8ba BP |
def _fatal(msg):
    """Logs 'msg' as an error, echoes it to stderr, and exits the process
    with status 1."""
    logging.error(msg)
    sys.stderr.write("%s\n" % msg)
    raise SystemExit(1)
99155935 | 127 | |
cadc9871 | 128 | |
99155935 BP |
def _make_pidfile():
    """Creates the configured pidfile and stores the running process's pid in
    it.  Ensures that the pidfile will be deleted when the process exits.

    The pid is first written to a locked temporary file, which is then
    renamed over the pidfile (if overwriting was requested) or hard-linked
    to it.  Any failure terminates the process through _fatal()."""
    # The handle lives in a module-level global to keep Python from
    # garbage-collecting and therefore closing our file after this function
    # exits.  That would unlock the lock for us, and we don't want that.
    global file_handle
    global _pidfile_dev
    global _pidfile_ino

    pid = os.getpid()

    # Create a temporary pidfile.
    tmpfile = "%s.tmp%d" % (_pidfile, pid)
    ovs.fatal_signal.add_file_to_unlink(tmpfile)
    try:
        file_handle = open(tmpfile, "w")
    except IOError as e:
        _fatal("%s: create failed (%s)" % (tmpfile, e.strerror))

    # os.fstat() reports failure with OSError, file methods with IOError;
    # catch both so that neither slips past as a traceback.
    try:
        s = os.fstat(file_handle.fileno())
    except (IOError, OSError) as e:
        _fatal("%s: fstat failed (%s)" % (tmpfile, e.strerror))

    try:
        file_handle.write("%s\n" % pid)
        file_handle.flush()
    except (IOError, OSError) as e:
        _fatal("%s: write failed: %s" % (tmpfile, e.strerror))

    try:
        fcntl.lockf(file_handle, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except IOError as e:
        _fatal("%s: fcntl failed: %s" % (tmpfile, e.strerror))

    # Rename or link it to the correct name.
    if _overwrite_pidfile:
        try:
            os.rename(tmpfile, _pidfile)
        except OSError as e:
            _fatal("failed to rename \"%s\" to \"%s\" (%s)"
                   % (tmpfile, _pidfile, e.strerror))
    else:
        while True:
            try:
                os.link(tmpfile, _pidfile)
                error = 0
            except OSError as e:
                error = e.errno
            if error == errno.EEXIST:
                # Exits if a live daemon owns the pidfile; otherwise the
                # stale file has been dealt with and we retry the link.
                _check_already_running()
            elif error != errno.EINTR:
                break
        if error:
            _fatal("failed to link \"%s\" as \"%s\" (%s)"
                   % (tmpfile, _pidfile, os.strerror(error)))

    # Ensure that the pidfile will get deleted on exit.
    ovs.fatal_signal.add_file_to_unlink(_pidfile)

    # Delete the temporary pidfile if it still exists.
    if not _overwrite_pidfile:
        error = ovs.fatal_signal.unlink_file_now(tmpfile)
        if error:
            _fatal("%s: unlink failed (%s)" % (tmpfile, os.strerror(error)))

    # Remember which file is ours so that __read_pidfile() can recognize it
    # without opening it.
    _pidfile_dev = s.st_dev
    _pidfile_ino = s.st_ino
e4bd5e2a | 199 | |
cadc9871 | 200 | |
99155935 BP |
def daemonize():
    """Runs daemonize_start() followed immediately by daemonize_complete().

    If configured with set_pidfile() or set_detach(), this creates the pid
    file and detaches from the foreground session."""
    daemonize_start()
    daemonize_complete()
206 | ||
cadc9871 | 207 | |
99155935 BP |
def _waitpid(pid, options):
    """Calls os.waitpid(pid, options), retrying if interrupted by a signal.

    Returns os.waitpid()'s (pid, status) tuple on success, or
    (-errno, 0) if the call fails for any reason other than EINTR."""
    while True:
        try:
            return os.waitpid(pid, options)
        except OSError as e:
            if e.errno == errno.EINTR:
                # Interrupted by a signal before any child changed state:
                # try again instead of reporting a spurious failure.
                continue
            return -e.errno, 0
216 | ||
cadc9871 | 217 | |
99155935 BP |
def _fork_and_wait_for_startup():
    """Forks, and in the parent waits for the child to report startup.

    In the child, stores the write end of a pipe in _daemonize_fd and returns
    0.  In the parent, blocks until one byte arrives on the pipe and then
    returns the child's pid; if no byte arrives, exits with the child's
    nonzero exit status when it has one, or with status 1 otherwise."""
    global _daemonize_fd

    try:
        rfd, wfd = os.pipe()
    except OSError as e:
        sys.stderr.write("pipe failed: %s\n" % os.strerror(e.errno))
        sys.exit(1)

    try:
        pid = os.fork()
    except OSError as e:
        sys.stderr.write("could not fork: %s\n" % os.strerror(e.errno))
        sys.exit(1)

    if pid <= 0:
        # Running in child process.
        os.close(rfd)
        ovs.timeval.postfork()
        _daemonize_fd = wfd
        return pid

    # Running in parent process.
    os.close(wfd)
    ovs.fatal_signal.fork()
    while True:
        try:
            s = os.read(rfd, 1)
        except OSError as e:
            if e.errno == errno.EINTR:
                continue
            s = ""
        break

    if len(s) != 1:
        retval, status = _waitpid(pid, 0)
        child_failed = (retval == pid and os.WIFEXITED(status)
                        and os.WEXITSTATUS(status))
        if child_failed:
            # Child exited with an error.  Convey the same error to our
            # parent process as a courtesy.
            sys.exit(os.WEXITSTATUS(status))
        sys.stderr.write("fork child failed to signal startup\n")
        sys.exit(1)

    os.close(rfd)
    return pid
265 | ||
cadc9871 | 266 | |
99155935 BP |
def _fork_notify_startup(fd):
    """Writes one byte to 'fd' to report successful startup, then closes it.

    Does nothing when 'fd' is None.  Exits with status 1 if the write
    fails."""
    if fd is None:
        return

    error, _unused_bytes_written = ovs.socket_util.write_fully(fd, "0")
    if error:
        sys.stderr.write("could not write to pipe\n")
        sys.exit(1)
    os.close(fd)
274 | ||
cadc9871 | 275 | |
def _should_restart(status):
    """Decides from wait status 'status' whether the daemon should be
    restarted.

    Returns True if the process exited with RESTART_EXIT_CODE or was killed
    by one of a fixed set of error signals, otherwise False."""
    if os.WIFEXITED(status) and os.WEXITSTATUS(status) == RESTART_EXIT_CODE:
        return True

    if not os.WIFSIGNALED(status):
        return False

    # Signal names are looked up with getattr() so that a name missing from
    # this platform's signal module simply never matches.
    error_signals = ("SIGABRT", "SIGALRM", "SIGBUS", "SIGFPE", "SIGILL",
                     "SIGPIPE", "SIGSEGV", "SIGXCPU", "SIGXFSZ")
    return any(os.WTERMSIG(status) == getattr(signal, signame, None)
               for signame in error_signals)
288 | ||
cadc9871 | 289 | |
99155935 BP |
def _monitor_daemon(daemon_pid):
    """Supervises the daemon process 'daemon_pid'.

    Waits for the daemon to die.  If _should_restart() approves of its wait
    status, forks a replacement, no more often than once every 10 seconds;
    otherwise exits with status 0.  Exits with status 1 if waiting fails.

    This function returns only in a freshly forked daemon process; the
    monitor process itself never returns from it."""
    # XXX should log daemon's stderr output at startup time
    # XXX should use setproctitle module if available
    last_restart = None
    while True:
        retval, status = _waitpid(daemon_pid, 0)
        if retval < 0:
            sys.stderr.write("waitpid failed\n")
            sys.exit(1)
        elif retval == daemon_pid:
            status_msg = ("pid %d died, %s"
                          % (daemon_pid, ovs.process.status_msg(status)))

            if _should_restart(status):
                if os.WCOREDUMP(status):
                    # Disable further core dumps to save disk space.
                    try:
                        resource.setrlimit(resource.RLIMIT_CORE, (0, 0))
                    except resource.error:
                        logging.warning("failed to disable core dumps")

                # Throttle restarts to no more than once every 10 seconds.
                if (last_restart is not None and
                    ovs.timeval.msec() < last_restart + 10000):
                    logging.warning("%s, waiting until 10 seconds since last "
                                    "restart" % status_msg)
                    while True:
                        now = ovs.timeval.msec()
                        wakeup = last_restart + 10000
                        if now > wakeup:
                            break
                        # Same output as the former print statement, but
                        # valid syntax on both Python 2 and Python 3.
                        sys.stdout.write("sleep %f\n"
                                         % ((wakeup - now) / 1000.0))
                        time.sleep((wakeup - now) / 1000.0)
                last_restart = ovs.timeval.msec()

                logging.error("%s, restarting" % status_msg)
                daemon_pid = _fork_and_wait_for_startup()
                if not daemon_pid:
                    # Running in new daemon process.
                    break
            else:
                logging.info("%s, exiting" % status_msg)
                sys.exit(0)
334 | ||
cadc9871 | 335 | |
99155935 BP |
def _close_standard_fds():
    """Redirects stdin, stdout, and stderr to the null device.  If we're
    started from e.g. an SSH session, then this keeps us from holding that
    session open artificially.

    Does nothing if ovs.socket_util.get_null_fd() returns a negative
    value."""
    null_fd = ovs.socket_util.get_null_fd()
    if null_fd >= 0:
        for std_fd in (0, 1, 2):
            os.dup2(null_fd, std_fd)
344 | ||
cadc9871 | 345 | |
99155935 BP |
def daemonize_start():
    """If daemonization is configured, then starts daemonization, by forking
    and returning in the child process.  The parent process hangs around
    until the child lets it know either that it completed startup
    successfully (by calling daemonize_complete()) or that it failed to start
    up (by exiting with a nonzero exit code).

    With monitoring enabled, a second fork separates the monitor process from
    the daemon.  If a pidfile is configured, it is created last, in the
    daemon process."""
    if _detach:
        child_pid = _fork_and_wait_for_startup()
        if child_pid > 0:
            # Running in parent process.
            sys.exit(0)
        # Running in daemon or monitor process.

    if _monitor:
        # _fork_and_wait_for_startup() overwrites _daemonize_fd in the child,
        # so capture the fd that leads back to the original parent first.
        saved_daemonize_fd = _daemonize_fd
        daemon_pid = _fork_and_wait_for_startup()
        if daemon_pid > 0:
            # Running in monitor process.
            _fork_notify_startup(saved_daemonize_fd)
            _close_standard_fds()
            _monitor_daemon(daemon_pid)
        # Running in daemon process.

    if _pidfile:
        _make_pidfile()
99155935 | 371 | |
cadc9871 | 372 | |
99155935 BP |
def daemonize_complete():
    """If daemonization is configured, then this function notifies the parent
    process that the child process has completed startup successfully.

    When detaching, it then starts a new session, changes directory to "/"
    unless that has been disabled, and redirects the standard file
    descriptors to the null device."""
    _fork_notify_startup(_daemonize_fd)

    if not _detach:
        return

    os.setsid()
    if _chdir:
        os.chdir("/")
    _close_standard_fds()
383 | ||
cadc9871 | 384 | |
99155935 BP |
def usage():
    """Writes a summary of the daemon command-line options to stdout.

    The --pidfile default shown is built from ovs.dirs.RUNDIR and
    ovs.util.PROGRAM_NAME."""
    sys.stdout.write("""
Daemon options:
   --detach                run in background as daemon
   --no-chdir              do not chdir to '/'
   --monitor               monitor the daemon and restart it if it dies
   --pidfile[=FILE]        create pidfile (default: %s/%s.pid)
   --overwrite-pidfile     with --pidfile, start even if already running
""" % (ovs.dirs.RUNDIR, ovs.util.PROGRAM_NAME))
393 | ||
cadc9871 | 394 | |
def __read_pidfile(pidfile, delete_if_stale):
    """Reads the pid recorded in 'pidfile'.

    Returns the positive pid if 'pidfile' is our own pidfile or is locked by
    another process, or a negative errno value on error.

    A pidfile that exists but is not locked is stale.  If 'delete_if_stale'
    is false, that yields -errno.ESRCH.  If it is true, the stale file is
    deleted and 0 is returned; 0 is also returned if 'pidfile' does not exist
    at all, and -errno.EALREADY if someone else replaced or removed the file
    while we were locking it."""
    if _pidfile_dev is not None:
        try:
            s = os.stat(pidfile)
        except OSError:
            pass
        else:
            if s.st_ino == _pidfile_ino and s.st_dev == _pidfile_dev:
                # It's our own pidfile.  We can't afford to open it,
                # because closing *any* fd for a file that a process
                # has locked also releases all the locks on that file.
                #
                # Fortunately, we know the associated pid anyhow.
                return os.getpid()

    try:
        file_handle = open(pidfile, "r+")
    except IOError as e:
        if e.errno == errno.ENOENT and delete_if_stale:
            return 0
        logging.warning("%s: open: %s" % (pidfile, e.strerror))
        return -e.errno

    try:
        # Python fcntl doesn't directly support F_GETLK so we have to just
        # try to lock it.
        try:
            fcntl.lockf(file_handle, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except IOError as e:
            if e.errno not in (errno.EACCES, errno.EAGAIN):
                logging.warning("%s: fcntl: %s" % (pidfile, e.strerror))
                return -e.errno

            # Someone else has the pidfile locked.
            try:
                return int(file_handle.readline())
            except IOError as read_error:
                logging.warning("%s: read: %s"
                                % (pidfile, read_error.strerror))
                return -read_error.errno
            except ValueError:
                logging.warning("%s does not contain a pid" % pidfile)
                return -errno.EINVAL

        # pidfile exists but wasn't locked by anyone.  Now we have the lock.
        if not delete_if_stale:
            logging.warning("%s: pid file is stale" % pidfile)
            return -errno.ESRCH

        # Is the file we have locked still named 'pidfile'?  os.stat() and
        # os.fstat() report failure with OSError, so catch that as well.
        try:
            s = os.stat(pidfile)
            s2 = os.fstat(file_handle.fileno())
            raced = s.st_ino != s2.st_ino or s.st_dev != s2.st_dev
        except (IOError, OSError):
            raced = True
        if raced:
            logging.warning("%s: lost race to delete pidfile" % pidfile)
            return -errno.EALREADY

        # We won the right to delete the stale pidfile.
        try:
            os.unlink(pidfile)
        except (IOError, OSError) as e:
            logging.warning("%s: failed to delete stale pidfile (%s)"
                            % (pidfile, e.strerror))
            return -e.errno
        logging.debug("%s: deleted stale pidfile" % pidfile)
        return 0
    finally:
        # Release the handle (and any lock we took) on every path.
        try:
            file_handle.close()
        except IOError:
            pass
474 | ||
cadc9871 | 475 | |
18e124a2 BP |
def read_pidfile(pidfile):
    """Opens 'pidfile' and reads a PID from it, never deleting the file.

    Returns the positive PID if successful, otherwise a negative errno
    value."""
    return __read_pidfile(pidfile, delete_if_stale=False)
480 | ||
cadc9871 | 481 | |
aacea8ba BP |
def _check_already_running():
    """Terminates the process if the configured pidfile belongs to a running
    daemon or cannot be checked.

    Returns normally only when __read_pidfile() reports 0, that is, when the
    pidfile was absent or was stale and has been deleted."""
    pid = __read_pidfile(_pidfile, True)
    if pid > 0:
        _fatal("%s: already running as pid %d, aborting" % (_pidfile, pid))
    elif pid < 0:
        # 'pid' is a negative errno value here; os.strerror() needs the
        # positive error number.
        _fatal("%s: pidfile check failed (%s), aborting"
               % (_pidfile, os.strerror(-pid)))
489 | ||
99155935 | 490 | |
b153e667 EJ |
def add_args(parser):
    """Populates 'parser', an ArgumentParser allocated using the argparse
    module, with the command line arguments required by the daemon module.

    --pidfile takes an optional value: a bare "--pidfile" selects the default
    pidfile name, "--pidfile=FILE" selects FILE, and omitting the option
    leaves the pidfile unconfigured."""

    pidfile = make_pidfile_name(None)

    group = parser.add_argument_group(title="Daemon Options")
    group.add_argument("--detach", action="store_true",
                       help="Run in background as a daemon.")
    group.add_argument("--no-chdir", action="store_true",
                       help="Do not chdir to '/'.")
    group.add_argument("--monitor", action="store_true",
                       help="Monitor %s process." % ovs.util.PROGRAM_NAME)
    # With nargs="?", 'const' is the value used when the option appears
    # without an argument.  Using 'default' instead would create a pidfile
    # when the option is absent and none when it is given bare.
    group.add_argument("--pidfile", nargs="?", const=pidfile,
                       help="Create pidfile (default %s)." % pidfile)
    group.add_argument("--overwrite-pidfile", action="store_true",
                       help="With --pidfile, start even if already running.")
508 | ||
509 | ||
def handle_args(args):
    """Applies the daemon module settings found in 'args'.

    'args' is an object containing values parsed by the parse_args() method
    of ArgumentParser.  The parent ArgumentParser should have been prepared
    by add_args() before calling parse_args()."""
    if args.detach:
        set_detach()
    if args.no_chdir:
        set_no_chdir()
    if args.pidfile:
        set_pidfile(args.pidfile)
    if args.overwrite_pidfile:
        ignore_existing_pidfile()
    if args.monitor:
        set_monitor()