]>
Commit | Line | Data |
---|---|---|
064af421 | 1 | /* |
6069edb0 | 2 | * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2015 Nicira, Inc. |
064af421 | 3 | * |
a14bc59f BP |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
064af421 | 7 | * |
a14bc59f BP |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
064af421 BP |
15 | */ |
16 | ||
17 | #include <config.h> | |
e2ed6fbe | 18 | #include "backtrace.h" |
064af421 | 19 | #include "daemon.h" |
3834bcf2 | 20 | #include "daemon-private.h" |
064af421 BP |
21 | #include <errno.h> |
22 | #include <fcntl.h> | |
e91b927d AZ |
23 | #include <grp.h> |
24 | #include <pwd.h> | |
3762274e | 25 | #include <signal.h> |
064af421 BP |
26 | #include <stdlib.h> |
27 | #include <string.h> | |
3762274e | 28 | #include <sys/resource.h> |
95440284 | 29 | #include <sys/wait.h> |
309eaa2b | 30 | #include <sys/stat.h> |
064af421 | 31 | #include <unistd.h> |
e91b927d AZ |
32 | #if HAVE_LIBCAPNG |
33 | #include <cap-ng.h> | |
34 | #endif | |
40f0707c | 35 | #include "command-line.h" |
064af421 BP |
36 | #include "fatal-signal.h" |
37 | #include "dirs.h" | |
ac718c9d | 38 | #include "lockfile.h" |
728a8b14 | 39 | #include "ovs-thread.h" |
ff8decf1 | 40 | #include "process.h" |
b8781ff0 | 41 | #include "socket-util.h" |
03fbffbd | 42 | #include "timeval.h" |
064af421 | 43 | #include "util.h" |
e6211adc | 44 | #include "openvswitch/vlog.h" |
064af421 | 45 | |
a91dc444 | 46 | VLOG_DEFINE_THIS_MODULE(daemon_unix); |
5136ce49 | 47 | |
e91b927d AZ |
48 | #ifdef __linux__ |
49 | #define LINUX 1 | |
50 | #else | |
51 | #define LINUX 0 | |
52 | #endif | |
53 | ||
54 | #if HAVE_LIBCAPNG | |
55 | #define LIBCAPNG 1 | |
56 | #else | |
57 | #define LIBCAPNG 0 | |
58 | #endif | |
59 | ||
d4db8309 | 60 | /* --detach: Should we run in the background? */ |
3834bcf2 | 61 | bool detach; /* Was --detach specified? */ |
e8087a87 | 62 | static bool detached; /* Have we already detached? */ |
064af421 | 63 | |
d4db8309 | 64 | /* --pidfile: Name of pidfile (null if none). */ |
3834bcf2 | 65 | char *pidfile; |
064af421 | 66 | |
e4bd5e2a BP |
67 | /* Device and inode of pidfile, so we can avoid reopening it. */ |
68 | static dev_t pidfile_dev; | |
69 | static ino_t pidfile_ino; | |
70 | ||
d4db8309 BP |
71 | /* --overwrite-pidfile: Create pidfile even if one already exists and is |
72 | locked? */ | |
e7bd7d78 | 73 | static bool overwrite_pidfile; |
064af421 | 74 | |
d4db8309 | 75 | /* --no-chdir: Should we chdir to "/"? */ |
91a1e24d JP |
76 | static bool chdir_ = true; |
77 | ||
7943cd51 | 78 | /* File descriptor used by daemonize_start() and daemonize_complete(). */ |
e2ed6fbe | 79 | int daemonize_fd = -1; |
95440284 | 80 | |
ff8decf1 BP |
81 | /* --monitor: Should a supervisory process monitor the daemon and restart it if |
82 | * it dies due to an error signal? */ | |
ecd4a8fc | 83 | bool monitor; |
ff8decf1 | 84 | |
e91b927d AZ |
85 | /* --user: Only root can use this option. Switch to new uid:gid after |
86 | * initially running as root. */ | |
87 | static bool switch_user = false; | |
e91b927d AZ |
88 | static uid_t uid; |
89 | static gid_t gid; | |
90 | static char *user = NULL; | |
91 | static void daemon_become_new_user__(bool access_datapath); | |
92 | ||
aacea8ba BP |
93 | static void check_already_running(void); |
94 | static int lock_pidfile(FILE *, int command); | |
d6056bc7 GS |
95 | static pid_t fork_and_clean_up(void); |
96 | static void daemonize_post_detach(void); | |
aacea8ba | 97 | |
064af421 BP |
98 | /* Returns the file name that would be used for a pidfile if 'name' were |
99 | * provided to set_pidfile(). The caller must free the returned string. */ | |
3834bcf2 | 100 | char * |
d295e8e9 | 101 | make_pidfile_name(const char *name) |
064af421 | 102 | { |
daf03c53 | 103 | return (!name |
b43c6fe2 BP |
104 | ? xasprintf("%s/%s.pid", ovs_rundir(), program_name) |
105 | : abs_file_name(ovs_rundir(), name)); | |
064af421 BP |
106 | } |
107 | ||
91a1e24d JP |
108 | /* Sets that we do not chdir to "/". */ |
109 | void | |
110 | set_no_chdir(void) | |
111 | { | |
112 | chdir_ = false; | |
113 | } | |
114 | ||
00c08589 BP |
115 | /* Normally, daemonize() or daemonize_start() will terminate the program with a |
116 | * message if a locked pidfile already exists. If this function is called, an | |
117 | * existing pidfile will be replaced, with a warning. */ | |
064af421 BP |
118 | void |
119 | ignore_existing_pidfile(void) | |
120 | { | |
e7bd7d78 | 121 | overwrite_pidfile = true; |
064af421 BP |
122 | } |
123 | ||
124 | /* Sets up a following call to daemonize() to detach from the foreground | |
125 | * session, running this process in the background. */ | |
126 | void | |
127 | set_detach(void) | |
128 | { | |
129 | detach = true; | |
130 | } | |
131 | ||
ff8decf1 BP |
132 | /* Sets up a following call to daemonize() to fork a supervisory process to |
133 | * monitor the daemon and restart it if it dies due to an error signal. */ | |
134 | void | |
135 | daemon_set_monitor(void) | |
136 | { | |
137 | monitor = true; | |
138 | } | |
139 | ||
d4db8309 BP |
140 | /* If a pidfile has been configured, creates it and stores the running |
141 | * process's pid in it. Ensures that the pidfile will be deleted when the | |
142 | * process exits. */ | |
064af421 BP |
143 | static void |
144 | make_pidfile(void) | |
145 | { | |
aacea8ba BP |
146 | long int pid = getpid(); |
147 | struct stat s; | |
148 | char *tmpfile; | |
149 | FILE *file; | |
150 | int error; | |
151 | ||
152 | /* Create a temporary pidfile. */ | |
2388a783 EJ |
153 | if (overwrite_pidfile) { |
154 | tmpfile = xasprintf("%s.tmp%ld", pidfile, pid); | |
155 | fatal_signal_add_file_to_unlink(tmpfile); | |
156 | } else { | |
157 | /* Everyone shares the same file which will be treated as a lock. To | |
158 | * avoid some uncomfortable race conditions, we can't set up the fatal | |
159 | * signal unlink until we've acquired it. */ | |
160 | tmpfile = xasprintf("%s.tmp", pidfile); | |
161 | } | |
162 | ||
163 | file = fopen(tmpfile, "a+"); | |
aacea8ba | 164 | if (!file) { |
10a89ef0 | 165 | VLOG_FATAL("%s: create failed (%s)", tmpfile, ovs_strerror(errno)); |
aacea8ba BP |
166 | } |
167 | ||
2388a783 EJ |
168 | error = lock_pidfile(file, F_SETLK); |
169 | if (error) { | |
170 | /* Looks like we failed to acquire the lock. Note that, if we failed | |
171 | * for some other reason (and '!overwrite_pidfile'), we will have | |
172 | * left 'tmpfile' as garbage in the file system. */ | |
10a89ef0 BP |
173 | VLOG_FATAL("%s: fcntl(F_SETLK) failed (%s)", tmpfile, |
174 | ovs_strerror(error)); | |
2388a783 EJ |
175 | } |
176 | ||
177 | if (!overwrite_pidfile) { | |
178 | /* We acquired the lock. Make sure to clean up on exit, and verify | |
179 | * that we're allowed to create the actual pidfile. */ | |
180 | fatal_signal_add_file_to_unlink(tmpfile); | |
181 | check_already_running(); | |
182 | } | |
183 | ||
aacea8ba | 184 | if (fstat(fileno(file), &s) == -1) { |
10a89ef0 | 185 | VLOG_FATAL("%s: fstat failed (%s)", tmpfile, ovs_strerror(errno)); |
aacea8ba BP |
186 | } |
187 | ||
2388a783 | 188 | if (ftruncate(fileno(file), 0) == -1) { |
10a89ef0 | 189 | VLOG_FATAL("%s: truncate failed (%s)", tmpfile, ovs_strerror(errno)); |
2388a783 EJ |
190 | } |
191 | ||
aacea8ba BP |
192 | fprintf(file, "%ld\n", pid); |
193 | if (fflush(file) == EOF) { | |
10a89ef0 | 194 | VLOG_FATAL("%s: write failed (%s)", tmpfile, ovs_strerror(errno)); |
aacea8ba BP |
195 | } |
196 | ||
2388a783 | 197 | error = rename(tmpfile, pidfile); |
aacea8ba | 198 | |
2388a783 EJ |
199 | /* Due to a race, 'tmpfile' may be owned by a different process, so we |
200 | * shouldn't delete it on exit. */ | |
201 | fatal_signal_remove_file_to_unlink(tmpfile); | |
202 | ||
203 | if (error < 0) { | |
204 | VLOG_FATAL("failed to rename \"%s\" to \"%s\" (%s)", | |
10a89ef0 | 205 | tmpfile, pidfile, ovs_strerror(errno)); |
aacea8ba BP |
206 | } |
207 | ||
208 | /* Ensure that the pidfile will get deleted on exit. */ | |
209 | fatal_signal_add_file_to_unlink(pidfile); | |
210 | ||
aacea8ba BP |
211 | /* Clean up. |
212 | * | |
213 | * We don't close 'file' because its file descriptor must remain open to | |
214 | * hold the lock. */ | |
215 | pidfile_dev = s.st_dev; | |
216 | pidfile_ino = s.st_ino; | |
217 | free(tmpfile); | |
064af421 BP |
218 | } |
219 | ||
8aee05cc BP |
220 | /* Calls fork() and on success returns its return value. On failure, logs an |
221 | * error and exits unsuccessfully. | |
222 | * | |
223 | * Post-fork, but before returning, this function calls a few other functions | |
224 | * that are generally useful if the child isn't planning to exec a new | |
225 | * process. */ | |
static pid_t
fork_and_clean_up(void)
{
    pid_t child = xfork();

    if (child == 0) {
        /* Child side of the fork. */
        lockfile_postfork();
    } else if (child > 0) {
        /* Parent side of the fork. */
        fatal_signal_fork();
    }

    /* Handed back unchanged so the caller can tell parent from child. */
    return child;
}
239 | ||
e6c5e539 BP |
/* Forks, then:
 *
 *   - In the parent, waits for the child to signal that it has completed its
 *     startup sequence.  Then stores the read end of the startup pipe in
 *     '*fdp' (it is left open) and the child's pid in '*child_pid'.
 *
 *   - In the child, stores the write end of the startup pipe in '*fdp' and 0
 *     in '*child_pid'.  The caller should pass the fd to
 *     fork_notify_startup() after it finishes its startup sequence.
 *
 * Returns 0 on success.  If the child terminated without signaling its
 * readiness through fork_notify_startup(), then:
 *
 *   - If the child exited with a nonzero exit code, the parent exits with
 *     that same code and this function does not return.
 *
 *   - Otherwise an error is logged and this function returns -1.  Even in
 *     this case the child's pid is still stored in '*child_pid'. */
static int
fork_and_wait_for_startup(int *fdp, pid_t *child_pid)
{
    int fds[2];
    pid_t pid;
    int ret = 0;

    xpipe(fds);

    pid = fork_and_clean_up();
    if (pid > 0) {
        /* Running in parent process. */
        size_t bytes_read;
        char c;

        close(fds[1]);
        if (read_fully(fds[0], &c, 1, &bytes_read) != 0) {
            /* No startup byte arrived: find out what happened to the
             * child. */
            int retval;
            int status;

            do {
                retval = waitpid(pid, &status, 0);
            } while (retval == -1 && errno == EINTR);

            if (retval == pid) {
                if (WIFEXITED(status) && WEXITSTATUS(status)) {
                    /* Child exited with an error.  Convey the same error
                     * to our parent process as a courtesy. */
                    exit(WEXITSTATUS(status));
                } else {
                    char *status_msg = process_status_msg(status);
                    VLOG_ERR("fork child died before signaling startup (%s)",
                             status_msg);
                    /* The message string is owned by us; release it. */
                    free(status_msg);
                    ret = -1;
                }
            } else if (retval < 0) {
                VLOG_FATAL("waitpid failed (%s)", ovs_strerror(errno));
            } else {
                OVS_NOT_REACHED();
            }
        }
        *fdp = fds[0];
    } else if (!pid) {
        /* Running in child process. */
        close(fds[0]);
        *fdp = fds[1];
    }
    *child_pid = pid;
    return ret;
}
304 | ||
static void
fork_notify_startup(int fd)
{
    size_t n_written;
    int err;

    /* -1 means there is nobody to notify. */
    if (fd == -1) {
        return;
    }

    /* Send a single NUL byte down the pipe. */
    err = write_fully(fd, "", 1, &n_written);
    if (err) {
        VLOG_FATAL("pipe write failed (%s)", ovs_strerror(err));
    }
}
318 | ||
ff8decf1 BP |
static bool
should_restart(int status)
{
    /* Only a death by signal can qualify for a restart. */
    if (!WIFSIGNALED(status)) {
        return false;
    }

    /* This list of signals is documented in daemon.man.  If you
     * change the list, update the documentation too. */
    switch (WTERMSIG(status)) {
    case SIGABRT:
    case SIGALRM:
    case SIGBUS:
    case SIGFPE:
    case SIGILL:
    case SIGPIPE:
    case SIGSEGV:
    case SIGXCPU:
    case SIGXFSZ:
        return true;

    default:
        return false;
    }
}
340 | ||
341 | static void | |
342 | monitor_daemon(pid_t daemon_pid) | |
343 | { | |
ff8decf1 | 344 | /* XXX Should log daemon's stderr output at startup time. */ |
a9633ada | 345 | time_t last_restart; |
40f0707c | 346 | char *status_msg; |
cbbdf81c | 347 | int crashes; |
b925336a | 348 | bool child_ready = true; |
ff8decf1 | 349 | |
bc9fb3a9 | 350 | set_subprogram_name("monitor"); |
40f0707c | 351 | status_msg = xstrdup("healthy"); |
a9633ada | 352 | last_restart = TIME_MIN; |
cbbdf81c | 353 | crashes = 0; |
ff8decf1 BP |
354 | for (;;) { |
355 | int retval; | |
356 | int status; | |
357 | ||
5f383751 RB |
358 | ovs_cmdl_proctitle_set("monitoring pid %lu (%s)", |
359 | (unsigned long int) daemon_pid, status_msg); | |
40f0707c | 360 | |
b925336a | 361 | if (child_ready) { |
8ee45836 | 362 | int error; |
b925336a AA |
363 | do { |
364 | retval = waitpid(daemon_pid, &status, 0); | |
8ee45836 HH |
365 | error = retval == -1 ? errno : 0; |
366 | } while (error == EINTR); | |
367 | vlog_reopen_log_file(); | |
368 | if (error) { | |
369 | VLOG_FATAL("waitpid failed (%s)", ovs_strerror(error)); | |
b925336a AA |
370 | } |
371 | } | |
ff8decf1 | 372 | |
b925336a | 373 | if (!child_ready || retval == daemon_pid) { |
40f0707c | 374 | char *s = process_status_msg(status); |
40f0707c | 375 | if (should_restart(status)) { |
2bf9d87a BP |
376 | free(status_msg); |
377 | status_msg = xasprintf("%d crashes: pid %lu died, %s", | |
378 | ++crashes, | |
379 | (unsigned long int) daemon_pid, s); | |
380 | free(s); | |
381 | ||
7c2dd4c6 BP |
382 | if (WCOREDUMP(status)) { |
383 | /* Disable further core dumps to save disk space. */ | |
384 | struct rlimit r; | |
385 | ||
386 | r.rlim_cur = 0; | |
387 | r.rlim_max = 0; | |
388 | if (setrlimit(RLIMIT_CORE, &r) == -1) { | |
389 | VLOG_WARN("failed to disable core dumps: %s", | |
10a89ef0 | 390 | ovs_strerror(errno)); |
7c2dd4c6 BP |
391 | } |
392 | } | |
393 | ||
e2ed6fbe WT |
394 | log_received_backtrace(daemonize_fd); |
395 | ||
a9633ada BP |
396 | /* Throttle restarts to no more than once every 10 seconds. */ |
397 | if (time(NULL) < last_restart + 10) { | |
398 | VLOG_WARN("%s, waiting until 10 seconds since last " | |
399 | "restart", status_msg); | |
400 | for (;;) { | |
401 | time_t now = time(NULL); | |
402 | time_t wakeup = last_restart + 10; | |
403 | if (now >= wakeup) { | |
404 | break; | |
405 | } | |
275eebb9 | 406 | xsleep(wakeup - now); |
a9633ada BP |
407 | } |
408 | } | |
409 | last_restart = time(NULL); | |
410 | ||
40f0707c | 411 | VLOG_ERR("%s, restarting", status_msg); |
b925336a AA |
412 | child_ready = !fork_and_wait_for_startup(&daemonize_fd, |
413 | &daemon_pid); | |
414 | if (child_ready && !daemon_pid) { | |
415 | /* Child process needs to break out of monitoring | |
416 | * loop. */ | |
ff8decf1 BP |
417 | break; |
418 | } | |
419 | } else { | |
2bf9d87a BP |
420 | VLOG_INFO("pid %lu died, %s, exiting", |
421 | (unsigned long int) daemon_pid, s); | |
422 | free(s); | |
ff8decf1 BP |
423 | exit(0); |
424 | } | |
425 | } | |
426 | } | |
b2d06cb8 | 427 | free(status_msg); |
ff8decf1 BP |
428 | |
429 | /* Running in new daemon process. */ | |
5f383751 | 430 | ovs_cmdl_proctitle_restore(); |
ed1e8ded | 431 | set_subprogram_name(program_name); |
ff8decf1 BP |
432 | } |
433 | ||
95440284 BP |
434 | /* If daemonization is configured, then starts daemonization, by forking and |
435 | * returning in the child process. The parent process hangs around until the | |
436 | * child lets it know either that it completed startup successfully (by calling | |
c5992276 WT |
437 | * daemonize_complete()) or that it failed to start up (by exiting with a |
438 | * nonzero exit code). */ | |
95440284 | 439 | void |
e91b927d | 440 | daemonize_start(bool access_datapath) |
064af421 | 441 | { |
728a8b14 | 442 | assert_single_threaded(); |
7943cd51 | 443 | daemonize_fd = -1; |
95440284 | 444 | |
e91b927d AZ |
445 | if (switch_user) { |
446 | daemon_become_new_user__(access_datapath); | |
447 | switch_user = false; | |
448 | } | |
449 | ||
7943cd51 | 450 | if (detach) { |
b925336a AA |
451 | pid_t pid; |
452 | ||
453 | if (fork_and_wait_for_startup(&daemonize_fd, &pid)) { | |
454 | VLOG_FATAL("could not detach from foreground session"); | |
455 | } | |
456 | if (pid > 0) { | |
95440284 | 457 | /* Running in parent process. */ |
064af421 | 458 | exit(0); |
064af421 | 459 | } |
066f329e | 460 | |
ff8decf1 | 461 | /* Running in daemon or monitor process. */ |
066f329e | 462 | setsid(); |
ff8decf1 BP |
463 | } |
464 | ||
465 | if (monitor) { | |
466 | int saved_daemonize_fd = daemonize_fd; | |
467 | pid_t daemon_pid; | |
468 | ||
b925336a AA |
469 | if (fork_and_wait_for_startup(&daemonize_fd, &daemon_pid)) { |
470 | VLOG_FATAL("could not initiate process monitoring"); | |
471 | } | |
ff8decf1 BP |
472 | if (daemon_pid > 0) { |
473 | /* Running in monitor process. */ | |
474 | fork_notify_startup(saved_daemonize_fd); | |
44074bab BP |
475 | if (detach) { |
476 | close_standard_fds(); | |
477 | } | |
ff8decf1 BP |
478 | monitor_daemon(daemon_pid); |
479 | } | |
7943cd51 | 480 | /* Running in daemon process. */ |
064af421 | 481 | } |
7943cd51 | 482 | |
92fa2e92 BP |
483 | forbid_forking("running in daemon process"); |
484 | ||
aacea8ba BP |
485 | if (pidfile) { |
486 | make_pidfile(); | |
487 | } | |
df5d2ed9 BP |
488 | |
489 | /* Make sure that the unixctl commands for vlog get registered in a | |
490 | * daemon, even before the first log message. */ | |
491 | vlog_init(); | |
064af421 BP |
492 | } |
493 | ||
95440284 | 494 | /* If daemonization is configured, then this function notifies the parent |
e8087a87 BP |
495 | * process that the child process has completed startup successfully. It also |
496 | * calls daemonize_post_detach(). |
a7ff9bd7 BP |
497 | * |
498 | * Calling this function more than once has no additional effect. */ | |
95440284 BP |
499 | void |
500 | daemonize_complete(void) | |
501 | { | |
7ffd3f69 GS |
502 | if (pidfile) { |
503 | free(pidfile); | |
504 | pidfile = NULL; | |
505 | } | |
506 | ||
e8087a87 BP |
507 | if (!detached) { |
508 | detached = true; | |
509 | ||
510 | fork_notify_startup(daemonize_fd); | |
e8087a87 BP |
511 | daemonize_post_detach(); |
512 | } | |
513 | } | |
95440284 | 514 | |
e8087a87 BP |
515 | /* If daemonization is configured, then this function does traditional Unix |
516 | * daemonization behavior: join a new session, chdir to the root (if not | |
517 | * disabled), and close the standard file descriptors. | |
518 | * | |
519 | * It only makes sense to call this function as part of an implementation of a | |
520 | * special daemon subprocess. A normal daemon should just call | |
521 | * daemonize_complete(). */ | |
d6056bc7 | 522 | static void |
e8087a87 BP |
523 | daemonize_post_detach(void) |
524 | { | |
7943cd51 | 525 | if (detach) { |
95440284 BP |
526 | if (chdir_) { |
527 | ignore(chdir("/")); | |
528 | } | |
7943cd51 | 529 | close_standard_fds(); |
95440284 BP |
530 | } |
531 | } | |
532 | ||
064af421 BP |
533 | void |
534 | daemon_usage(void) | |
535 | { | |
536 | printf( | |
537 | "\nDaemon options:\n" | |
e7bd7d78 | 538 | " --detach run in background as daemon\n" |
c66caed4 AC |
539 | " --monitor creates a process to monitor this daemon\n" |
540 | " --user=username[:group] changes the effective daemon user:group\n" | |
91a1e24d | 541 | " --no-chdir do not chdir to '/'\n" |
e7bd7d78 JP |
542 | " --pidfile[=FILE] create pidfile (default: %s/%s.pid)\n" |
543 | " --overwrite-pidfile with --pidfile, start even if already " | |
544 | "running\n", | |
b43c6fe2 | 545 | ovs_rundir(), program_name); |
064af421 BP |
546 | } |
547 | ||
aacea8ba BP |
/* Fills in '*lck' as a write lock starting at offset 0 with length 0, then
 * issues fcntl() 'command' on the descriptor behind 'file', retrying while
 * the call is interrupted (EINTR).
 *
 * Returns 0 on success, otherwise the errno value from fcntl(). */
static int
lock_pidfile__(FILE *file, int command, struct flock *lck)
{
    const int fd = fileno(file);

    lck->l_type = F_WRLCK;
    lck->l_whence = SEEK_SET;
    lck->l_start = 0;
    lck->l_len = 0;
    lck->l_pid = 0;

    for (;;) {
        if (fcntl(fd, command, lck) != -1) {
            return 0;
        }
        if (errno != EINTR) {
            return errno;
        }
    }
}
564 | ||
/* Same as lock_pidfile__() for callers that do not need to look at the
 * resulting 'struct flock'. */
static int
lock_pidfile(FILE *file, int command)
{
    struct flock scratch;
    int error = lock_pidfile__(file, command, &scratch);

    return error;
}
572 | ||
18e124a2 | 573 | static pid_t |
396d492c | 574 | read_pidfile__(const char *pidfile_, bool delete_if_stale) |
064af421 | 575 | { |
aacea8ba | 576 | struct stat s, s2; |
064af421 | 577 | struct flock lck; |
aacea8ba | 578 | char line[128]; |
064af421 BP |
579 | FILE *file; |
580 | int error; | |
581 | ||
e4bd5e2a | 582 | if ((pidfile_ino || pidfile_dev) |
396d492c | 583 | && !stat(pidfile_, &s) |
e4bd5e2a BP |
584 | && s.st_ino == pidfile_ino && s.st_dev == pidfile_dev) { |
585 | /* It's our own pidfile. We can't afford to open it, because closing | |
586 | * *any* fd for a file that a process has locked also releases all the | |
587 | * locks on that file. | |
588 | * | |
589 | * Fortunately, we know the associated pid anyhow: */ | |
590 | return getpid(); | |
591 | } | |
592 | ||
396d492c | 593 | file = fopen(pidfile_, "r+"); |
064af421 | 594 | if (!file) { |
aacea8ba | 595 | if (errno == ENOENT && delete_if_stale) { |
18e124a2 BP |
596 | return 0; |
597 | } | |
064af421 | 598 | error = errno; |
396d492c | 599 | VLOG_WARN("%s: open: %s", pidfile_, ovs_strerror(error)); |
064af421 BP |
600 | goto error; |
601 | } | |
602 | ||
aacea8ba BP |
603 | error = lock_pidfile__(file, F_GETLK, &lck); |
604 | if (error) { | |
396d492c | 605 | VLOG_WARN("%s: fcntl: %s", pidfile_, ovs_strerror(error)); |
064af421 BP |
606 | goto error; |
607 | } | |
608 | if (lck.l_type == F_UNLCK) { | |
aacea8ba BP |
609 | /* pidfile exists but it isn't locked by anyone. We need to delete it |
610 | * so that a new pidfile can go in its place. But just calling | |
611 | * unlink(pidfile) makes a nasty race: what if someone else unlinks it | |
612 | * before we do and then replaces it by a valid pidfile? We'd unlink | |
613 | * their valid pidfile. We do a little dance to avoid the race, by | |
614 | * locking the invalid pidfile. Only one process can have the invalid | |
615 | * pidfile locked, and only that process has the right to unlink it. */ | |
616 | if (!delete_if_stale) { | |
617 | error = ESRCH; | |
396d492c | 618 | VLOG_DBG("%s: pid file is stale", pidfile_); |
aacea8ba BP |
619 | goto error; |
620 | } | |
621 | ||
622 | /* Get the lock. */ | |
623 | error = lock_pidfile(file, F_SETLK); | |
624 | if (error) { | |
625 | /* We lost a race with someone else doing the same thing. */ | |
396d492c | 626 | VLOG_WARN("%s: lost race to lock pidfile", pidfile_); |
aacea8ba BP |
627 | goto error; |
628 | } | |
629 | ||
396d492c JP |
630 | /* Is the file we have locked still named 'pidfile_'? */ |
631 | if (stat(pidfile_, &s) || fstat(fileno(file), &s2) | |
aacea8ba BP |
632 | || s.st_ino != s2.st_ino || s.st_dev != s2.st_dev) { |
633 | /* No. We lost a race with someone else who got the lock before | |
634 | * us, deleted the pidfile, and closed it (releasing the lock). */ | |
635 | error = EALREADY; | |
396d492c | 636 | VLOG_WARN("%s: lost race to delete pidfile", pidfile_); |
aacea8ba BP |
637 | goto error; |
638 | } | |
639 | ||
640 | /* We won the right to delete the stale pidfile. */ | |
396d492c | 641 | if (unlink(pidfile_)) { |
aacea8ba BP |
642 | error = errno; |
643 | VLOG_WARN("%s: failed to delete stale pidfile (%s)", | |
396d492c | 644 | pidfile_, ovs_strerror(error)); |
aacea8ba BP |
645 | goto error; |
646 | } | |
396d492c | 647 | VLOG_DBG("%s: deleted stale pidfile", pidfile_); |
aacea8ba BP |
648 | fclose(file); |
649 | return 0; | |
064af421 BP |
650 | } |
651 | ||
652 | if (!fgets(line, sizeof line, file)) { | |
653 | if (ferror(file)) { | |
654 | error = errno; | |
396d492c | 655 | VLOG_WARN("%s: read: %s", pidfile_, ovs_strerror(error)); |
064af421 BP |
656 | } else { |
657 | error = ESRCH; | |
396d492c | 658 | VLOG_WARN("%s: read: unexpected end of file", pidfile_); |
064af421 BP |
659 | } |
660 | goto error; | |
661 | } | |
662 | ||
663 | if (lck.l_pid != strtoul(line, NULL, 10)) { | |
aacea8ba BP |
664 | /* The process that has the pidfile locked is not the process that |
665 | * created it. It must be stale, with the process that has it locked | |
666 | * preparing to delete it. */ | |
064af421 | 667 | error = ESRCH; |
aacea8ba | 668 | VLOG_WARN("%s: stale pidfile for pid %s being deleted by pid %ld", |
396d492c | 669 | pidfile_, line, (long int) lck.l_pid); |
064af421 BP |
670 | goto error; |
671 | } | |
672 | ||
673 | fclose(file); | |
674 | return lck.l_pid; | |
675 | ||
676 | error: | |
677 | if (file) { | |
678 | fclose(file); | |
679 | } | |
680 | return -error; | |
681 | } | |
18e124a2 | 682 | |
396d492c | 683 | /* Opens and reads a PID from 'pidfile_'. Returns the positive PID if |
18e124a2 BP |
684 | * successful, otherwise a negative errno value. */ |
pid_t
read_pidfile(const char *pidfile_)
{
    /* Public entry point: never deletes a stale pidfile. */
    const bool delete_if_stale = false;

    return read_pidfile__(pidfile_, delete_if_stale);
}
690 | ||
aacea8ba BP |
691 | /* Checks whether a process with the given 'pidfile' is already running and, |
692 | * if so, aborts. If 'pidfile' is stale, deletes it. */ | |
693 | static void | |
694 | check_already_running(void) | |
18e124a2 | 695 | { |
aacea8ba BP |
696 | long int pid = read_pidfile__(pidfile, true); |
697 | if (pid > 0) { | |
698 | VLOG_FATAL("%s: already running as pid %ld, aborting", pidfile, pid); | |
699 | } else if (pid < 0) { | |
700 | VLOG_FATAL("%s: pidfile check failed (%s), aborting", | |
10a89ef0 | 701 | pidfile, ovs_strerror(-pid)); |
aacea8ba | 702 | } |
18e124a2 | 703 | } |
fda546bd GS |
704 | |
705 | \f | |
706 | /* stub functions for non-windows platform. */ | |
707 | ||
708 | void | |
709 | service_start(int *argc OVS_UNUSED, char **argv[] OVS_UNUSED) | |
710 | { | |
711 | } | |
712 | ||
void
service_stop(void)
{
    /* Intentionally empty: stub for non-Windows platforms. */
}
717 | ||
bool
should_service_stop(void)
{
    /* Stub for non-Windows platforms: always reports false. */
    const bool stop_requested = false;

    return stop_requested;
}
e91b927d AZ |
723 | |
724 | \f | |
/* Returns true if 'value' satisfies 'expected'.  An 'expected' of
 * (gid_t) -1 acts as a wildcard that matches any 'value'.
 *
 * The -1 is cast explicitly so the comparison is done in gid_t rather than
 * depending on the implicit integer conversions of a bare -1. */
static bool
gid_matches(gid_t expected, gid_t value)
{
    return expected == (gid_t) -1 || expected == value;
}
730 | ||
/* Returns true if both the real and the effective group id of this process
 * match 'gid_' according to gid_matches(). */
static bool
gid_verify(gid_t gid_)
{
    const gid_t real = getgid();
    const gid_t effective = getegid();

    if (!gid_matches(gid_, real)) {
        return false;
    }
    return gid_matches(gid_, effective);
}
741 | ||
742 | static void | |
396d492c | 743 | daemon_switch_group(gid_t gid_) |
e91b927d | 744 | { |
396d492c | 745 | if ((setgid(gid_) == -1) || !gid_verify(gid_)) { |
6a54bae1 | 746 | VLOG_FATAL("%s: fail to switch group to gid as %d, aborting", |
396d492c | 747 | pidfile, gid_); |
e91b927d AZ |
748 | } |
749 | } | |
750 | ||
/* Returns true if 'value' satisfies 'expected'.  An 'expected' of
 * (uid_t) -1 acts as a wildcard that matches any 'value'.
 *
 * The -1 is cast explicitly so the comparison is done in uid_t rather than
 * depending on the implicit integer conversions of a bare -1. */
static bool
uid_matches(uid_t expected, uid_t value)
{
    return expected == (uid_t) -1 || expected == value;
}
756 | ||
/* Returns true if both the real and the effective user id of this process
 * match 'uid_' according to uid_matches(). */
static bool
uid_verify(const uid_t uid_)
{
    const uid_t real = getuid();
    const uid_t effective = geteuid();

    if (!uid_matches(uid_, real)) {
        return false;
    }
    return uid_matches(uid_, effective);
}
767 | ||
768 | static void | |
396d492c | 769 | daemon_switch_user(const uid_t uid_, const char *user_) |
e91b927d | 770 | { |
396d492c | 771 | if ((setuid(uid_) == -1) || !uid_verify(uid_)) { |
e91b927d | 772 | VLOG_FATAL("%s: fail to switch user to %s, aborting", |
396d492c | 773 | pidfile, user_); |
e91b927d AZ |
774 | } |
775 | } | |
776 | ||
777 | /* Use portable Unix APIs to switch uid:gid, when datapath | |
778 | * access is not required. On Linux systems, all capabilities | |
779 | * will be dropped. */ | |
780 | static void | |
781 | daemon_become_new_user_unix(void) | |
782 | { | |
783 | /* "Setuid Demystified" by Hao Chen, etc outlines some caveats of | |
784 | * around unix system call setuid() and friends. This implementation | |
785 | * mostly follow the advice given by the paper. The paper is | |
786 | * published in 2002, so things could have changed. */ | |
787 | ||
788 | /* Change both real and effective uid and gid will permanently | |
789 | * drop the process' privilege. "Setuid Demystified" suggested | |
790 | * that calling getuid() after each setuid() call to verify they | |
791 | * are actually set, because checking return code alone is not | |
792 | * sufficient. */ | |
6a54bae1 | 793 | daemon_switch_group(gid); |
e91b927d AZ |
794 | if (user && initgroups(user, gid) == -1) { |
795 | VLOG_FATAL("%s: fail to add supplementary group gid %d, " | |
796 | "aborting", pidfile, gid); | |
797 | } | |
6a54bae1 | 798 | daemon_switch_user(uid, user); |
e91b927d AZ |
799 | } |
800 | ||
801 | /* Linux specific implementation of daemon_become_new_user() | |
802 | * using libcap-ng. */ | |
e91b927d | 803 | static void |
6e6271d2 | 804 | daemon_become_new_user_linux(bool access_datapath OVS_UNUSED) |
e91b927d | 805 | { |
6e6271d2 | 806 | #if defined __linux__ && HAVE_LIBCAPNG |
e91b927d AZ |
807 | int ret; |
808 | ||
809 | ret = capng_get_caps_process(); | |
810 | ||
811 | if (!ret) { | |
812 | if (capng_have_capabilities(CAPNG_SELECT_CAPS) > CAPNG_NONE) { | |
813 | const capng_type_t cap_sets = CAPNG_EFFECTIVE|CAPNG_PERMITTED; | |
814 | ||
815 | capng_clear(CAPNG_SELECT_BOTH); | |
816 | ||
817 | ret = capng_update(CAPNG_ADD, cap_sets, CAP_IPC_LOCK) | |
818 | || capng_update(CAPNG_ADD, cap_sets, CAP_NET_BIND_SERVICE); | |
819 | ||
820 | if (access_datapath && !ret) { | |
821 | ret = capng_update(CAPNG_ADD, cap_sets, CAP_NET_ADMIN) | |
cf114a7f FL |
822 | || capng_update(CAPNG_ADD, cap_sets, CAP_NET_RAW) |
823 | || capng_update(CAPNG_ADD, cap_sets, CAP_NET_BROADCAST); | |
e91b927d AZ |
824 | } |
825 | } else { | |
826 | ret = -1; | |
827 | } | |
828 | } | |
829 | ||
830 | if (!ret) { | |
831 | /* CAPNG_INIT_SUPP_GRP will be a better choice than | |
832 | * CAPNG_DROP_SUPP_GRP. However this enum value is only defined | |
833 | * with libcap-ng higher than version 0.7.4, which is not wildly | |
834 | * available on many Linux distributions yet. Taking a more | |
835 | * conservative approach to make sure OVS behaves consistently. | |
836 | * | |
837 | * XXX We may change this for future OVS releases. | |
838 | */ | |
839 | ret = capng_change_id(uid, gid, CAPNG_DROP_SUPP_GRP | |
840 | | CAPNG_CLEAR_BOUNDING); | |
841 | } | |
842 | ||
843 | if (ret) { | |
844 | VLOG_FATAL("%s: libcap-ng fail to switch to user and group " | |
845 | "%d:%d, aborting", pidfile, uid, gid); | |
846 | } | |
e91b927d | 847 | #endif |
6e6271d2 | 848 | } |
e91b927d AZ |
849 | |
850 | static void | |
851 | daemon_become_new_user__(bool access_datapath) | |
852 | { | |
3de44dd1 AZ |
853 | /* If vlog file has been created, change its owner to the non-root user |
854 | * as specifed by the --user option. */ | |
de929213 | 855 | vlog_change_owner_unix(uid, gid); |
3de44dd1 | 856 | |
e91b927d AZ |
857 | if (LINUX) { |
858 | if (LIBCAPNG) { | |
859 | daemon_become_new_user_linux(access_datapath); | |
860 | } else { | |
861 | VLOG_FATAL("%s: fail to downgrade user using libcap-ng. " | |
862 | "(libcap-ng is not configured at compile time), " | |
863 | "aborting.", pidfile); | |
864 | } | |
865 | } else { | |
866 | daemon_become_new_user_unix(); | |
867 | } | |
868 | } | |
869 | ||
870 | /* Noramlly, user switch is embedded within daemonize_start(). | |
871 | * However, there in case the user switch needs to be done | |
872 | * before daemonize_start(), the following API can be used. */ | |
873 | void | |
874 | daemon_become_new_user(bool access_datapath) | |
875 | { | |
876 | assert_single_threaded(); | |
877 | if (switch_user) { | |
878 | daemon_become_new_user__(access_datapath); | |
6069edb0 | 879 | /* daemonize_start() should not switch user again. */ |
e91b927d AZ |
880 | switch_user = false; |
881 | } | |
882 | } | |
883 | ||
884 | /* Return the maximun suggested buffer size for both getpwname_r() | |
885 | * and getgrnam_r(). | |
886 | * | |
887 | * This size may still not be big enough. in case getpwname_r() | |
888 | * and friends return ERANGE, a larger buffer should be supplied to | |
889 | * retry. (The man page did not specify the max size to stop at, we | |
890 | * will keep trying with doubling the buffer size for each round until | |
891 | * the size wrapps around size_t. */ | |
892 | static size_t | |
893 | get_sysconf_buffer_size(void) | |
894 | { | |
895 | size_t bufsize, pwd_bs = 0, grp_bs = 0; | |
896 | const size_t default_bufsize = 1024; | |
897 | ||
898 | errno = 0; | |
899 | if ((pwd_bs = sysconf(_SC_GETPW_R_SIZE_MAX)) == -1) { | |
900 | if (errno) { | |
901 | VLOG_FATAL("%s: Read initial passwordd struct size " | |
902 | "failed (%s), aborting. ", pidfile, | |
903 | ovs_strerror(errno)); | |
904 | } | |
905 | } | |
906 | ||
907 | if ((grp_bs = sysconf(_SC_GETGR_R_SIZE_MAX)) == -1) { | |
908 | if (errno) { | |
909 | VLOG_FATAL("%s: Read initial group struct size " | |
910 | "failed (%s), aborting. ", pidfile, | |
911 | ovs_strerror(errno)); | |
912 | } | |
913 | } | |
914 | ||
915 | bufsize = MAX(pwd_bs, grp_bs); | |
916 | return bufsize ? bufsize : default_bufsize; | |
917 | } | |
918 | ||
/* Attempts to double the size of the buffer at '*buf', whose current size
 * is '*sizep'.
 *
 * On success, '*buf' is reallocated with xrealloc(), '*sizep' is updated
 * to the new size, and true is returned.  If doubling does not produce a
 * strictly larger size (the multiplication wrapped around size_t, or the
 * current size is 0), nothing is modified and false is returned. */
static bool
enlarge_buffer(char **buf, size_t *sizep)
{
    const size_t current = *sizep;
    const size_t doubled = current * 2;

    if (doubled <= current) {
        return false;
    }

    *buf = xrealloc(*buf, doubled);
    *sizep = doubled;
    return true;
}
935 | ||
936 | /* Parse and sanity check user_spec. | |
937 | * | |
938 | * If successful, set global variables 'uid' and 'gid' | |
939 | * with the parsed results. Global variable 'user' | |
940 | * will be pointing to a string that stores the name | |
941 | * of the user to be switched into. | |
942 | * | |
943 | * Also set 'switch_to_new_user' to true, The actual | |
944 | * user switching is done as soon as daemonize_start() | |
945 | * is called. I/O access before calling daemonize_start() | |
946 | * will still be with root's credential. */ | |
947 | void | |
948 | daemon_set_new_user(const char *user_spec) | |
949 | { | |
950 | char *pos = strchr(user_spec, ':'); | |
951 | size_t init_bufsize, bufsize; | |
952 | ||
953 | init_bufsize = get_sysconf_buffer_size(); | |
954 | uid = getuid(); | |
955 | gid = getgid(); | |
956 | ||
957 | if (geteuid() || uid) { | |
958 | VLOG_FATAL("%s: only root can use --user option", pidfile); | |
959 | } | |
960 | ||
961 | user_spec += strspn(user_spec, " \t\r\n"); | |
962 | size_t len = pos ? pos - user_spec : strlen(user_spec); | |
963 | char *buf; | |
964 | struct passwd pwd, *res; | |
965 | int e; | |
966 | ||
967 | bufsize = init_bufsize; | |
968 | buf = xmalloc(bufsize); | |
969 | if (len) { | |
970 | user = xmemdup0(user_spec, len); | |
971 | ||
972 | while ((e = getpwnam_r(user, &pwd, buf, bufsize, &res)) == ERANGE) { | |
973 | if (!enlarge_buffer(&buf, &bufsize)) { | |
974 | break; | |
975 | } | |
976 | } | |
977 | ||
978 | if (e != 0) { | |
979 | VLOG_FATAL("%s: Failed to retrive user %s's uid (%s), aborting.", | |
980 | pidfile, user, ovs_strerror(e)); | |
981 | } | |
eaf2aa9e CE |
982 | if (res == NULL) { |
983 | VLOG_FATAL("%s: user %s not found, aborting.", pidfile, user); | |
984 | } | |
e91b927d AZ |
985 | } else { |
986 | /* User name is not specified, use current user. */ | |
987 | while ((e = getpwuid_r(uid, &pwd, buf, bufsize, &res)) == ERANGE) { | |
988 | if (!enlarge_buffer(&buf, &bufsize)) { | |
989 | break; | |
990 | } | |
991 | } | |
992 | ||
993 | if (e != 0) { | |
994 | VLOG_FATAL("%s: Failed to retrive current user's name " | |
995 | "(%s), aborting.", pidfile, ovs_strerror(e)); | |
996 | } | |
997 | user = xstrdup(pwd.pw_name); | |
998 | } | |
999 | ||
1000 | uid = pwd.pw_uid; | |
1001 | gid = pwd.pw_gid; | |
1002 | free(buf); | |
1003 | ||
1004 | if (pos) { | |
1005 | char *grpstr = pos + 1; | |
1006 | grpstr += strspn(grpstr, " \t\r\n"); | |
1007 | ||
1008 | if (*grpstr) { | |
71f21279 | 1009 | struct group grp, *gres; |
e91b927d AZ |
1010 | |
1011 | bufsize = init_bufsize; | |
1012 | buf = xmalloc(bufsize); | |
71f21279 | 1013 | while ((e = getgrnam_r(grpstr, &grp, buf, bufsize, &gres)) |
e91b927d AZ |
1014 | == ERANGE) { |
1015 | if (!enlarge_buffer(&buf, &bufsize)) { | |
1016 | break; | |
1017 | } | |
1018 | } | |
1019 | ||
1020 | if (e) { | |
1021 | VLOG_FATAL("%s: Failed to get group entry for %s, " | |
1022 | "(%s), aborting.", pidfile, grpstr, | |
1023 | ovs_strerror(e)); | |
1024 | } | |
71f21279 | 1025 | if (gres == NULL) { |
eaf2aa9e CE |
1026 | VLOG_FATAL("%s: group %s not found, aborting.", pidfile, |
1027 | grpstr); | |
1028 | } | |
e91b927d AZ |
1029 | |
1030 | if (gid != grp.gr_gid) { | |
1031 | char **mem; | |
1032 | ||
1033 | for (mem = grp.gr_mem; *mem; ++mem) { | |
1034 | if (!strcmp(*mem, user)) { | |
1035 | break; | |
1036 | } | |
1037 | } | |
1038 | ||
1039 | if (!*mem) { | |
1040 | VLOG_FATAL("%s: Invalid --user option %s (user %s is " | |
1041 | "not in group %s), aborting.", pidfile, | |
1042 | user_spec, user, grpstr); | |
1043 | } | |
1044 | gid = grp.gr_gid; | |
1045 | } | |
1046 | free(buf); | |
1047 | } | |
1048 | } | |
1049 | ||
6069edb0 | 1050 | switch_user = true; |
e91b927d | 1051 | } |