lib/daemon-unix.c

   1 /*
   2  * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2015 Nicira, Inc.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at:
   7  *
   8  *     http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #include <config.h>
  18 #include "daemon.h"
  19 #include "daemon-private.h"
  20 #include <errno.h>
  21 #include <fcntl.h>
  22 #include <grp.h>
  23 #include <pwd.h>
  24 #include <signal.h>
  25 #include <stdlib.h>
  26 #include <string.h>
  27 #include <sys/resource.h>
  28 #include <sys/wait.h>
  29 #include <sys/stat.h>
  30 #include <unistd.h>
  31 #if HAVE_LIBCAPNG
  32 #include <cap-ng.h>
  33 #endif
  34 #include "command-line.h"
  35 #include "fatal-signal.h"
  36 #include "dirs.h"
  37 #include "lockfile.h"
  38 #include "ovs-thread.h"
  39 #include "process.h"
  40 #include "socket-util.h"
  41 #include "timeval.h"
  42 #include "util.h"
  43 #include "openvswitch/vlog.h"
  44
VLOG_DEFINE_THIS_MODULE(daemon_unix);

/* LINUX and LIBCAPNG are always defined, as 0 or 1, so that
 * daemon_become_new_user__() can select an implementation with ordinary
 * 'if' statements instead of preprocessor conditionals. */
#ifdef __linux__
#define LINUX 1
#else
#define LINUX 0
#endif

#if HAVE_LIBCAPNG
#define LIBCAPNG 1
#else
#define LIBCAPNG 0
#endif

/* --detach: Should we run in the background? */
bool detach;                    /* Was --detach specified? */
static bool detached;           /* Set once by daemonize_complete(). */

/* --pidfile: Name of pidfile (null if none).  daemonize_complete() frees it
 * and resets it to null. */
char *pidfile;

/* Device and inode of pidfile, so we can avoid reopening it.  Recorded by
 * make_pidfile() and compared against in read_pidfile__(). */
static dev_t pidfile_dev;
static ino_t pidfile_ino;

/* --overwrite-pidfile: Create pidfile even if one already exists and is
   locked? */
static bool overwrite_pidfile;

/* --no-chdir: Should we chdir to "/"?  Cleared by set_no_chdir(). */
static bool chdir_ = true;

/* File descriptor used by daemonize_start() and daemonize_complete().  In a
 * forked child it is the write end of the startup pipe created by
 * fork_and_wait_for_startup(); -1 when there is nobody to notify. */
static int daemonize_fd = -1;

/* --monitor: Should a supervisory process monitor the daemon and restart it if
 * it dies due to an error signal? */
static bool monitor;

/* --user: Only root can use this option. Switch to new uid:gid after
 * initially running as root.  'switch_user' is cleared again after the
 * switch has been performed so that it happens at most once. */
static bool switch_user = false;
static uid_t uid;
static gid_t gid;
static char *user = NULL;
static void daemon_become_new_user__(bool access_datapath);

/* Forward declarations for helpers defined later in this file. */
static void check_already_running(void);
static int lock_pidfile(FILE *, int command);
static pid_t fork_and_clean_up(void);
static void daemonize_post_detach(void);
  96
  97 /* Returns the file name that would be used for a pidfile if 'name' were
  98  * provided to set_pidfile().  The caller must free the returned string. */
  99 char *
 100 make_pidfile_name(const char *name)
 101 {
 102     return (!name
 103             ? xasprintf("%s/%s.pid", ovs_rundir(), program_name)
 104             : abs_file_name(ovs_rundir(), name));
 105 }
 106
/* Implements --no-chdir: records that the daemon must not chdir to "/".
 * The flag is consulted by daemonize_post_detach(). */
void
set_no_chdir(void)
{
    chdir_ = false;
}
 113
/* Normally, daemonize() or daemonize_start() will terminate the program with
 * a message if a locked pidfile already exists.  If this function is called,
 * an existing pidfile will be replaced, with a warning.
 *
 * Implements --overwrite-pidfile; the flag is read by make_pidfile(). */
void
ignore_existing_pidfile(void)
{
    overwrite_pidfile = true;
}
 122
/* Sets up a following call to daemonize() to detach from the foreground
 * session, running this process in the background.
 *
 * Implements --detach; the flag is read by daemonize_start() and
 * daemonize_post_detach(). */
void
set_detach(void)
{
    detach = true;
}
 130
/* Sets up a following call to daemonize() to fork a supervisory process to
 * monitor the daemon and restart it if it dies due to an error signal.
 *
 * Implements --monitor; the flag is read by daemonize_start(). */
void
daemon_set_monitor(void)
{
    monitor = true;
}
 138
/* If a pidfile has been configured, creates it and stores the running
 * process's pid in it.  Ensures that the pidfile will be deleted when the
 * process exits.
 *
 * The pid is first written to a locked temporary file, which is then
 * renamed onto 'pidfile'.  The stream for that file is deliberately left
 * open so that the lock stays held.  Any failure along the way is fatal. */
static void
make_pidfile(void)
{
    long int pid = getpid();
    struct stat s;
    char *tmpfile;
    FILE *file;
    int error;

    /* Create a temporary pidfile. */
    if (overwrite_pidfile) {
        /* The name includes our pid, so the temporary file is private to
         * this process and can be registered for cleanup right away. */
        tmpfile = xasprintf("%s.tmp%ld", pidfile, pid);
        fatal_signal_add_file_to_unlink(tmpfile);
    } else {
        /* Everyone shares the same file which will be treated as a lock.  To
         * avoid some uncomfortable race conditions, we can't set up the fatal
         * signal unlink until we've acquired it. */
        tmpfile = xasprintf("%s.tmp", pidfile);
    }

    /* "a+" creates the file if needed without truncating existing content;
     * truncation happens below, only after the lock is held. */
    file = fopen(tmpfile, "a+");
    if (!file) {
        VLOG_FATAL("%s: create failed (%s)", tmpfile, ovs_strerror(errno));
    }

    error = lock_pidfile(file, F_SETLK);
    if (error) {
        /* Looks like we failed to acquire the lock.  Note that, if we failed
         * for some other reason (and '!overwrite_pidfile'), we will have
         * left 'tmpfile' as garbage in the file system. */
        VLOG_FATAL("%s: fcntl(F_SETLK) failed (%s)", tmpfile,
                   ovs_strerror(error));
    }

    if (!overwrite_pidfile) {
        /* We acquired the lock.  Make sure to clean up on exit, and verify
         * that we're allowed to create the actual pidfile. */
        fatal_signal_add_file_to_unlink(tmpfile);
        check_already_running();
    }

    /* Capture the device/inode that will identify our pidfile after the
     * rename; they are published at the end of this function. */
    if (fstat(fileno(file), &s) == -1) {
        VLOG_FATAL("%s: fstat failed (%s)", tmpfile, ovs_strerror(errno));
    }

    if (ftruncate(fileno(file), 0) == -1) {
        VLOG_FATAL("%s: truncate failed (%s)", tmpfile, ovs_strerror(errno));
    }

    fprintf(file, "%ld\n", pid);
    if (fflush(file) == EOF) {
        VLOG_FATAL("%s: write failed (%s)", tmpfile, ovs_strerror(errno));
    }

    error = rename(tmpfile, pidfile);

    /* Due to a race, 'tmpfile' may be owned by a different process, so we
     * shouldn't delete it on exit. */
    fatal_signal_remove_file_to_unlink(tmpfile);

    /* 'errno' is still the one set by rename(): the check is only reached
     * with it intact if the call above leaves errno alone.
     * NOTE(review): confirm fatal_signal_remove_file_to_unlink() preserves
     * errno. */
    if (error < 0) {
        VLOG_FATAL("failed to rename \"%s\" to \"%s\" (%s)",
                   tmpfile, pidfile, ovs_strerror(errno));
    }

    /* Ensure that the pidfile will get deleted on exit. */
    fatal_signal_add_file_to_unlink(pidfile);

    /* Clean up.
     *
     * We don't close 'file' because its file descriptor must remain open to
     * hold the lock. */
    pidfile_dev = s.st_dev;
    pidfile_ino = s.st_ino;
    free(tmpfile);
}
 218
 219 /* Calls fork() and on success returns its return value.  On failure, logs an
 220  * error and exits unsuccessfully.
 221  *
 222  * Post-fork, but before returning, this function calls a few other functions
 223  * that are generally useful if the child isn't planning to exec a new
 224  * process. */
 225 static pid_t
 226 fork_and_clean_up(void)
 227 {
 228     pid_t pid = xfork();
 229     if (pid > 0) {
 230         /* Running in parent process. */
 231         fatal_signal_fork();
 232     } else if (!pid) {
 233         /* Running in child process. */
 234         lockfile_postfork();
 235     }
 236     return pid;
 237 }
 238
 239 /* Forks, then:
 240  *
 241  *   - In the parent, waits for the child to signal that it has completed its
 242  *     startup sequence.  Then stores -1 in '*fdp' and returns the child's
 243  *     pid in '*child_pid' argument.
 244  *
 245  *   - In the child, stores a fd in '*fdp' and returns 0 through '*child_pid'
 246  *     argument.  The caller should pass the fd to fork_notify_startup() after
 247  *     it finishes its startup sequence.
 248  *
 249  * Returns 0 on success.  If something goes wrong and child process was not
 250  * able to signal its readiness by calling fork_notify_startup(), then this
 251  * function returns -1. However, even in case of failure it still sets child
 252  * process id in '*child_pid'. */
 253 static int
 254 fork_and_wait_for_startup(int *fdp, pid_t *child_pid)
 255 {
 256     int fds[2];
 257     pid_t pid;
 258     int ret = 0;
 259
 260     xpipe(fds);
 261
 262     pid = fork_and_clean_up();
 263     if (pid > 0) {
 264         /* Running in parent process. */
 265         size_t bytes_read;
 266         char c;
 267
 268         close(fds[1]);
 269         if (read_fully(fds[0], &c, 1, &bytes_read) != 0) {
 270             int retval;
 271             int status;
 272
 273             do {
 274                 retval = waitpid(pid, &status, 0);
 275             } while (retval == -1 && errno == EINTR);
 276
 277             if (retval == pid) {
 278                 if (WIFEXITED(status) && WEXITSTATUS(status)) {
 279                     /* Child exited with an error.  Convey the same error
 280                      * to our parent process as a courtesy. */
 281                     exit(WEXITSTATUS(status));
 282                 } else {
 283                     char *status_msg = process_status_msg(status);
 284                     VLOG_ERR("fork child died before signaling startup (%s)",
 285                              status_msg);
 286                     ret = -1;
 287                 }
 288             } else if (retval < 0) {
 289                 VLOG_FATAL("waitpid failed (%s)", ovs_strerror(errno));
 290             } else {
 291                 OVS_NOT_REACHED();
 292             }
 293         }
 294         close(fds[0]);
 295         *fdp = -1;
 296     } else if (!pid) {
 297         /* Running in child process. */
 298         close(fds[0]);
 299         *fdp = fds[1];
 300     }
 301     *child_pid = pid;
 302     return ret;
 303 }
 304
 305 static void
 306 fork_notify_startup(int fd)
 307 {
 308     if (fd != -1) {
 309         size_t bytes_written;
 310         int error;
 311
 312         error = write_fully(fd, "", 1, &bytes_written);
 313         if (error) {
 314             VLOG_FATAL("pipe write failed (%s)", ovs_strerror(error));
 315         }
 316
 317         close(fd);
 318     }
 319 }
 320
 321 static bool
 322 should_restart(int status)
 323 {
 324     if (WIFSIGNALED(status)) {
 325         static const int error_signals[] = {
 326             /* This list of signals is documented in daemon.man.  If you
 327              * change the list, update the documentation too. */
 328             SIGABRT, SIGALRM, SIGBUS, SIGFPE, SIGILL, SIGPIPE, SIGSEGV,
 329             SIGXCPU, SIGXFSZ
 330         };
 331
 332         size_t i;
 333
 334         for (i = 0; i < ARRAY_SIZE(error_signals); i++) {
 335             if (error_signals[i] == WTERMSIG(status)) {
 336                 return true;
 337             }
 338         }
 339     }
 340     return false;
 341 }
 342
 343 static void
 344 monitor_daemon(pid_t daemon_pid)
 345 {
 346     /* XXX Should log daemon's stderr output at startup time. */
 347     time_t last_restart;
 348     char *status_msg;
 349     int crashes;
 350     bool child_ready = true;
 351
 352     set_subprogram_name("monitor");
 353     status_msg = xstrdup("healthy");
 354     last_restart = TIME_MIN;
 355     crashes = 0;
 356     for (;;) {
 357         int retval;
 358         int status;
 359
 360         ovs_cmdl_proctitle_set("monitoring pid %lu (%s)",
 361                                (unsigned long int) daemon_pid, status_msg);
 362
 363         if (child_ready) {
 364             int error;
 365             do {
 366                 retval = waitpid(daemon_pid, &status, 0);
 367                 error = retval == -1 ? errno : 0;
 368             } while (error == EINTR);
 369             vlog_reopen_log_file();
 370             if (error) {
 371                 VLOG_FATAL("waitpid failed (%s)", ovs_strerror(error));
 372             }
 373         }
 374
 375         if (!child_ready || retval == daemon_pid) {
 376             char *s = process_status_msg(status);
 377             if (should_restart(status)) {
 378                 free(status_msg);
 379                 status_msg = xasprintf("%d crashes: pid %lu died, %s",
 380                                        ++crashes,
 381                                        (unsigned long int) daemon_pid, s);
 382                 free(s);
 383
 384                 if (WCOREDUMP(status)) {
 385                     /* Disable further core dumps to save disk space. */
 386                     struct rlimit r;
 387
 388                     r.rlim_cur = 0;
 389                     r.rlim_max = 0;
 390                     if (setrlimit(RLIMIT_CORE, &r) == -1) {
 391                         VLOG_WARN("failed to disable core dumps: %s",
 392                                   ovs_strerror(errno));
 393                     }
 394                 }
 395
 396                 /* Throttle restarts to no more than once every 10 seconds. */
 397                 if (time(NULL) < last_restart + 10) {
 398                     VLOG_WARN("%s, waiting until 10 seconds since last "
 399                               "restart", status_msg);
 400                     for (;;) {
 401                         time_t now = time(NULL);
 402                         time_t wakeup = last_restart + 10;
 403                         if (now >= wakeup) {
 404                             break;
 405                         }
 406                         xsleep(wakeup - now);
 407                     }
 408                 }
 409                 last_restart = time(NULL);
 410
 411                 VLOG_ERR("%s, restarting", status_msg);
 412                 child_ready = !fork_and_wait_for_startup(&daemonize_fd,
 413                                                          &daemon_pid);
 414                 if (child_ready && !daemon_pid) {
 415                     /* Child process needs to break out of monitoring
 416                      * loop. */
 417                     break;
 418                 }
 419             } else {
 420                 VLOG_INFO("pid %lu died, %s, exiting",
 421                           (unsigned long int) daemon_pid, s);
 422                 free(s);
 423                 exit(0);
 424             }
 425         }
 426     }
 427     free(status_msg);
 428
 429     /* Running in new daemon process. */
 430     ovs_cmdl_proctitle_restore();
 431     set_subprogram_name("");
 432 }
 433
/* If daemonization is configured, then starts daemonization, by forking and
 * returning in the child process.  The parent process hangs around until the
 * child lets it know either that it completed startup successfully (by calling
 * daemonize_complete()) or that it failed to start up (by exiting with a
 * nonzero exit code).
 *
 * Steps, in order:
 *
 *   1. If a user switch is pending (--user), performs it, passing
 *      'access_datapath' through to daemon_become_new_user__().
 *
 *   2. With --detach, forks; the original process exits with status 0 once
 *      startup has been signaled, and the child calls setsid().
 *
 *   3. With --monitor, forks again; the parent becomes the monitor and the
 *      child becomes the daemon.
 *
 *   4. In the daemon, forbids further forking, creates the pidfile if one
 *      is configured, and initializes vlog. */
void
daemonize_start(bool access_datapath)
{
    assert_single_threaded();
    daemonize_fd = -1;

    if (switch_user) {
        daemon_become_new_user__(access_datapath);
        switch_user = false;
    }

    if (detach) {
        pid_t pid;

        if (fork_and_wait_for_startup(&daemonize_fd, &pid)) {
            VLOG_FATAL("could not detach from foreground session");
        }
        if (pid > 0) {
            /* Running in parent process. */
            exit(0);
        }

        /* Running in daemon or monitor process. */
        setsid();
    }

    if (monitor) {
        /* With --detach, 'daemonize_fd' currently leads to the original
         * foreground process.  Remember it, because the fork below
         * overwrites 'daemonize_fd'. */
        int saved_daemonize_fd = daemonize_fd;
        pid_t daemon_pid;

        if (fork_and_wait_for_startup(&daemonize_fd, &daemon_pid)) {
            VLOG_FATAL("could not initiate process monitoring");
        }
        if (daemon_pid > 0) {
            /* Running in monitor process. */
            fork_notify_startup(saved_daemonize_fd);
            if (detach) {
                close_standard_fds();
            }
            /* Returns only in a restarted daemon process, which then falls
             * through to the daemon setup below. */
            monitor_daemon(daemon_pid);
        }
        /* Running in daemon process. */
    }

    forbid_forking("running in daemon process");

    if (pidfile) {
        make_pidfile();
    }

    /* Make sure that the unixctl commands for vlog get registered in a
     * daemon, even before the first log message. */
    vlog_init();
}
 493
 494 /* If daemonization is configured, then this function notifies the parent
 495  * process that the child process has completed startup successfully.  It also
 496  * call daemonize_post_detach().
 497  *
 498  * Calling this function more than once has no additional effect. */
 499 void
 500 daemonize_complete(void)
 501 {
 502     if (pidfile) {
 503         free(pidfile);
 504         pidfile = NULL;
 505     }
 506
 507     if (!detached) {
 508         detached = true;
 509
 510         fork_notify_startup(daemonize_fd);
 511         daemonize_fd = -1;
 512         daemonize_post_detach();
 513     }
 514 }
 515
 516 /* If daemonization is configured, then this function does traditional Unix
 517  * daemonization behavior: join a new session, chdir to the root (if not
 518  * disabled), and close the standard file descriptors.
 519  *
 520  * It only makes sense to call this function as part of an implementation of a
 521  * special daemon subprocess.  A normal daemon should just call
 522  * daemonize_complete(). */
 523 static void
 524 daemonize_post_detach(void)
 525 {
 526     if (detach) {
 527         if (chdir_) {
 528             ignore(chdir("/"));
 529         }
 530         close_standard_fds();
 531     }
 532 }
 533
 534 void
 535 daemon_usage(void)
 536 {
 537     printf(
 538         "\nDaemon options:\n"
 539         "  --detach                run in background as daemon\n"
 540         "  --monitor               creates a process to monitor this daemon\n"
 541         "  --user=username[:group] changes the effective daemon user:group\n"
 542         "  --no-chdir              do not chdir to '/'\n"
 543         "  --pidfile[=FILE]        create pidfile (default: %s/%s.pid)\n"
 544         "  --overwrite-pidfile     with --pidfile, start even if already "
 545                                    "running\n",
 546         ovs_rundir(), program_name);
 547 }
 548
 549 static int
 550 lock_pidfile__(FILE *file, int command, struct flock *lck)
 551 {
 552     int error;
 553
 554     lck->l_type = F_WRLCK;
 555     lck->l_whence = SEEK_SET;
 556     lck->l_start = 0;
 557     lck->l_len = 0;
 558     lck->l_pid = 0;
 559
 560     do {
 561         error = fcntl(fileno(file), command, lck) == -1 ? errno : 0;
 562     } while (error == EINTR);
 563     return error;
 564 }
 565
 566 static int
 567 lock_pidfile(FILE *file, int command)
 568 {
 569     struct flock lck;
 570
 571     return lock_pidfile__(file, command, &lck);
 572 }
 573
/* Reads the pid of the process that holds 'pidfile_' locked.
 *
 * Returns:
 *
 *   - A positive pid if 'pidfile_' is locked by the process whose pid is
 *     written in it, or getpid() if 'pidfile_' is this process's own
 *     pidfile.
 *
 *   - 0, only when 'delete_if_stale' is true, if 'pidfile_' does not exist
 *     or was stale (unlocked) and has been deleted by this call.
 *
 *   - A negative errno value otherwise.  In particular -ESRCH is returned
 *     for a stale pidfile when 'delete_if_stale' is false. */
static pid_t
read_pidfile__(const char *pidfile_, bool delete_if_stale)
{
    struct stat s, s2;
    struct flock lck;
    char line[128];
    FILE *file;
    int error;

    if ((pidfile_ino || pidfile_dev)
        && !stat(pidfile_, &s)
        && s.st_ino == pidfile_ino && s.st_dev == pidfile_dev) {
        /* It's our own pidfile.  We can't afford to open it, because closing
         * *any* fd for a file that a process has locked also releases all the
         * locks on that file.
         *
         * Fortunately, we know the associated pid anyhow: */
        return getpid();
    }

    /* Opened for update, not just for reading, because the stale-pidfile
     * path below takes a write lock on it. */
    file = fopen(pidfile_, "r+");
    if (!file) {
        if (errno == ENOENT && delete_if_stale) {
            return 0;
        }
        error = errno;
        VLOG_WARN("%s: open: %s", pidfile_, ovs_strerror(error));
        goto error;
    }

    /* F_GETLK does not take a lock; it reports in 'lck' whether a write lock
     * could be placed and, if not, who holds the conflicting lock. */
    error = lock_pidfile__(file, F_GETLK, &lck);
    if (error) {
        VLOG_WARN("%s: fcntl: %s", pidfile_, ovs_strerror(error));
        goto error;
    }
    if (lck.l_type == F_UNLCK) {
        /* pidfile exists but it isn't locked by anyone.  We need to delete it
         * so that a new pidfile can go in its place.  But just calling
         * unlink(pidfile) makes a nasty race: what if someone else unlinks it
         * before we do and then replaces it by a valid pidfile?  We'd unlink
         * their valid pidfile.  We do a little dance to avoid the race, by
         * locking the invalid pidfile.  Only one process can have the invalid
         * pidfile locked, and only that process has the right to unlink it. */
        if (!delete_if_stale) {
            error = ESRCH;
            VLOG_DBG("%s: pid file is stale", pidfile_);
            goto error;
        }

        /* Get the lock. */
        error = lock_pidfile(file, F_SETLK);
        if (error) {
            /* We lost a race with someone else doing the same thing. */
            VLOG_WARN("%s: lost race to lock pidfile", pidfile_);
            goto error;
        }

        /* Is the file we have locked still named 'pidfile_'? */
        if (stat(pidfile_, &s) || fstat(fileno(file), &s2)
            || s.st_ino != s2.st_ino || s.st_dev != s2.st_dev) {
            /* No.  We lost a race with someone else who got the lock before
             * us, deleted the pidfile, and closed it (releasing the lock). */
            error = EALREADY;
            VLOG_WARN("%s: lost race to delete pidfile", pidfile_);
            goto error;
        }

        /* We won the right to delete the stale pidfile. */
        if (unlink(pidfile_)) {
            error = errno;
            VLOG_WARN("%s: failed to delete stale pidfile (%s)",
                      pidfile_, ovs_strerror(error));
            goto error;
        }
        VLOG_DBG("%s: deleted stale pidfile", pidfile_);
        fclose(file);
        return 0;
    }

    /* The file is locked by someone else: read the pid recorded in it. */
    if (!fgets(line, sizeof line, file)) {
        if (ferror(file)) {
            error = errno;
            VLOG_WARN("%s: read: %s", pidfile_, ovs_strerror(error));
        } else {
            error = ESRCH;
            VLOG_WARN("%s: read: unexpected end of file", pidfile_);
        }
        goto error;
    }

    if (lck.l_pid != strtoul(line, NULL, 10)) {
        /* The process that has the pidfile locked is not the process that
         * created it.  It must be stale, with the process that has it locked
         * preparing to delete it. */
        error = ESRCH;
        VLOG_WARN("%s: stale pidfile for pid %s being deleted by pid %ld",
                  pidfile_, line, (long int) lck.l_pid);
        goto error;
    }

    fclose(file);
    return lck.l_pid;

error:
    /* 'file' is null here only when fopen() itself failed. */
    if (file) {
        fclose(file);
    }
    return -error;
}
 683
/* Opens and reads a PID from 'pidfile_'.  Returns the positive PID if
 * successful, otherwise a negative errno value.
 *
 * A stale pidfile is reported as an error and is never deleted by this
 * function, because read_pidfile__() is called with 'delete_if_stale' set
 * to false. */
pid_t
read_pidfile(const char *pidfile_)
{
    return read_pidfile__(pidfile_, false);
}
 691
 692 /* Checks whether a process with the given 'pidfile' is already running and,
 693  * if so, aborts.  If 'pidfile' is stale, deletes it. */
 694 static void
 695 check_already_running(void)
 696 {
 697     long int pid = read_pidfile__(pidfile, true);
 698     if (pid > 0) {
 699         VLOG_FATAL("%s: already running as pid %ld, aborting", pidfile, pid);
 700     } else if (pid < 0) {
 701         VLOG_FATAL("%s: pidfile check failed (%s), aborting",
 702                    pidfile, ovs_strerror(-pid));
 703     }
 704 }
 705
 706 \f
 707 /* stub functions for non-windows platform. */
 708
/* No-op on this platform; both arguments are unused. */
void
service_start(int *argc OVS_UNUSED, char **argv[] OVS_UNUSED)
{
}
 713
/* No-op on this platform. */
void
service_stop(void)
{
}
 718
/* Always returns false on this platform. */
bool
should_service_stop(void)
{
    return false;
}
 724
 725 \f
 726 static bool
 727 gid_matches(gid_t expected, gid_t value)
 728 {
 729     return expected == -1 || expected == value;
 730 }
 731
 732 static bool
 733 gid_verify(gid_t gid_)
 734 {
 735     gid_t r, e;
 736
 737     r = getgid();
 738     e = getegid();
 739     return (gid_matches(gid_, r) &&
 740             gid_matches(gid_, e));
 741 }
 742
 743 static void
 744 daemon_switch_group(gid_t gid_)
 745 {
 746     if ((setgid(gid_) == -1) || !gid_verify(gid_)) {
 747         VLOG_FATAL("%s: fail to switch group to gid as %d, aborting",
 748                    pidfile, gid_);
 749     }
 750 }
 751
 752 static bool
 753 uid_matches(uid_t expected, uid_t value)
 754 {
 755     return expected == -1 || expected == value;
 756 }
 757
 758 static bool
 759 uid_verify(const uid_t uid_)
 760 {
 761     uid_t r, e;
 762
 763     r = getuid();
 764     e = geteuid();
 765     return (uid_matches(uid_, r) &&
 766             uid_matches(uid_, e));
 767 }
 768
 769 static void
 770 daemon_switch_user(const uid_t uid_, const char *user_)
 771 {
 772     if ((setuid(uid_) == -1) || !uid_verify(uid_)) {
 773         VLOG_FATAL("%s: fail to switch user to %s, aborting",
 774                    pidfile, user_);
 775     }
 776 }
 777
 778 /* Use portable Unix APIs to switch uid:gid, when datapath
 779  * access is not required.  On Linux systems, all capabilities
 780  * will be dropped.  */
 781 static void
 782 daemon_become_new_user_unix(void)
 783 {
 784     /* "Setuid Demystified" by Hao Chen, etc outlines some caveats of
 785      * around unix system call setuid() and friends. This implementation
 786      * mostly follow the advice given by the paper.  The paper is
 787      * published in 2002, so things could have changed.  */
 788
 789     /* Change both real and effective uid and gid will permanently
 790      * drop the process' privilege.  "Setuid Demystified" suggested
 791      * that calling getuid() after each setuid() call to verify they
 792      * are actually set, because checking return code alone is not
 793      * sufficient.  */
 794     daemon_switch_group(gid);
 795     if (user && initgroups(user, gid) == -1) {
 796         VLOG_FATAL("%s: fail to add supplementary group gid %d, "
 797                    "aborting", pidfile, gid);
 798     }
 799     daemon_switch_user(uid, user);
 800 }
 801
/* Linux specific implementation of daemon_become_new_user()
 * using libcap-ng.
 *
 * Rebuilds the effective and permitted capability sets so that they hold
 * only CAP_IPC_LOCK and CAP_NET_BIND_SERVICE, plus CAP_NET_ADMIN,
 * CAP_NET_RAW and CAP_NET_BROADCAST when 'access_datapath' is true, and
 * then changes to the configured 'uid' and 'gid'.  Any failure, including
 * the process having no capabilities to begin with, is fatal.
 *
 * The body is compiled out, leaving an empty function, unless building on
 * Linux with libcap-ng. */
static void
daemon_become_new_user_linux(bool access_datapath OVS_UNUSED)
{
#if defined __linux__ &&  HAVE_LIBCAPNG
    int ret;

    ret = capng_get_caps_process();

    if (!ret) {
        if (capng_have_capabilities(CAPNG_SELECT_CAPS) > CAPNG_NONE) {
            const capng_type_t cap_sets = CAPNG_EFFECTIVE|CAPNG_PERMITTED;

            /* Start from empty sets, then add back only what is needed. */
            capng_clear(CAPNG_SELECT_BOTH);

            ret = capng_update(CAPNG_ADD, cap_sets, CAP_IPC_LOCK)
                  || capng_update(CAPNG_ADD, cap_sets, CAP_NET_BIND_SERVICE);

            if (access_datapath && !ret) {
                ret = capng_update(CAPNG_ADD, cap_sets, CAP_NET_ADMIN)
                      || capng_update(CAPNG_ADD, cap_sets, CAP_NET_RAW)
                      || capng_update(CAPNG_ADD, cap_sets, CAP_NET_BROADCAST);
            }
        } else {
            ret = -1;
        }
    }

    if (!ret) {
        /* CAPNG_INIT_SUPP_GRP will be a better choice than
         * CAPNG_DROP_SUPP_GRP. However this enum value is only defined
         * with libcap-ng higher than version 0.7.4, which is not widely
         * available on many Linux distributions yet. Taking a more
         * conservative approach to make sure OVS behaves consistently.
         *
         * XXX We may change this for future OVS releases.
         */
        ret = capng_change_id(uid, gid, CAPNG_DROP_SUPP_GRP
                              | CAPNG_CLEAR_BOUNDING);
    }

    if (ret) {
        VLOG_FATAL("%s: libcap-ng fail to switch to user and group "
                   "%d:%d, aborting", pidfile, uid, gid);
    }
#endif
}
 850
 851 static void
 852 daemon_become_new_user__(bool access_datapath)
 853 {
 854     /* If vlog file has been created, change its owner to the non-root user
 855      * as specifed by the --user option.  */
 856     vlog_change_owner_unix(uid, gid);
 857
 858     if (LINUX) {
 859         if (LIBCAPNG) {
 860             daemon_become_new_user_linux(access_datapath);
 861         } else {
 862             VLOG_FATAL("%s: fail to downgrade user using libcap-ng. "
 863                        "(libcap-ng is not configured at compile time), "
 864                        "aborting.", pidfile);
 865         }
 866     } else {
 867         daemon_become_new_user_unix();
 868     }
 869 }
 870
 871 /* Noramlly, user switch is embedded within daemonize_start().
 872  * However, there in case the user switch needs to be done
 873  * before daemonize_start(), the following API can be used.  */
 874 void
 875 daemon_become_new_user(bool access_datapath)
 876 {
 877     assert_single_threaded();
 878     if (switch_user) {
 879         daemon_become_new_user__(access_datapath);
 880         /* daemonize_start() should not switch user again. */
 881         switch_user = false;
 882     }
 883 }
 884
 885 /* Return the maximun suggested buffer size for both getpwname_r()
 886  * and getgrnam_r().
 887  *
 888  * This size may still not be big enough. in case getpwname_r()
 889  * and friends return ERANGE, a larger buffer should be supplied to
 890  * retry. (The man page did not specify the max size to stop at, we
 891  * will keep trying with doubling the buffer size for each round until
 892  * the size wrapps around size_t.  */
 893 static size_t
 894 get_sysconf_buffer_size(void)
 895 {
 896     size_t bufsize, pwd_bs = 0, grp_bs = 0;
 897     const size_t default_bufsize = 1024;
 898
 899     errno = 0;
 900     if ((pwd_bs = sysconf(_SC_GETPW_R_SIZE_MAX)) == -1) {
 901         if (errno) {
 902             VLOG_FATAL("%s: Read initial passwordd struct size "
 903                        "failed (%s), aborting. ", pidfile,
 904                        ovs_strerror(errno));
 905         }
 906     }
 907
 908     if ((grp_bs = sysconf(_SC_GETGR_R_SIZE_MAX)) == -1) {
 909         if (errno) {
 910             VLOG_FATAL("%s: Read initial group struct size "
 911                        "failed (%s), aborting. ", pidfile,
 912                        ovs_strerror(errno));
 913         }
 914     }
 915
 916     bufsize = MAX(pwd_bs, grp_bs);
 917     return bufsize ? bufsize : default_bufsize;
 918 }
 919
 920 /* Try to double the size of '*buf', return true
 921  * if successful, and '*sizep' will be updated with
 922  * the new size. Otherwise, return false.  */
 923 static bool
 924 enlarge_buffer(char **buf, size_t *sizep)
 925 {
 926     size_t newsize = *sizep * 2;
 927
 928     if (newsize > *sizep) {
 929         *buf = xrealloc(*buf, newsize);
 930         *sizep = newsize;
 931         return true;
 932     }
 933
 934     return false;
 935 }
 936
 937 /* Parse and sanity check user_spec.
 938  *
 939  * If successful, set global variables 'uid' and 'gid'
 940  * with the parsed results. Global variable 'user'
 941  * will be pointing to a string that stores the name
 942  * of the user to be switched into.
 943  *
 944  * Also set 'switch_to_new_user' to true, The actual
 945  * user switching is done as soon as daemonize_start()
 946  * is called. I/O access before calling daemonize_start()
 947  * will still be with root's credential.  */
 948 void
 949 daemon_set_new_user(const char *user_spec)
 950 {
 951     char *pos = strchr(user_spec, ':');
 952     size_t init_bufsize, bufsize;
 953
 954     init_bufsize = get_sysconf_buffer_size();
 955     uid = getuid();
 956     gid = getgid();
 957
 958     if (geteuid() || uid) {
 959         VLOG_FATAL("%s: only root can use --user option", pidfile);
 960     }
 961
 962     user_spec += strspn(user_spec, " \t\r\n");
 963     size_t len = pos ? pos - user_spec : strlen(user_spec);
 964     char *buf;
 965     struct passwd pwd, *res;
 966     int e;
 967
 968     bufsize = init_bufsize;
 969     buf = xmalloc(bufsize);
 970     if (len) {
 971         user = xmemdup0(user_spec, len);
 972
 973         while ((e = getpwnam_r(user, &pwd, buf, bufsize, &res)) == ERANGE) {
 974             if (!enlarge_buffer(&buf, &bufsize)) {
 975                 break;
 976             }
 977         }
 978
 979         if (e != 0) {
 980             VLOG_FATAL("%s: Failed to retrive user %s's uid (%s), aborting.",
 981                        pidfile, user, ovs_strerror(e));
 982         }
 983         if (res == NULL) {
 984             VLOG_FATAL("%s: user %s not found, aborting.", pidfile, user);
 985         }
 986     } else {
 987         /* User name is not specified, use current user.  */
 988         while ((e = getpwuid_r(uid, &pwd, buf, bufsize, &res)) == ERANGE) {
 989             if (!enlarge_buffer(&buf, &bufsize)) {
 990                 break;
 991             }
 992         }
 993
 994         if (e != 0) {
 995             VLOG_FATAL("%s: Failed to retrive current user's name "
 996                        "(%s), aborting.", pidfile, ovs_strerror(e));
 997         }
 998         user = xstrdup(pwd.pw_name);
 999     }
1000
1001     uid = pwd.pw_uid;
1002     gid = pwd.pw_gid;
1003     free(buf);
1004
1005     if (pos) {
1006         char *grpstr = pos + 1;
1007         grpstr += strspn(grpstr, " \t\r\n");
1008
1009         if (*grpstr) {
1010             struct group grp, *gres;
1011
1012             bufsize = init_bufsize;
1013             buf = xmalloc(bufsize);
1014             while ((e = getgrnam_r(grpstr, &grp, buf, bufsize, &gres))
1015                          == ERANGE) {
1016                 if (!enlarge_buffer(&buf, &bufsize)) {
1017                     break;
1018                 }
1019             }
1020
1021             if (e) {
1022                 VLOG_FATAL("%s: Failed to get group entry for %s, "
1023                            "(%s), aborting.", pidfile, grpstr,
1024                            ovs_strerror(e));
1025             }
1026             if (gres == NULL) {
1027                 VLOG_FATAL("%s: group %s not found, aborting.", pidfile,
1028                            grpstr);
1029             }
1030
1031             if (gid != grp.gr_gid) {
1032                 char **mem;
1033
1034                 for (mem = grp.gr_mem; *mem; ++mem) {
1035                     if (!strcmp(*mem, user)) {
1036                         break;
1037                     }
1038                 }
1039
1040                 if (!*mem) {
1041                     VLOG_FATAL("%s: Invalid --user option %s (user %s is "
1042                                "not in group %s), aborting.", pidfile,
1043                                user_spec, user, grpstr);
1044                 }
1045                 gid = grp.gr_gid;
1046             }
1047             free(buf);
1048         }
1049     }
1050
1051     switch_user = true;
1052 }