src/core/execute.c

   1 /***
   2   This file is part of systemd.
   3
   4   Copyright 2010 Lennart Poettering
   5
   6   systemd is free software; you can redistribute it and/or modify it
   7   under the terms of the GNU Lesser General Public License as published by
   8   the Free Software Foundation; either version 2.1 of the License, or
   9   (at your option) any later version.
  10
  11   systemd is distributed in the hope that it will be useful, but
  12   WITHOUT ANY WARRANTY; without even the implied warranty of
  13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14   Lesser General Public License for more details.
  15
  16   You should have received a copy of the GNU Lesser General Public License
  17   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  18 ***/
  19
  20 #include <errno.h>
  21 #include <fcntl.h>
  22 #include <glob.h>
  23 #include <grp.h>
  24 #include <poll.h>
  25 #include <signal.h>
  26 #include <string.h>
  27 #include <sys/capability.h>
  28 #include <sys/eventfd.h>
  29 #include <sys/mman.h>
  30 #include <sys/personality.h>
  31 #include <sys/prctl.h>
  32 #include <sys/shm.h>
  33 #include <sys/socket.h>
  34 #include <sys/stat.h>
  35 #include <sys/types.h>
  36 #include <sys/un.h>
  37 #include <unistd.h>
  38 #include <utmpx.h>
  39
  40 #ifdef HAVE_PAM
  41 #include <security/pam_appl.h>
  42 #endif
  43
  44 #ifdef HAVE_SELINUX
  45 #include <selinux/selinux.h>
  46 #endif
  47
  48 #ifdef HAVE_SECCOMP
  49 #include <seccomp.h>
  50 #endif
  51
  52 #ifdef HAVE_APPARMOR
  53 #include <sys/apparmor.h>
  54 #endif
  55
  56 #include "sd-messages.h"
  57
  58 #include "af-list.h"
  59 #include "alloc-util.h"
  60 #ifdef HAVE_APPARMOR
  61 #include "apparmor-util.h"
  62 #endif
  63 #include "async.h"
  64 #include "barrier.h"
  65 #include "cap-list.h"
  66 #include "capability-util.h"
  67 #include "def.h"
  68 #include "env-util.h"
  69 #include "errno-list.h"
  70 #include "execute.h"
  71 #include "exit-status.h"
  72 #include "fd-util.h"
  73 #include "fileio.h"
  74 #include "format-util.h"
  75 #include "fs-util.h"
  76 #include "glob-util.h"
  77 #include "io-util.h"
  78 #include "ioprio.h"
  79 #include "log.h"
  80 #include "macro.h"
  81 #include "missing.h"
  82 #include "mkdir.h"
  83 #include "namespace.h"
  84 #include "parse-util.h"
  85 #include "path-util.h"
  86 #include "process-util.h"
  87 #include "rlimit-util.h"
  88 #include "rm-rf.h"
  89 #ifdef HAVE_SECCOMP
  90 #include "seccomp-util.h"
  91 #endif
  92 #include "securebits.h"
  93 #include "selinux-util.h"
  94 #include "signal-util.h"
  95 #include "smack-util.h"
  96 #include "special.h"
  97 #include "string-table.h"
  98 #include "string-util.h"
  99 #include "strv.h"
 100 #include "syslog-util.h"
 101 #include "terminal-util.h"
 102 #include "unit.h"
 103 #include "user-util.h"
 104 #include "util.h"
 105 #include "utmp-wtmp.h"
 106
/* Two timeouts expressed in microseconds: five seconds and one second. */
#define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
#define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)

/* Access mode chown_terminal() enforces on a TTY: read/write for the owner, write-only for the group.
 * This assumes there is a 'tty' group */
#define TTY_MODE 0620

/* Send buffer size (8 MiB) requested for the socket connected to the journal in connect_logger_as(). */
#define SNDBUF_SIZE (8*1024*1024)
 114
 115 static int shift_fds(int fds[], unsigned n_fds) {
 116         int start, restart_from;
 117
 118         if (n_fds <= 0)
 119                 return 0;
 120
 121         /* Modifies the fds array! (sorts it) */
 122
 123         assert(fds);
 124
 125         start = 0;
 126         for (;;) {
 127                 int i;
 128
 129                 restart_from = -1;
 130
 131                 for (i = start; i < (int) n_fds; i++) {
 132                         int nfd;
 133
 134                         /* Already at right index? */
 135                         if (fds[i] == i+3)
 136                                 continue;
 137
 138                         nfd = fcntl(fds[i], F_DUPFD, i + 3);
 139                         if (nfd < 0)
 140                                 return -errno;
 141
 142                         safe_close(fds[i]);
 143                         fds[i] = nfd;
 144
 145                         /* Hmm, the fd we wanted isn't free? Then
 146                          * let's remember that and try again from here */
 147                         if (nfd != i+3 && restart_from < 0)
 148                                 restart_from = i;
 149                 }
 150
 151                 if (restart_from < 0)
 152                         break;
 153
 154                 start = restart_from;
 155         }
 156
 157         return 0;
 158 }
 159
 160 static int flags_fds(const int fds[], unsigned n_storage_fds, unsigned n_socket_fds, bool nonblock) {
 161         unsigned i, n_fds;
 162         int r;
 163
 164         n_fds = n_storage_fds + n_socket_fds;
 165         if (n_fds <= 0)
 166                 return 0;
 167
 168         assert(fds);
 169
 170         /* Drops/Sets O_NONBLOCK and FD_CLOEXEC from the file flags.
 171          * O_NONBLOCK only applies to socket activation though. */
 172
 173         for (i = 0; i < n_fds; i++) {
 174
 175                 if (i < n_socket_fds) {
 176                         r = fd_nonblock(fds[i], nonblock);
 177                         if (r < 0)
 178                                 return r;
 179                 }
 180
 181                 /* We unconditionally drop FD_CLOEXEC from the fds,
 182                  * since after all we want to pass these fds to our
 183                  * children */
 184
 185                 r = fd_cloexec(fds[i], false);
 186                 if (r < 0)
 187                         return r;
 188         }
 189
 190         return 0;
 191 }
 192
 193 static const char *exec_context_tty_path(const ExecContext *context) {
 194         assert(context);
 195
 196         if (context->stdio_as_fds)
 197                 return NULL;
 198
 199         if (context->tty_path)
 200                 return context->tty_path;
 201
 202         return "/dev/console";
 203 }
 204
 205 static void exec_context_tty_reset(const ExecContext *context, const ExecParameters *p) {
 206         const char *path;
 207
 208         assert(context);
 209
 210         path = exec_context_tty_path(context);
 211
 212         if (context->tty_vhangup) {
 213                 if (p && p->stdin_fd >= 0)
 214                         (void) terminal_vhangup_fd(p->stdin_fd);
 215                 else if (path)
 216                         (void) terminal_vhangup(path);
 217         }
 218
 219         if (context->tty_reset) {
 220                 if (p && p->stdin_fd >= 0)
 221                         (void) reset_terminal_fd(p->stdin_fd, true);
 222                 else if (path)
 223                         (void) reset_terminal(path);
 224         }
 225
 226         if (context->tty_vt_disallocate && path)
 227                 (void) vt_disallocate(path);
 228 }
 229
 230 static bool is_terminal_input(ExecInput i) {
 231         return IN_SET(i,
 232                       EXEC_INPUT_TTY,
 233                       EXEC_INPUT_TTY_FORCE,
 234                       EXEC_INPUT_TTY_FAIL);
 235 }
 236
 237 static bool is_terminal_output(ExecOutput o) {
 238         return IN_SET(o,
 239                       EXEC_OUTPUT_TTY,
 240                       EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
 241                       EXEC_OUTPUT_KMSG_AND_CONSOLE,
 242                       EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
 243 }
 244
 245 static bool exec_context_needs_term(const ExecContext *c) {
 246         assert(c);
 247
 248         /* Return true if the execution context suggests we should set $TERM to something useful. */
 249
 250         if (is_terminal_input(c->std_input))
 251                 return true;
 252
 253         if (is_terminal_output(c->std_output))
 254                 return true;
 255
 256         if (is_terminal_output(c->std_error))
 257                 return true;
 258
 259         return !!c->tty_path;
 260 }
 261
 262 static int open_null_as(int flags, int nfd) {
 263         int fd, r;
 264
 265         assert(nfd >= 0);
 266
 267         fd = open("/dev/null", flags|O_NOCTTY);
 268         if (fd < 0)
 269                 return -errno;
 270
 271         if (fd != nfd) {
 272                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
 273                 safe_close(fd);
 274         } else
 275                 r = nfd;
 276
 277         return r;
 278 }
 279
 280 static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
 281         union sockaddr_union sa = {
 282                 .un.sun_family = AF_UNIX,
 283                 .un.sun_path = "/run/systemd/journal/stdout",
 284         };
 285         uid_t olduid = UID_INVALID;
 286         gid_t oldgid = GID_INVALID;
 287         int r;
 288
 289         if (gid != GID_INVALID) {
 290                 oldgid = getgid();
 291
 292                 r = setegid(gid);
 293                 if (r < 0)
 294                         return -errno;
 295         }
 296
 297         if (uid != UID_INVALID) {
 298                 olduid = getuid();
 299
 300                 r = seteuid(uid);
 301                 if (r < 0) {
 302                         r = -errno;
 303                         goto restore_gid;
 304                 }
 305         }
 306
 307         r = connect(fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
 308         if (r < 0)
 309                 r = -errno;
 310
 311         /* If we fail to restore the uid or gid, things will likely
 312            fail later on. This should only happen if an LSM interferes. */
 313
 314         if (uid != UID_INVALID)
 315                 (void) seteuid(olduid);
 316
 317  restore_gid:
 318         if (gid != GID_INVALID)
 319                 (void) setegid(oldgid);
 320
 321         return r;
 322 }
 323
 324 static int connect_logger_as(
 325                 Unit *unit,
 326                 const ExecContext *context,
 327                 ExecOutput output,
 328                 const char *ident,
 329                 int nfd,
 330                 uid_t uid,
 331                 gid_t gid) {
 332
 333         int fd, r;
 334
 335         assert(context);
 336         assert(output < _EXEC_OUTPUT_MAX);
 337         assert(ident);
 338         assert(nfd >= 0);
 339
 340         fd = socket(AF_UNIX, SOCK_STREAM, 0);
 341         if (fd < 0)
 342                 return -errno;
 343
 344         r = connect_journal_socket(fd, uid, gid);
 345         if (r < 0)
 346                 return r;
 347
 348         if (shutdown(fd, SHUT_RD) < 0) {
 349                 safe_close(fd);
 350                 return -errno;
 351         }
 352
 353         (void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
 354
 355         dprintf(fd,
 356                 "%s\n"
 357                 "%s\n"
 358                 "%i\n"
 359                 "%i\n"
 360                 "%i\n"
 361                 "%i\n"
 362                 "%i\n",
 363                 context->syslog_identifier ? context->syslog_identifier : ident,
 364                 unit->id,
 365                 context->syslog_priority,
 366                 !!context->syslog_level_prefix,
 367                 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
 368                 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
 369                 is_terminal_output(output));
 370
 371         if (fd == nfd)
 372                 return nfd;
 373
 374         r = dup2(fd, nfd) < 0 ? -errno : nfd;
 375         safe_close(fd);
 376
 377         return r;
 378 }
 379 static int open_terminal_as(const char *path, mode_t mode, int nfd) {
 380         int fd, r;
 381
 382         assert(path);
 383         assert(nfd >= 0);
 384
 385         fd = open_terminal(path, mode | O_NOCTTY);
 386         if (fd < 0)
 387                 return fd;
 388
 389         if (fd != nfd) {
 390                 r = dup2(fd, nfd) < 0 ? -errno : nfd;
 391                 safe_close(fd);
 392         } else
 393                 r = nfd;
 394
 395         return r;
 396 }
 397
 398 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
 399
 400         if (is_terminal_input(std_input) && !apply_tty_stdin)
 401                 return EXEC_INPUT_NULL;
 402
 403         if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
 404                 return EXEC_INPUT_NULL;
 405
 406         return std_input;
 407 }
 408
 409 static int fixup_output(ExecOutput std_output, int socket_fd) {
 410
 411         if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
 412                 return EXEC_OUTPUT_INHERIT;
 413
 414         return std_output;
 415 }
 416
 417 static int setup_input(
 418                 const ExecContext *context,
 419                 const ExecParameters *params,
 420                 int socket_fd,
 421                 int named_iofds[3]) {
 422
 423         ExecInput i;
 424
 425         assert(context);
 426         assert(params);
 427
 428         if (params->stdin_fd >= 0) {
 429                 if (dup2(params->stdin_fd, STDIN_FILENO) < 0)
 430                         return -errno;
 431
 432                 /* Try to make this the controlling tty, if it is a tty, and reset it */
 433                 (void) ioctl(STDIN_FILENO, TIOCSCTTY, context->std_input == EXEC_INPUT_TTY_FORCE);
 434                 (void) reset_terminal_fd(STDIN_FILENO, true);
 435
 436                 return STDIN_FILENO;
 437         }
 438
 439         i = fixup_input(context->std_input, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
 440
 441         switch (i) {
 442
 443         case EXEC_INPUT_NULL:
 444                 return open_null_as(O_RDONLY, STDIN_FILENO);
 445
 446         case EXEC_INPUT_TTY:
 447         case EXEC_INPUT_TTY_FORCE:
 448         case EXEC_INPUT_TTY_FAIL: {
 449                 int fd, r;
 450
 451                 fd = acquire_terminal(exec_context_tty_path(context),
 452                                       i == EXEC_INPUT_TTY_FAIL,
 453                                       i == EXEC_INPUT_TTY_FORCE,
 454                                       false,
 455                                       USEC_INFINITY);
 456                 if (fd < 0)
 457                         return fd;
 458
 459                 if (fd != STDIN_FILENO) {
 460                         r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
 461                         safe_close(fd);
 462                 } else
 463                         r = STDIN_FILENO;
 464
 465                 return r;
 466         }
 467
 468         case EXEC_INPUT_SOCKET:
 469                 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
 470
 471         case EXEC_INPUT_NAMED_FD:
 472                 (void) fd_nonblock(named_iofds[STDIN_FILENO], false);
 473                 return dup2(named_iofds[STDIN_FILENO], STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
 474
 475         default:
 476                 assert_not_reached("Unknown input type");
 477         }
 478 }
 479
/* Sets up standard output or standard error for the process about to be executed.
 *
 * fileno:              which descriptor to set up (STDOUT_FILENO or STDERR_FILENO)
 * socket_fd:           socket to use for socket output, or negative if there is none
 * named_iofds:         named descriptors, indexed by stdio descriptor number
 * ident, uid, gid:     passed on to connect_logger_as() for the journal/syslog/kmsg outputs
 * journal_stream_dev,
 * journal_stream_ino:  receive device/inode of the descriptor if (and only if) it got connected to
 *                      the journal via a stream
 *
 * An explicitly passed params->stdout_fd/stderr_fd takes precedence over the context's settings.
 * Returns the descriptor number on success, a negative error value on failure. */
static int setup_output(
                Unit *unit,
                const ExecContext *context,
                const ExecParameters *params,
                int fileno,
                int socket_fd,
                int named_iofds[3],
                const char *ident,
                uid_t uid,
                gid_t gid,
                dev_t *journal_stream_dev,
                ino_t *journal_stream_ino) {

        ExecOutput o;
        ExecInput i;
        int r;

        assert(unit);
        assert(context);
        assert(params);
        assert(ident);
        assert(journal_stream_dev);
        assert(journal_stream_ino);

        /* Explicitly passed descriptors win over any configuration */
        if (fileno == STDOUT_FILENO && params->stdout_fd >= 0) {

                if (dup2(params->stdout_fd, STDOUT_FILENO) < 0)
                        return -errno;

                return STDOUT_FILENO;
        }

        if (fileno == STDERR_FILENO && params->stderr_fd >= 0) {
                if (dup2(params->stderr_fd, STDERR_FILENO) < 0)
                        return -errno;

                return STDERR_FILENO;
        }

        /* Determine the effective input and stdout settings, the decisions below depend on both */
        i = fixup_input(context->std_input, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
        o = fixup_output(context->std_output, socket_fd);

        if (fileno == STDERR_FILENO) {
                ExecOutput e;
                e = fixup_output(context->std_error, socket_fd);

                /* This expects the input and output are already set up */

                /* Don't change the stderr file descriptor if we inherit all
                 * the way and are not on a tty */
                if (e == EXEC_OUTPUT_INHERIT &&
                    o == EXEC_OUTPUT_INHERIT &&
                    i == EXEC_INPUT_NULL &&
                    !is_terminal_input(context->std_input) &&
                    getppid () != 1)
                        return fileno;

                /* Duplicate from stdout if possible */
                if ((e == o && e != EXEC_OUTPUT_NAMED_FD) || e == EXEC_OUTPUT_INHERIT)
                        return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;

                /* From here on stderr is handled according to its own setting */
                o = e;

        } else if (o == EXEC_OUTPUT_INHERIT) {
                /* If input got downgraded, inherit the original value */
                if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
                        return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);

                /* If the input is connected to anything that's not a /dev/null, inherit that... */
                if (i != EXEC_INPUT_NULL)
                        return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;

                /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
                if (getppid() != 1)
                        return fileno;

                /* We need to open /dev/null here anew, to get the right access mode. */
                return open_null_as(O_WRONLY, fileno);
        }

        switch (o) {

        case EXEC_OUTPUT_NULL:
                return open_null_as(O_WRONLY, fileno);

        case EXEC_OUTPUT_TTY:
                /* If stdin is a terminal already, simply share it */
                if (is_terminal_input(i))
                        return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;

                /* We don't reset the terminal if this is just about output */
                return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);

        case EXEC_OUTPUT_SYSLOG:
        case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
        case EXEC_OUTPUT_KMSG:
        case EXEC_OUTPUT_KMSG_AND_CONSOLE:
        case EXEC_OUTPUT_JOURNAL:
        case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
                r = connect_logger_as(unit, context, o, ident, fileno, uid, gid);
                if (r < 0) {
                        /* Not fatal: log the problem and fall back to /dev/null */
                        log_unit_error_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
                        r = open_null_as(O_WRONLY, fileno);
                } else {
                        struct stat st;

                        /* If we connected this fd to the journal via a stream, patch the device/inode into the passed
                         * parameters, but only then. This is useful so that we can set $JOURNAL_STREAM that permits
                         * services to detect whether they are connected to the journal or not. */

                        if (fstat(fileno, &st) >= 0) {
                                *journal_stream_dev = st.st_dev;
                                *journal_stream_ino = st.st_ino;
                        }
                }
                return r;

        case EXEC_OUTPUT_SOCKET:
                /* fixup_output() downgrades socket output to "inherit" when there is no socket fd */
                assert(socket_fd >= 0);
                return dup2(socket_fd, fileno) < 0 ? -errno : fileno;

        case EXEC_OUTPUT_NAMED_FD:
                /* Switch the named fd to blocking mode (best effort) before installing it */
                (void) fd_nonblock(named_iofds[fileno], false);
                return dup2(named_iofds[fileno], fileno) < 0 ? -errno : fileno;

        default:
                assert_not_reached("Unknown error type");
        }
}
 608
 609 static int chown_terminal(int fd, uid_t uid) {
 610         struct stat st;
 611
 612         assert(fd >= 0);
 613
 614         /* Before we chown/chmod the TTY, let's ensure this is actually a tty */
 615         if (isatty(fd) < 1)
 616                 return 0;
 617
 618         /* This might fail. What matters are the results. */
 619         (void) fchown(fd, uid, -1);
 620         (void) fchmod(fd, TTY_MODE);
 621
 622         if (fstat(fd, &st) < 0)
 623                 return -errno;
 624
 625         if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
 626                 return -EPERM;
 627
 628         return 0;
 629 }
 630
 631 static int setup_confirm_stdio(const char *vc, int *_saved_stdin, int *_saved_stdout) {
 632         _cleanup_close_ int fd = -1, saved_stdin = -1, saved_stdout = -1;
 633         int r;
 634
 635         assert(_saved_stdin);
 636         assert(_saved_stdout);
 637
 638         saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
 639         if (saved_stdin < 0)
 640                 return -errno;
 641
 642         saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
 643         if (saved_stdout < 0)
 644                 return -errno;
 645
 646         fd = acquire_terminal(vc, false, false, false, DEFAULT_CONFIRM_USEC);
 647         if (fd < 0)
 648                 return fd;
 649
 650         r = chown_terminal(fd, getuid());
 651         if (r < 0)
 652                 return r;
 653
 654         r = reset_terminal_fd(fd, true);
 655         if (r < 0)
 656                 return r;
 657
 658         if (dup2(fd, STDIN_FILENO) < 0)
 659                 return -errno;
 660
 661         if (dup2(fd, STDOUT_FILENO) < 0)
 662                 return -errno;
 663
 664         if (fd >= 2)
 665                 safe_close(fd);
 666         fd = -1;
 667
 668         *_saved_stdin = saved_stdin;
 669         *_saved_stdout = saved_stdout;
 670
 671         saved_stdin = saved_stdout = -1;
 672
 673         return 0;
 674 }
 675
 676 static void write_confirm_error_fd(int err, int fd, const Unit *u) {
 677         assert(err < 0);
 678
 679         if (err == -ETIMEDOUT)
 680                 dprintf(fd, "Confirmation question timed out for %s, assuming positive response.\n", u->id);
 681         else {
 682                 errno = -err;
 683                 dprintf(fd, "Couldn't ask confirmation for %s: %m, assuming positive response.\n", u->id);
 684         }
 685 }
 686
 687 static void write_confirm_error(int err, const char *vc, const Unit *u) {
 688         _cleanup_close_ int fd = -1;
 689
 690         assert(vc);
 691
 692         fd = open_terminal(vc, O_WRONLY|O_NOCTTY|O_CLOEXEC);
 693         if (fd < 0)
 694                 return;
 695
 696         write_confirm_error_fd(err, fd, u);
 697 }
 698
 699 static int restore_confirm_stdio(int *saved_stdin, int *saved_stdout) {
 700         int r = 0;
 701
 702         assert(saved_stdin);
 703         assert(saved_stdout);
 704
 705         release_terminal();
 706
 707         if (*saved_stdin >= 0)
 708                 if (dup2(*saved_stdin, STDIN_FILENO) < 0)
 709                         r = -errno;
 710
 711         if (*saved_stdout >= 0)
 712                 if (dup2(*saved_stdout, STDOUT_FILENO) < 0)
 713                         r = -errno;
 714
 715         *saved_stdin = safe_close(*saved_stdin);
 716         *saved_stdout = safe_close(*saved_stdout);
 717
 718         return r;
 719 }
 720
/* Possible results of ask_for_confirmation() */
enum {
        CONFIRM_PRETEND_FAILURE = -1,  /* don't execute the command and pretend it failed */
        CONFIRM_PRETEND_SUCCESS =  0,  /* don't execute the command and pretend it succeeded */
        CONFIRM_EXECUTE = 1,           /* execute the command */
};
 726
 727 static int ask_for_confirmation(const char *vc, Unit *u, const char *cmdline) {
 728         int saved_stdout = -1, saved_stdin = -1, r;
 729         _cleanup_free_ char *e = NULL;
 730         char c;
 731
 732         /* For any internal errors, assume a positive response. */
 733         r = setup_confirm_stdio(vc, &saved_stdin, &saved_stdout);
 734         if (r < 0) {
 735                 write_confirm_error(r, vc, u);
 736                 return CONFIRM_EXECUTE;
 737         }
 738
 739         /* confirm_spawn might have been disabled while we were sleeping. */
 740         if (manager_is_confirm_spawn_disabled(u->manager)) {
 741                 r = 1;
 742                 goto restore_stdio;
 743         }
 744
 745         e = ellipsize(cmdline, 60, 100);
 746         if (!e) {
 747                 log_oom();
 748                 r = CONFIRM_EXECUTE;
 749                 goto restore_stdio;
 750         }
 751
 752         for (;;) {
 753                 r = ask_char(&c, "yfshiDjcn", "Execute %s? [y, f, s – h for help] ", e);
 754                 if (r < 0) {
 755                         write_confirm_error_fd(r, STDOUT_FILENO, u);
 756                         r = CONFIRM_EXECUTE;
 757                         goto restore_stdio;
 758                 }
 759
 760                 switch (c) {
 761                 case 'c':
 762                         printf("Resuming normal execution.\n");
 763                         manager_disable_confirm_spawn();
 764                         r = 1;
 765                         break;
 766                 case 'D':
 767                         unit_dump(u, stdout, "  ");
 768                         continue; /* ask again */
 769                 case 'f':
 770                         printf("Failing execution.\n");
 771                         r = CONFIRM_PRETEND_FAILURE;
 772                         break;
 773                 case 'h':
 774                         printf("  c - continue, proceed without asking anymore\n"
 775                                "  D - dump, show the state of the unit\n"
 776                                "  f - fail, don't execute the command and pretend it failed\n"
 777                                "  h - help\n"
 778                                "  i - info, show a short summary of the unit\n"
 779                                "  j - jobs, show jobs that are in progress\n"
 780                                "  s - skip, don't execute the command and pretend it succeeded\n"
 781                                "  y - yes, execute the command\n");
 782                         continue; /* ask again */
 783                 case 'i':
 784                         printf("  Description: %s\n"
 785                                "  Unit:        %s\n"
 786                                "  Command:     %s\n",
 787                                u->id, u->description, cmdline);
 788                         continue; /* ask again */
 789                 case 'j':
 790                         manager_dump_jobs(u->manager, stdout, "  ");
 791                         continue; /* ask again */
 792                 case 'n':
 793                         /* 'n' was removed in favor of 'f'. */
 794                         printf("Didn't understand 'n', did you mean 'f'?\n");
 795                         continue; /* ask again */
 796                 case 's':
 797                         printf("Skipping execution.\n");
 798                         r = CONFIRM_PRETEND_SUCCESS;
 799                         break;
 800                 case 'y':
 801                         r = CONFIRM_EXECUTE;
 802                         break;
 803                 default:
 804                         assert_not_reached("Unhandled choice");
 805                 }
 806                 break;
 807         }
 808
 809 restore_stdio:
 810         restore_confirm_stdio(&saved_stdin, &saved_stdout);
 811         return r;
 812 }
 813
 814 static int get_fixed_user(const ExecContext *c, const char **user,
 815                           uid_t *uid, gid_t *gid,
 816                           const char **home, const char **shell) {
 817         int r;
 818         const char *name;
 819
 820         assert(c);
 821
 822         if (!c->user)
 823                 return 0;
 824
 825         /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway
 826          * (i.e. are "/" or "/bin/nologin"). */
 827
 828         name = c->user;
 829         r = get_user_creds_clean(&name, uid, gid, home, shell);
 830         if (r < 0)
 831                 return r;
 832
 833         *user = name;
 834         return 0;
 835 }
 836
 837 static int get_fixed_group(const ExecContext *c, const char **group, gid_t *gid) {
 838         int r;
 839         const char *name;
 840
 841         assert(c);
 842
 843         if (!c->group)
 844                 return 0;
 845
 846         name = c->group;
 847         r = get_group_creds(&name, gid);
 848         if (r < 0)
 849                 return r;
 850
 851         *group = name;
 852         return 0;
 853 }
 854
 855 static int get_supplementary_groups(const ExecContext *c, const char *user,
 856                                     const char *group, gid_t gid,
 857                                     gid_t **supplementary_gids, int *ngids) {
 858         char **i;
 859         int r, k = 0;
 860         int ngroups_max;
 861         bool keep_groups = false;
 862         gid_t *groups = NULL;
 863         _cleanup_free_ gid_t *l_gids = NULL;
 864
 865         assert(c);
 866
 867         /*
 868          * If user is given, then lookup GID and supplementary groups list.
 869          * We avoid NSS lookups for gid=0. Also we have to initialize groups
 870          * here and as early as possible so we keep the list of supplementary
 871          * groups of the caller.
 872          */
 873         if (user && gid_is_valid(gid) && gid != 0) {
 874                 /* First step, initialize groups from /etc/groups */
 875                 if (initgroups(user, gid) < 0)
 876                         return -errno;
 877
 878                 keep_groups = true;
 879         }
 880
 881         if (!c->supplementary_groups)
 882                 return 0;
 883
 884         /*
 885          * If SupplementaryGroups= was passed then NGROUPS_MAX has to
 886          * be positive, otherwise fail.
 887          */
 888         errno = 0;
 889         ngroups_max = (int) sysconf(_SC_NGROUPS_MAX);
 890         if (ngroups_max <= 0) {
 891                 if (errno > 0)
 892                         return -errno;
 893                 else
 894                         return -EOPNOTSUPP; /* For all other values */
 895         }
 896
 897         l_gids = new(gid_t, ngroups_max);
 898         if (!l_gids)
 899                 return -ENOMEM;
 900
 901         if (keep_groups) {
 902                 /*
 903                  * Lookup the list of groups that the user belongs to, we
 904                  * avoid NSS lookups here too for gid=0.
 905                  */
 906                 k = ngroups_max;
 907                 if (getgrouplist(user, gid, l_gids, &k) < 0)
 908                         return -EINVAL;
 909         } else
 910                 k = 0;
 911
 912         STRV_FOREACH(i, c->supplementary_groups) {
 913                 const char *g;
 914
 915                 if (k >= ngroups_max)
 916                         return -E2BIG;
 917
 918                 g = *i;
 919                 r = get_group_creds(&g, l_gids+k);
 920                 if (r < 0)
 921                         return r;
 922
 923                 k++;
 924         }
 925
 926         /*
 927          * Sets ngids to zero to drop all supplementary groups, happens
 928          * when we are under root and SupplementaryGroups= is empty.
 929          */
 930         if (k == 0) {
 931                 *ngids = 0;
 932                 return 0;
 933         }
 934
 935         /* Otherwise get the final list of supplementary groups */
 936         groups = memdup(l_gids, sizeof(gid_t) * k);
 937         if (!groups)
 938                 return -ENOMEM;
 939
 940         *supplementary_gids = groups;
 941         *ngids = k;
 942
 943         groups = NULL;
 944
 945         return 0;
 946 }
 947
 948 static int enforce_groups(const ExecContext *context, gid_t gid,
 949                           gid_t *supplementary_gids, int ngids) {
 950         int r;
 951
 952         assert(context);
 953
 954         /* Handle SupplementaryGroups= even if it is empty */
 955         if (context->supplementary_groups) {
 956                 r = maybe_setgroups(ngids, supplementary_gids);
 957                 if (r < 0)
 958                         return r;
 959         }
 960
 961         if (gid_is_valid(gid)) {
 962                 /* Then set our gids */
 963                 if (setresgid(gid, gid, gid) < 0)
 964                         return -errno;
 965         }
 966
 967         return 0;
 968 }
 969
 970 static int enforce_user(const ExecContext *context, uid_t uid) {
 971         assert(context);
 972
 973         if (!uid_is_valid(uid))
 974                 return 0;
 975
 976         /* Sets (but doesn't look up) the uid and make sure we keep the
 977          * capabilities while doing so. */
 978
 979         if (context->capability_ambient_set != 0) {
 980
 981                 /* First step: If we need to keep capabilities but
 982                  * drop privileges we need to make sure we keep our
 983                  * caps, while we drop privileges. */
 984                 if (uid != 0) {
 985                         int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
 986
 987                         if (prctl(PR_GET_SECUREBITS) != sb)
 988                                 if (prctl(PR_SET_SECUREBITS, sb) < 0)
 989                                         return -errno;
 990                 }
 991         }
 992
 993         /* Second step: actually set the uids */
 994         if (setresuid(uid, uid, uid) < 0)
 995                 return -errno;
 996
 997         /* At this point we should have all necessary capabilities but
 998            are otherwise a normal user. However, the caps might got
 999            corrupted due to the setresuid() so we need clean them up
1000            later. This is done outside of this call. */
1001
1002         return 0;
1003 }
1004
1005 #ifdef HAVE_PAM
1006
1007 static int null_conv(
1008                 int num_msg,
1009                 const struct pam_message **msg,
1010                 struct pam_response **resp,
1011                 void *appdata_ptr) {
1012
1013         /* We don't support conversations */
1014
1015         return PAM_CONV_ERR;
1016 }
1017
1018 #endif
1019
/* Opens a PAM session for the process that is about to be executed and forks off a "(sd-pam)"
 * helper process that closes the session again once the calling process has gone away.
 *
 *   name       PAM service name, passed to pam_start()
 *   user       user name, passed to pam_start()
 *   uid, gid   credentials the helper process drops to
 *   tty        if non-NULL, set as PAM_TTY item
 *   env        in/out: every entry is exported to PAM via pam_putenv(); on success the list is
 *              freed and replaced by the one returned by pam_getenvlist()
 *   fds, n_fds file descriptors that are closed in the helper process
 *
 * Returns 0 on success (and always when built without PAM support), a negative errno-style
 * error otherwise; PAM failures are reported as -EPERM. */
static int setup_pam(
                const char *name,
                const char *user,
                uid_t uid,
                gid_t gid,
                const char *tty,
                char ***env,
                int fds[], unsigned n_fds) {

#ifdef HAVE_PAM

        static const struct pam_conv conv = {
                .conv = null_conv,
                .appdata_ptr = NULL
        };

        _cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
        pam_handle_t *handle = NULL;
        sigset_t old_ss;
        int pam_code = PAM_SUCCESS, r;
        char **nv, **e = NULL;
        bool close_session = false;
        pid_t pam_pid = 0, parent_pid;
        int flags = 0;

        assert(name);
        assert(user);
        assert(env);

        /* We set up PAM in the parent process, then fork. The child
         * will then stay around until killed via PR_SET_PDEATHSIG or
         * systemd via the cgroup logic. It will then remove the PAM
         * session again. The parent process will exec() the actual
         * daemon. We do things this way to ensure that the main PID
         * of the daemon is the one we initially fork()ed. */

        r = barrier_create(&barrier);
        if (r < 0)
                goto fail;

        /* Keep PAM quiet unless we are logging at debug level */
        if (log_get_max_level() < LOG_DEBUG)
                flags |= PAM_SILENT;

        pam_code = pam_start(name, user, &conv, &handle);
        if (pam_code != PAM_SUCCESS) {
                handle = NULL;
                goto fail;
        }

        if (tty) {
                pam_code = pam_set_item(handle, PAM_TTY, tty);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        /* Make the environment we collected so far visible to the PAM modules */
        STRV_FOREACH(nv, *env) {
                pam_code = pam_putenv(handle, *nv);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        pam_code = pam_acct_mgmt(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        pam_code = pam_open_session(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        /* From here on the failure path has to close the session again */
        close_session = true;

        e = pam_getenvlist(handle);
        if (!e) {
                pam_code = PAM_BUF_ERR;
                goto fail;
        }

        /* Block SIGTERM, so that we know that it won't get lost in
         * the child */

        assert_se(sigprocmask_many(SIG_BLOCK, &old_ss, SIGTERM, -1) >= 0);

        parent_pid = getpid();

        pam_pid = fork();
        if (pam_pid < 0) {
                r = -errno;
                goto fail;
        }

        if (pam_pid == 0) {
                int sig, ret = EXIT_PAM;

                /* The child's job is to reset the PAM session on
                 * termination */
                barrier_set_role(&barrier, BARRIER_CHILD);

                /* This string must fit in 10 chars (i.e. the length
                 * of "/sbin/init"), to look pretty in /bin/ps */
                rename_process("(sd-pam)");

                /* Make sure we don't keep open the passed fds in this
                child. We assume that otherwise only those fds are
                open here that have been opened by PAM. */
                close_many(fds, n_fds);

                /* Drop privileges - we don't need any to pam_close_session
                 * and this will make PR_SET_PDEATHSIG work in most cases.
                 * If this fails, ignore the error - but expect sd-pam threads
                 * to fail to exit normally */

                r = maybe_setgroups(0, NULL);
                if (r < 0)
                        log_warning_errno(r, "Failed to setgroups() in sd-pam: %m");
                if (setresgid(gid, gid, gid) < 0)
                        log_warning_errno(errno, "Failed to setresgid() in sd-pam: %m");
                if (setresuid(uid, uid, uid) < 0)
                        log_warning_errno(errno, "Failed to setresuid() in sd-pam: %m");

                (void) ignore_signals(SIGPIPE, -1);

                /* Wait until our parent died. This will only work if
                 * the above setresuid() succeeds, otherwise the kernel
                 * will not allow unprivileged parents kill their privileged
                 * children this way. We rely on the control groups kill logic
                 * to do the rest for us. */
                if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
                        goto child_finish;

                /* Tell the parent that our setup is done. This is especially
                 * important regarding dropping privileges. Otherwise, unit
                 * setup might race against our setresuid(2) call.
                 *
                 * If the parent aborted, we'll detect this below, hence ignore
                 * return failure here. */
                (void) barrier_place(&barrier);

                /* Check if our parent process might already have died? */
                if (getppid() == parent_pid) {
                        sigset_t ss;

                        assert_se(sigemptyset(&ss) >= 0);
                        assert_se(sigaddset(&ss, SIGTERM) >= 0);

                        for (;;) {
                                /* Note that sigwait() reports failure by returning a
                                 * positive error number; it neither returns a negative
                                 * value nor sets errno. */
                                r = sigwait(&ss, &sig);
                                if (r == EINTR)
                                        continue;
                                if (r != 0)
                                        goto child_finish;

                                assert(sig == SIGTERM);
                                break;
                        }
                }

                /* If our parent died we'll end the session */
                if (getppid() != parent_pid) {
                        pam_code = pam_close_session(handle, flags);
                        if (pam_code != PAM_SUCCESS)
                                goto child_finish;
                }

                ret = 0;

        child_finish:
                pam_end(handle, pam_code | flags);
                _exit(ret);
        }

        barrier_set_role(&barrier, BARRIER_PARENT);

        /* If the child was forked off successfully it will do all the
         * cleanups, so forget about the handle here. */
        handle = NULL;

        /* Unblock SIGTERM again in the parent */
        assert_se(sigprocmask(SIG_SETMASK, &old_ss, NULL) >= 0);

        /* We close the log explicitly here, since the PAM modules
         * might have opened it, but we don't want this fd around. */
        closelog();

        /* Synchronously wait for the child to initialize. We don't care for
         * errors as we cannot recover. However, warn loudly if it happens. */
        if (!barrier_place_and_sync(&barrier))
                log_error("PAM initialization failed");

        /* Hand the environment as seen by PAM back to the caller */
        strv_free(*env);
        *env = e;

        return 0;

fail:
        if (pam_code != PAM_SUCCESS) {
                log_error("PAM failed: %s", pam_strerror(handle, pam_code));
                r = -EPERM;  /* PAM errors do not map to errno */
        } else
                log_error_errno(r, "PAM failed: %m");

        if (handle) {
                if (close_session)
                        pam_code = pam_close_session(handle, flags);

                pam_end(handle, pam_code | flags);
        }

        strv_free(e);
        closelog();

        return r;
#else
        return 0;
#endif
}
1236
/* Renames the current process to "(<name>)", where <name> is taken from the last component of
 * 'path', cut down to its final 8 characters if longer. If the last component is empty the
 * name "(...)" is used instead. */
static void rename_process_from_path(const char *path) {
        char label[11];
        const char *base, *tail;
        size_t len, keep;

        /* The result has to fit into 10 characters (i.e. the length of "/sbin/init") so that
         * it looks pretty in /bin/ps: 8 characters of name plus the two parentheses. */

        base = basename(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        /* When truncating keep the end of the name, which is usually the more interesting
         * part, since the beginning might just be "systemd-" */
        len = strlen(base);
        keep = len > 8 ? 8 : len;
        tail = base + (len - keep);

        label[0] = '(';
        memcpy(label + 1, tail, keep);
        label[keep + 1] = ')';
        label[keep + 2] = 0;

        rename_process(label);
}
1267
1268 static bool context_has_address_families(const ExecContext *c) {
1269         assert(c);
1270
1271         return c->address_families_whitelist ||
1272                 !set_isempty(c->address_families);
1273 }
1274
1275 static bool context_has_syscall_filters(const ExecContext *c) {
1276         assert(c);
1277
1278         return c->syscall_whitelist ||
1279                 !set_isempty(c->syscall_filter);
1280 }
1281
/* Decides whether "no new privileges" (NNP) is in effect for this context: always when it is
 * configured explicitly, never implicitly when we hold CAP_SYS_ADMIN, and otherwise whenever
 * any of the seccomp-backed settings checked below is enabled. */
static bool context_has_no_new_privileges(const ExecContext *c) {
        assert(c);

        /* Explicitly requested */
        if (c->no_new_privileges)
                return true;

        /* If we are privileged, we don't need NNP */
        if (have_effective_cap(CAP_SYS_ADMIN))
                return false;

        /* Unprivileged: any form of seccomp requires NNP */
        if (context_has_address_families(c))
                return true;

        if (c->memory_deny_write_execute || c->restrict_realtime)
                return true;

        if (exec_context_restrict_namespaces_set(c))
                return true;

        if (c->protect_kernel_tunables || c->protect_kernel_modules || c->private_devices)
                return true;

        if (context_has_syscall_filters(c))
                return true;

        return !set_isempty(c->syscall_archs);
}
1302
1303 #ifdef HAVE_SECCOMP
1304
1305 static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
1306
1307         if (is_seccomp_available())
1308                 return false;
1309
1310         log_open();
1311         log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
1312         log_close();
1313         return true;
1314 }
1315
1316 static int apply_syscall_filter(const Unit* u, const ExecContext *c) {
1317         uint32_t negative_action, default_action, action;
1318
1319         assert(u);
1320         assert(c);
1321
1322         if (!context_has_syscall_filters(c))
1323                 return 0;
1324
1325         if (skip_seccomp_unavailable(u, "SystemCallFilter="))
1326                 return 0;
1327
1328         negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
1329
1330         if (c->syscall_whitelist) {
1331                 default_action = negative_action;
1332                 action = SCMP_ACT_ALLOW;
1333         } else {
1334                 default_action = SCMP_ACT_ALLOW;
1335                 action = negative_action;
1336         }
1337
1338         return seccomp_load_syscall_filter_set_raw(default_action, c->syscall_filter, action);
1339 }
1340
1341 static int apply_syscall_archs(const Unit *u, const ExecContext *c) {
1342         assert(u);
1343         assert(c);
1344
1345         if (set_isempty(c->syscall_archs))
1346                 return 0;
1347
1348         if (skip_seccomp_unavailable(u, "SystemCallArchitectures="))
1349                 return 0;
1350
1351         return seccomp_restrict_archs(c->syscall_archs);
1352 }
1353
1354 static int apply_address_families(const Unit* u, const ExecContext *c) {
1355         assert(u);
1356         assert(c);
1357
1358         if (!context_has_address_families(c))
1359                 return 0;
1360
1361         if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
1362                 return 0;
1363
1364         return seccomp_restrict_address_families(c->address_families, c->address_families_whitelist);
1365 }
1366
1367 static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
1368         assert(u);
1369         assert(c);
1370
1371         if (!c->memory_deny_write_execute)
1372                 return 0;
1373
1374         if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute="))
1375                 return 0;
1376
1377         return seccomp_memory_deny_write_execute();
1378 }
1379
1380 static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
1381         assert(u);
1382         assert(c);
1383
1384         if (!c->restrict_realtime)
1385                 return 0;
1386
1387         if (skip_seccomp_unavailable(u, "RestrictRealtime="))
1388                 return 0;
1389
1390         return seccomp_restrict_realtime();
1391 }
1392
1393 static int apply_protect_sysctl(const Unit *u, const ExecContext *c) {
1394         assert(u);
1395         assert(c);
1396
1397         /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but
1398          * let's protect even those systems where this is left on in the kernel. */
1399
1400         if (!c->protect_kernel_tunables)
1401                 return 0;
1402
1403         if (skip_seccomp_unavailable(u, "ProtectKernelTunables="))
1404                 return 0;
1405
1406         return seccomp_protect_sysctl();
1407 }
1408
1409 static int apply_protect_kernel_modules(const Unit *u, const ExecContext *c) {
1410         assert(u);
1411         assert(c);
1412
1413         /* Turn off module syscalls on ProtectKernelModules=yes */
1414
1415         if (!c->protect_kernel_modules)
1416                 return 0;
1417
1418         if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
1419                 return 0;
1420
1421         return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM));
1422 }
1423
1424 static int apply_private_devices(const Unit *u, const ExecContext *c) {
1425         assert(u);
1426         assert(c);
1427
1428         /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
1429
1430         if (!c->private_devices)
1431                 return 0;
1432
1433         if (skip_seccomp_unavailable(u, "PrivateDevices="))
1434                 return 0;
1435
1436         return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM));
1437 }
1438
1439 static int apply_restrict_namespaces(Unit *u, const ExecContext *c) {
1440         assert(u);
1441         assert(c);
1442
1443         if (!exec_context_restrict_namespaces_set(c))
1444                 return 0;
1445
1446         if (skip_seccomp_unavailable(u, "RestrictNamespaces="))
1447                 return 0;
1448
1449         return seccomp_restrict_namespaces(c->restrict_namespaces);
1450 }
1451
1452 #endif
1453
1454 static void do_idle_pipe_dance(int idle_pipe[4]) {
1455         assert(idle_pipe);
1456
1457         idle_pipe[1] = safe_close(idle_pipe[1]);
1458         idle_pipe[2] = safe_close(idle_pipe[2]);
1459
1460         if (idle_pipe[0] >= 0) {
1461                 int r;
1462
1463                 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1464
1465                 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1466                         ssize_t n;
1467
1468                         /* Signal systemd that we are bored and want to continue. */
1469                         n = write(idle_pipe[3], "x", 1);
1470                         if (n > 0)
1471                                 /* Wait for systemd to react to the signal above. */
1472                                 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1473                 }
1474
1475                 idle_pipe[0] = safe_close(idle_pipe[0]);
1476
1477         }
1478
1479         idle_pipe[3] = safe_close(idle_pipe[3]);
1480 }
1481
1482 static int build_environment(
1483                 Unit *u,
1484                 const ExecContext *c,
1485                 const ExecParameters *p,
1486                 unsigned n_fds,
1487                 const char *home,
1488                 const char *username,
1489                 const char *shell,
1490                 dev_t journal_stream_dev,
1491                 ino_t journal_stream_ino,
1492                 char ***ret) {
1493
1494         _cleanup_strv_free_ char **our_env = NULL;
1495         unsigned n_env = 0;
1496         char *x;
1497
1498         assert(u);
1499         assert(c);
1500         assert(ret);
1501
1502         our_env = new0(char*, 14);
1503         if (!our_env)
1504                 return -ENOMEM;
1505
1506         if (n_fds > 0) {
1507                 _cleanup_free_ char *joined = NULL;
1508
1509                 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1510                         return -ENOMEM;
1511                 our_env[n_env++] = x;
1512
1513                 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1514                         return -ENOMEM;
1515                 our_env[n_env++] = x;
1516
1517                 joined = strv_join(p->fd_names, ":");
1518                 if (!joined)
1519                         return -ENOMEM;
1520
1521                 x = strjoin("LISTEN_FDNAMES=", joined);
1522                 if (!x)
1523                         return -ENOMEM;
1524                 our_env[n_env++] = x;
1525         }
1526
1527         if ((p->flags & EXEC_SET_WATCHDOG) && p->watchdog_usec > 0) {
1528                 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1529                         return -ENOMEM;
1530                 our_env[n_env++] = x;
1531
1532                 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, p->watchdog_usec) < 0)
1533                         return -ENOMEM;
1534                 our_env[n_env++] = x;
1535         }
1536
1537         /* If this is D-Bus, tell the nss-systemd module, since it relies on being able to use D-Bus look up dynamic
1538          * users via PID 1, possibly dead-locking the dbus daemon. This way it will not use D-Bus to resolve names, but
1539          * check the database directly. */
1540         if (unit_has_name(u, SPECIAL_DBUS_SERVICE)) {
1541                 x = strdup("SYSTEMD_NSS_BYPASS_BUS=1");
1542                 if (!x)
1543                         return -ENOMEM;
1544                 our_env[n_env++] = x;
1545         }
1546
1547         if (home) {
1548                 x = strappend("HOME=", home);
1549                 if (!x)
1550                         return -ENOMEM;
1551                 our_env[n_env++] = x;
1552         }
1553
1554         if (username) {
1555                 x = strappend("LOGNAME=", username);
1556                 if (!x)
1557                         return -ENOMEM;
1558                 our_env[n_env++] = x;
1559
1560                 x = strappend("USER=", username);
1561                 if (!x)
1562                         return -ENOMEM;
1563                 our_env[n_env++] = x;
1564         }
1565
1566         if (shell) {
1567                 x = strappend("SHELL=", shell);
1568                 if (!x)
1569                         return -ENOMEM;
1570                 our_env[n_env++] = x;
1571         }
1572
1573         if (!sd_id128_is_null(u->invocation_id)) {
1574                 if (asprintf(&x, "INVOCATION_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id)) < 0)
1575                         return -ENOMEM;
1576
1577                 our_env[n_env++] = x;
1578         }
1579
1580         if (exec_context_needs_term(c)) {
1581                 const char *tty_path, *term = NULL;
1582
1583                 tty_path = exec_context_tty_path(c);
1584
1585                 /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try to inherit
1586                  * the $TERM set for PID 1. This is useful for containers so that the $TERM the container manager
1587                  * passes to PID 1 ends up all the way in the console login shown. */
1588
1589                 if (path_equal(tty_path, "/dev/console") && getppid() == 1)
1590                         term = getenv("TERM");
1591                 if (!term)
1592                         term = default_term_for_tty(tty_path);
1593
1594                 x = strappend("TERM=", term);
1595                 if (!x)
1596                         return -ENOMEM;
1597                 our_env[n_env++] = x;
1598         }
1599
1600         if (journal_stream_dev != 0 && journal_stream_ino != 0) {
1601                 if (asprintf(&x, "JOURNAL_STREAM=" DEV_FMT ":" INO_FMT, journal_stream_dev, journal_stream_ino) < 0)
1602                         return -ENOMEM;
1603
1604                 our_env[n_env++] = x;
1605         }
1606
1607         our_env[n_env++] = NULL;
1608         assert(n_env <= 12);
1609
1610         *ret = our_env;
1611         our_env = NULL;
1612
1613         return 0;
1614 }
1615
1616 static int build_pass_environment(const ExecContext *c, char ***ret) {
1617         _cleanup_strv_free_ char **pass_env = NULL;
1618         size_t n_env = 0, n_bufsize = 0;
1619         char **i;
1620
1621         STRV_FOREACH(i, c->pass_environment) {
1622                 _cleanup_free_ char *x = NULL;
1623                 char *v;
1624
1625                 v = getenv(*i);
1626                 if (!v)
1627                         continue;
1628                 x = strjoin(*i, "=", v);
1629                 if (!x)
1630                         return -ENOMEM;
1631                 if (!GREEDY_REALLOC(pass_env, n_bufsize, n_env + 2))
1632                         return -ENOMEM;
1633                 pass_env[n_env++] = x;
1634                 pass_env[n_env] = NULL;
1635                 x = NULL;
1636         }
1637
1638         *ret = pass_env;
1639         pass_env = NULL;
1640
1641         return 0;
1642 }
1643
1644 static bool exec_needs_mount_namespace(
1645                 const ExecContext *context,
1646                 const ExecParameters *params,
1647                 ExecRuntime *runtime) {
1648
1649         assert(context);
1650         assert(params);
1651
1652         if (context->root_image)
1653                 return true;
1654
1655         if (!strv_isempty(context->read_write_paths) ||
1656             !strv_isempty(context->read_only_paths) ||
1657             !strv_isempty(context->inaccessible_paths))
1658                 return true;
1659
1660         if (context->n_bind_mounts > 0)
1661                 return true;
1662
1663         if (context->mount_flags != 0)
1664                 return true;
1665
1666         if (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))
1667                 return true;
1668
1669         if (context->private_devices ||
1670             context->protect_system != PROTECT_SYSTEM_NO ||
1671             context->protect_home != PROTECT_HOME_NO ||
1672             context->protect_kernel_tunables ||
1673             context->protect_kernel_modules ||
1674             context->protect_control_groups)
1675                 return true;
1676
1677         if (context->mount_apivfs && (context->root_image || context->root_directory))
1678                 return true;
1679
1680         return false;
1681 }
1682
1683 static int setup_private_users(uid_t uid, gid_t gid) {
1684         _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
1685         _cleanup_close_pair_ int errno_pipe[2] = { -1, -1 };
1686         _cleanup_close_ int unshare_ready_fd = -1;
1687         _cleanup_(sigkill_waitp) pid_t pid = 0;
1688         uint64_t c = 1;
1689         siginfo_t si;
1690         ssize_t n;
1691         int r;
1692
1693         /* Set up a user namespace and map root to root, the selected UID/GID to itself, and everything else to
1694          * nobody. In order to be able to write this mapping we need CAP_SETUID in the original user namespace, which
1695          * we however lack after opening the user namespace. To work around this we fork() a temporary child process,
1696          * which waits for the parent to create the new user namespace while staying in the original namespace. The
1697          * child then writes the UID mapping, under full privileges. The parent waits for the child to finish and
1698          * continues execution normally. */
1699
1700         if (uid != 0 && uid_is_valid(uid)) {
1701                 r = asprintf(&uid_map,
1702                              "0 0 1\n"                      /* Map root → root */
1703                              UID_FMT " " UID_FMT " 1\n",    /* Map $UID → $UID */
1704                              uid, uid);
1705                 if (r < 0)
1706                         return -ENOMEM;
1707         } else {
1708                 uid_map = strdup("0 0 1\n");            /* The case where the above is the same */
1709                 if (!uid_map)
1710                         return -ENOMEM;
1711         }
1712
1713         if (gid != 0 && gid_is_valid(gid)) {
1714                 r = asprintf(&gid_map,
1715                              "0 0 1\n"                      /* Map root → root */
1716                              GID_FMT " " GID_FMT " 1\n",    /* Map $GID → $GID */
1717                              gid, gid);
1718                 if (r < 0)
1719                         return -ENOMEM;
1720         } else {
1721                 gid_map = strdup("0 0 1\n");            /* The case where the above is the same */
1722                 if (!gid_map)
1723                         return -ENOMEM;
1724         }
1725
1726         /* Create a communication channel so that the parent can tell the child when it finished creating the user
1727          * namespace. */
1728         unshare_ready_fd = eventfd(0, EFD_CLOEXEC);
1729         if (unshare_ready_fd < 0)
1730                 return -errno;
1731
1732         /* Create a communication channel so that the child can tell the parent a proper error code in case it
1733          * failed. */
1734         if (pipe2(errno_pipe, O_CLOEXEC) < 0)
1735                 return -errno;
1736
1737         pid = fork();
1738         if (pid < 0)
1739                 return -errno;
1740
1741         if (pid == 0) {
1742                 _cleanup_close_ int fd = -1;
1743                 const char *a;
1744                 pid_t ppid;
1745
1746                 /* Child process, running in the original user namespace. Let's update the parent's UID/GID map from
1747                  * here, after the parent opened its own user namespace. */
1748
1749                 ppid = getppid();
1750                 errno_pipe[0] = safe_close(errno_pipe[0]);
1751
1752                 /* Wait until the parent unshared the user namespace */
1753                 if (read(unshare_ready_fd, &c, sizeof(c)) < 0) {
1754                         r = -errno;
1755                         goto child_fail;
1756                 }
1757
1758                 /* Disable the setgroups() system call in the child user namespace, for good. */
1759                 a = procfs_file_alloca(ppid, "setgroups");
1760                 fd = open(a, O_WRONLY|O_CLOEXEC);
1761                 if (fd < 0) {
1762                         if (errno != ENOENT) {
1763                                 r = -errno;
1764                                 goto child_fail;
1765                         }
1766
1767                         /* If the file is missing the kernel is too old, let's continue anyway. */
1768                 } else {
1769                         if (write(fd, "deny\n", 5) < 0) {
1770                                 r = -errno;
1771                                 goto child_fail;
1772                         }
1773
1774                         fd = safe_close(fd);
1775                 }
1776
1777                 /* First write the GID map */
1778                 a = procfs_file_alloca(ppid, "gid_map");
1779                 fd = open(a, O_WRONLY|O_CLOEXEC);
1780                 if (fd < 0) {
1781                         r = -errno;
1782                         goto child_fail;
1783                 }
1784                 if (write(fd, gid_map, strlen(gid_map)) < 0) {
1785                         r = -errno;
1786                         goto child_fail;
1787                 }
1788                 fd = safe_close(fd);
1789
1790                 /* Then write the UID map */
1791                 a = procfs_file_alloca(ppid, "uid_map");
1792                 fd = open(a, O_WRONLY|O_CLOEXEC);
1793                 if (fd < 0) {
1794                         r = -errno;
1795                         goto child_fail;
1796                 }
1797                 if (write(fd, uid_map, strlen(uid_map)) < 0) {
1798                         r = -errno;
1799                         goto child_fail;
1800                 }
1801
1802                 _exit(EXIT_SUCCESS);
1803
1804         child_fail:
1805                 (void) write(errno_pipe[1], &r, sizeof(r));
1806                 _exit(EXIT_FAILURE);
1807         }
1808
1809         errno_pipe[1] = safe_close(errno_pipe[1]);
1810
1811         if (unshare(CLONE_NEWUSER) < 0)
1812                 return -errno;
1813
1814         /* Let the child know that the namespace is ready now */
1815         if (write(unshare_ready_fd, &c, sizeof(c)) < 0)
1816                 return -errno;
1817
1818         /* Try to read an error code from the child */
1819         n = read(errno_pipe[0], &r, sizeof(r));
1820         if (n < 0)
1821                 return -errno;
1822         if (n == sizeof(r)) { /* an error code was sent to us */
1823                 if (r < 0)
1824                         return r;
1825                 return -EIO;
1826         }
1827         if (n != 0) /* on success we should have read 0 bytes */
1828                 return -EIO;
1829
1830         r = wait_for_terminate(pid, &si);
1831         if (r < 0)
1832                 return r;
1833         pid = 0;
1834
1835         /* If something strange happened with the child, let's consider this fatal, too */
1836         if (si.si_code != CLD_EXITED || si.si_status != 0)
1837                 return -EIO;
1838
1839         return 0;
1840 }
1841
1842 static int setup_runtime_directory(
1843                 const ExecContext *context,
1844                 const ExecParameters *params,
1845                 uid_t uid,
1846                 gid_t gid) {
1847
1848         char **rt;
1849         int r;
1850
1851         assert(context);
1852         assert(params);
1853
1854         STRV_FOREACH(rt, context->runtime_directory) {
1855                 _cleanup_free_ char *p;
1856
1857                 p = strjoin(params->runtime_prefix, "/", *rt);
1858                 if (!p)
1859                         return -ENOMEM;
1860
1861                 r = mkdir_p_label(p, context->runtime_directory_mode);
1862                 if (r < 0)
1863                         return r;
1864
1865                 r = chmod_and_chown(p, context->runtime_directory_mode, uid, gid);
1866                 if (r < 0)
1867                         return r;
1868         }
1869
1870         return 0;
1871 }
1872
1873 static int setup_smack(
1874                 const ExecContext *context,
1875                 const ExecCommand *command) {
1876
1877 #ifdef HAVE_SMACK
1878         int r;
1879
1880         assert(context);
1881         assert(command);
1882
1883         if (!mac_smack_use())
1884                 return 0;
1885
1886         if (context->smack_process_label) {
1887                 r = mac_smack_apply_pid(0, context->smack_process_label);
1888                 if (r < 0)
1889                         return r;
1890         }
1891 #ifdef SMACK_DEFAULT_PROCESS_LABEL
1892         else {
1893                 _cleanup_free_ char *exec_label = NULL;
1894
1895                 r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &exec_label);
1896                 if (r < 0 && r != -ENODATA && r != -EOPNOTSUPP)
1897                         return r;
1898
1899                 r = mac_smack_apply_pid(0, exec_label ? : SMACK_DEFAULT_PROCESS_LABEL);
1900                 if (r < 0)
1901                         return r;
1902         }
1903 #endif
1904 #endif
1905
1906         return 0;
1907 }
1908
1909 static int compile_read_write_paths(
1910                 const ExecContext *context,
1911                 const ExecParameters *params,
1912                 char ***ret) {
1913
1914         _cleanup_strv_free_ char **l = NULL;
1915         char **rt;
1916
1917         /* Compile the list of writable paths. This is the combination of
1918          * the explicitly configured paths, plus all runtime directories. */
1919
1920         if (strv_isempty(context->read_write_paths) &&
1921             strv_isempty(context->runtime_directory)) {
1922                 *ret = NULL; /* NOP if neither is set */
1923                 return 0;
1924         }
1925
1926         l = strv_copy(context->read_write_paths);
1927         if (!l)
1928                 return -ENOMEM;
1929
1930         STRV_FOREACH(rt, context->runtime_directory) {
1931                 char *s;
1932
1933                 s = strjoin(params->runtime_prefix, "/", *rt);
1934                 if (!s)
1935                         return -ENOMEM;
1936
1937                 if (strv_consume(&l, s) < 0)
1938                         return -ENOMEM;
1939         }
1940
1941         *ret = l;
1942         l = NULL;
1943
1944         return 0;
1945 }
1946
1947 static int apply_mount_namespace(
1948                 Unit *u,
1949                 ExecCommand *command,
1950                 const ExecContext *context,
1951                 const ExecParameters *params,
1952                 ExecRuntime *runtime) {
1953
1954         _cleanup_strv_free_ char **rw = NULL;
1955         char *tmp = NULL, *var = NULL;
1956         const char *root_dir = NULL, *root_image = NULL;
1957         NameSpaceInfo ns_info = {
1958                 .ignore_protect_paths = false,
1959                 .private_dev = context->private_devices,
1960                 .protect_control_groups = context->protect_control_groups,
1961                 .protect_kernel_tunables = context->protect_kernel_tunables,
1962                 .protect_kernel_modules = context->protect_kernel_modules,
1963                 .mount_apivfs = context->mount_apivfs,
1964         };
1965         bool apply_restrictions;
1966         int r;
1967
1968         assert(context);
1969
1970         /* The runtime struct only contains the parent of the private /tmp,
1971          * which is non-accessible to world users. Inside of it there's a /tmp
1972          * that is sticky, and that's the one we want to use here. */
1973
1974         if (context->private_tmp && runtime) {
1975                 if (runtime->tmp_dir)
1976                         tmp = strjoina(runtime->tmp_dir, "/tmp");
1977                 if (runtime->var_tmp_dir)
1978                         var = strjoina(runtime->var_tmp_dir, "/tmp");
1979         }
1980
1981         r = compile_read_write_paths(context, params, &rw);
1982         if (r < 0)
1983                 return r;
1984
1985         if (params->flags & EXEC_APPLY_CHROOT) {
1986                 root_image = context->root_image;
1987
1988                 if (!root_image)
1989                         root_dir = context->root_directory;
1990         }
1991
1992         /*
1993          * If DynamicUser=no and RootDirectory= is set then lets pass a relaxed
1994          * sandbox info, otherwise enforce it, don't ignore protected paths and
1995          * fail if we are enable to apply the sandbox inside the mount namespace.
1996          */
1997         if (!context->dynamic_user && root_dir)
1998                 ns_info.ignore_protect_paths = true;
1999
2000         apply_restrictions = (params->flags & EXEC_APPLY_PERMISSIONS) && !command->privileged;
2001
2002         r = setup_namespace(root_dir, root_image,
2003                             &ns_info, rw,
2004                             apply_restrictions ? context->read_only_paths : NULL,
2005                             apply_restrictions ? context->inaccessible_paths : NULL,
2006                             context->bind_mounts,
2007                             context->n_bind_mounts,
2008                             tmp,
2009                             var,
2010                             apply_restrictions ? context->protect_home : PROTECT_HOME_NO,
2011                             apply_restrictions ? context->protect_system : PROTECT_SYSTEM_NO,
2012                             context->mount_flags,
2013                             DISSECT_IMAGE_DISCARD_ON_LOOP);
2014
2015         /* If we couldn't set up the namespace this is probably due to a
2016          * missing capability. In this case, silently proceeed. */
2017         if (IN_SET(r, -EPERM, -EACCES)) {
2018                 log_open();
2019                 log_unit_debug_errno(u, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
2020                 log_close();
2021                 r = 0;
2022         }
2023
2024         return r;
2025 }
2026
2027 static int apply_working_directory(
2028                 const ExecContext *context,
2029                 const ExecParameters *params,
2030                 const char *home,
2031                 const bool needs_mount_ns,
2032                 int *exit_status) {
2033
2034         const char *d, *wd;
2035
2036         assert(context);
2037         assert(exit_status);
2038
2039         if (context->working_directory_home) {
2040
2041                 if (!home) {
2042                         *exit_status = EXIT_CHDIR;
2043                         return -ENXIO;
2044                 }
2045
2046                 wd = home;
2047
2048         } else if (context->working_directory)
2049                 wd = context->working_directory;
2050         else
2051                 wd = "/";
2052
2053         if (params->flags & EXEC_APPLY_CHROOT) {
2054                 if (!needs_mount_ns && context->root_directory)
2055                         if (chroot(context->root_directory) < 0) {
2056                                 *exit_status = EXIT_CHROOT;
2057                                 return -errno;
2058                         }
2059
2060                 d = wd;
2061         } else
2062                 d = prefix_roota(context->root_directory, wd);
2063
2064         if (chdir(d) < 0 && !context->working_directory_missing_ok) {
2065                 *exit_status = EXIT_CHDIR;
2066                 return -errno;
2067         }
2068
2069         return 0;
2070 }
2071
2072 static int setup_keyring(Unit *u, const ExecParameters *p, uid_t uid, gid_t gid) {
2073         key_serial_t keyring;
2074
2075         assert(u);
2076         assert(p);
2077
2078         /* Let's set up a new per-service "session" kernel keyring for each system service. This has the benefit that
2079          * each service runs with its own keyring shared among all processes of the service, but with no hook-up beyond
2080          * that scope, and in particular no link to the per-UID keyring. If we don't do this the keyring will be
2081          * automatically created on-demand and then linked to the per-UID keyring, by the kernel. The kernel's built-in
2082          * on-demand behaviour is very appropriate for login users, but probably not so much for system services, where
2083          * UIDs are not necessarily specific to a service but reused (at least in the case of UID 0). */
2084
2085         if (!(p->flags & EXEC_NEW_KEYRING))
2086                 return 0;
2087
2088         keyring = keyctl(KEYCTL_JOIN_SESSION_KEYRING, 0, 0, 0, 0);
2089         if (keyring == -1) {
2090                 if (errno == ENOSYS)
2091                         log_debug_errno(errno, "Kernel keyring not supported, ignoring.");
2092                 else if (IN_SET(errno, EACCES, EPERM))
2093                         log_debug_errno(errno, "Kernel keyring access prohibited, ignoring.");
2094                 else if (errno == EDQUOT)
2095                         log_debug_errno(errno, "Out of kernel keyrings to allocate, ignoring.");
2096                 else
2097                         return log_error_errno(errno, "Setting up kernel keyring failed: %m");
2098
2099                 return 0;
2100         }
2101
2102         /* Populate they keyring with the invocation ID by default. */
2103         if (!sd_id128_is_null(u->invocation_id)) {
2104                 key_serial_t key;
2105
2106                 key = add_key("user", "invocation_id", &u->invocation_id, sizeof(u->invocation_id), KEY_SPEC_SESSION_KEYRING);
2107                 if (key == -1)
2108                         log_debug_errno(errno, "Failed to add invocation ID to keyring, ignoring: %m");
2109                 else {
2110                         if (keyctl(KEYCTL_SETPERM, key,
2111                                    KEY_POS_VIEW|KEY_POS_READ|KEY_POS_SEARCH|
2112                                    KEY_USR_VIEW|KEY_USR_READ|KEY_USR_SEARCH, 0, 0) < 0)
2113                                 return log_error_errno(errno, "Failed to restrict invocation ID permission: %m");
2114                 }
2115         }
2116
2117         /* And now, make the keyring owned by the service's user */
2118         if (uid_is_valid(uid) || gid_is_valid(gid))
2119                 if (keyctl(KEYCTL_CHOWN, keyring, uid, gid, 0) < 0)
2120                         return log_error_errno(errno, "Failed to change ownership of session keyring: %m");
2121
2122         return 0;
2123 }
2124
/* Appends the valid (i.e. non-negative) file descriptors of 'pair' to 'array', first pair[0], then
 * pair[1], incrementing *n for each one stored. Does nothing if 'pair' is NULL. The caller must make
 * sure 'array' has room for up to two more entries. */
static void append_socket_pair(int *array, unsigned *n, int pair[2]) {
        unsigned i;

        assert(array);
        assert(n);

        if (!pair)
                return;

        for (i = 0; i < 2; i++)
                if (pair[i] >= 0)
                        array[(*n)++] = pair[i];
}
2137
2138 static int close_remaining_fds(
2139                 const ExecParameters *params,
2140                 ExecRuntime *runtime,
2141                 DynamicCreds *dcreds,
2142                 int user_lookup_fd,
2143                 int socket_fd,
2144                 int *fds, unsigned n_fds) {
2145
2146         unsigned n_dont_close = 0;
2147         int dont_close[n_fds + 12];
2148
2149         assert(params);
2150
2151         if (params->stdin_fd >= 0)
2152                 dont_close[n_dont_close++] = params->stdin_fd;
2153         if (params->stdout_fd >= 0)
2154                 dont_close[n_dont_close++] = params->stdout_fd;
2155         if (params->stderr_fd >= 0)
2156                 dont_close[n_dont_close++] = params->stderr_fd;
2157
2158         if (socket_fd >= 0)
2159                 dont_close[n_dont_close++] = socket_fd;
2160         if (n_fds > 0) {
2161                 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
2162                 n_dont_close += n_fds;
2163         }
2164
2165         if (runtime)
2166                 append_socket_pair(dont_close, &n_dont_close, runtime->netns_storage_socket);
2167
2168         if (dcreds) {
2169                 if (dcreds->user)
2170                         append_socket_pair(dont_close, &n_dont_close, dcreds->user->storage_socket);
2171                 if (dcreds->group)
2172                         append_socket_pair(dont_close, &n_dont_close, dcreds->group->storage_socket);
2173         }
2174
2175         if (user_lookup_fd >= 0)
2176                 dont_close[n_dont_close++] = user_lookup_fd;
2177
2178         return close_all_fds(dont_close, n_dont_close);
2179 }
2180
2181 static int send_user_lookup(
2182                 Unit *unit,
2183                 int user_lookup_fd,
2184                 uid_t uid,
2185                 gid_t gid) {
2186
2187         assert(unit);
2188
2189         /* Send the resolved UID/GID to PID 1 after we learnt it. We send a single datagram, containing the UID/GID
2190          * data as well as the unit name. Note that we suppress sending this if no user/group to resolve was
2191          * specified. */
2192
2193         if (user_lookup_fd < 0)
2194                 return 0;
2195
2196         if (!uid_is_valid(uid) && !gid_is_valid(gid))
2197                 return 0;
2198
2199         if (writev(user_lookup_fd,
2200                (struct iovec[]) {
2201                            { .iov_base = &uid, .iov_len = sizeof(uid) },
2202                            { .iov_base = &gid, .iov_len = sizeof(gid) },
2203                            { .iov_base = unit->id, .iov_len = strlen(unit->id) }}, 3) < 0)
2204                 return -errno;
2205
2206         return 0;
2207 }
2208
2209 static int acquire_home(const ExecContext *c, uid_t uid, const char** home, char **buf) {
2210         int r;
2211
2212         assert(c);
2213         assert(home);
2214         assert(buf);
2215
2216         /* If WorkingDirectory=~ is set, try to acquire a usable home directory. */
2217
2218         if (*home)
2219                 return 0;
2220
2221         if (!c->working_directory_home)
2222                 return 0;
2223
2224         if (uid == 0) {
2225                 /* Hardcode /root as home directory for UID 0 */
2226                 *home = "/root";
2227                 return 1;
2228         }
2229
2230         r = get_home_dir(buf);
2231         if (r < 0)
2232                 return r;
2233
2234         *home = *buf;
2235         return 1;
2236 }
2237
2238 static int exec_child(
2239                 Unit *unit,
2240                 ExecCommand *command,
2241                 const ExecContext *context,
2242                 const ExecParameters *params,
2243                 ExecRuntime *runtime,
2244                 DynamicCreds *dcreds,
2245                 char **argv,
2246                 int socket_fd,
2247                 int named_iofds[3],
2248                 int *fds,
2249                 unsigned n_storage_fds,
2250                 unsigned n_socket_fds,
2251                 char **files_env,
2252                 int user_lookup_fd,
2253                 int *exit_status,
2254                 char **error_message) {
2255
2256         _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **final_argv = NULL;
2257         _cleanup_free_ char *mac_selinux_context_net = NULL, *home_buffer = NULL;
2258         _cleanup_free_ gid_t *supplementary_gids = NULL;
2259         const char *username = NULL, *groupname = NULL;
2260         const char *home = NULL, *shell = NULL;
2261         dev_t journal_stream_dev = 0;
2262         ino_t journal_stream_ino = 0;
2263         bool needs_mount_namespace;
2264         uid_t uid = UID_INVALID;
2265         gid_t gid = GID_INVALID;
2266         int i, r, ngids = 0;
2267         unsigned n_fds;
2268
2269         assert(unit);
2270         assert(command);
2271         assert(context);
2272         assert(params);
2273         assert(exit_status);
2274         assert(error_message);
2275         /* We don't always set error_message, hence it must be initialized */
2276         assert(*error_message == NULL);
2277
2278         rename_process_from_path(command->path);
2279
2280         /* We reset exactly these signals, since they are the
2281          * only ones we set to SIG_IGN in the main daemon. All
2282          * others we leave untouched because we set them to
2283          * SIG_DFL or a valid handler initially, both of which
2284          * will be demoted to SIG_DFL. */
2285         (void) default_signals(SIGNALS_CRASH_HANDLER,
2286                                SIGNALS_IGNORE, -1);
2287
2288         if (context->ignore_sigpipe)
2289                 (void) ignore_signals(SIGPIPE, -1);
2290
2291         r = reset_signal_mask();
2292         if (r < 0) {
2293                 *exit_status = EXIT_SIGNAL_MASK;
2294                 *error_message = strdup("Failed to reset signal mask");
2295                 /* If strdup fails, here and below, we will just print the generic error message. */
2296                 return r;
2297         }
2298
2299         if (params->idle_pipe)
2300                 do_idle_pipe_dance(params->idle_pipe);
2301
2302         /* Close sockets very early to make sure we don't
2303          * block init reexecution because it cannot bind its
2304          * sockets */
2305
2306         log_forget_fds();
2307
2308         n_fds = n_storage_fds + n_socket_fds;
2309         r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, fds, n_fds);
2310         if (r < 0) {
2311                 *exit_status = EXIT_FDS;
2312                 *error_message = strdup("Failed to close remaining fds");
2313                 return r;
2314         }
2315
2316         if (!context->same_pgrp)
2317                 if (setsid() < 0) {
2318                         *exit_status = EXIT_SETSID;
2319                         return -errno;
2320                 }
2321
2322         exec_context_tty_reset(context, params);
2323
2324         if (unit_shall_confirm_spawn(unit)) {
2325                 const char *vc = params->confirm_spawn;
2326                 _cleanup_free_ char *cmdline = NULL;
2327
2328                 cmdline = exec_command_line(argv);
2329                 if (!cmdline) {
2330                         *exit_status = EXIT_CONFIRM;
2331                         return -ENOMEM;
2332                 }
2333
2334                 r = ask_for_confirmation(vc, unit, cmdline);
2335                 if (r != CONFIRM_EXECUTE) {
2336                         if (r == CONFIRM_PRETEND_SUCCESS) {
2337                                 *exit_status = EXIT_SUCCESS;
2338                                 return 0;
2339                         }
2340                         *exit_status = EXIT_CONFIRM;
2341                         *error_message = strdup("Execution cancelled");
2342                         return -ECANCELED;
2343                 }
2344         }
2345
2346         if (context->dynamic_user && dcreds) {
2347
2348                 /* Make sure we bypass our own NSS module for any NSS checks */
2349                 if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) {
2350                         *exit_status = EXIT_USER;
2351                         *error_message = strdup("Failed to update environment");
2352                         return -errno;
2353                 }
2354
2355                 r = dynamic_creds_realize(dcreds, &uid, &gid);
2356                 if (r < 0) {
2357                         *exit_status = EXIT_USER;
2358                         *error_message = strdup("Failed to update dynamic user credentials");
2359                         return r;
2360                 }
2361
2362                 if (!uid_is_valid(uid)) {
2363                         *exit_status = EXIT_USER;
2364                         (void) asprintf(error_message, "UID validation failed for \""UID_FMT"\"", uid);
2365                         /* If asprintf fails, here and below, we will just print the generic error message. */
2366                         return -ESRCH;
2367                 }
2368
2369                 if (!gid_is_valid(gid)) {
2370                         *exit_status = EXIT_USER;
2371                         (void) asprintf(error_message, "GID validation failed for \""GID_FMT"\"", gid);
2372                         return -ESRCH;
2373                 }
2374
2375                 if (dcreds->user)
2376                         username = dcreds->user->name;
2377
2378         } else {
2379                 r = get_fixed_user(context, &username, &uid, &gid, &home, &shell);
2380                 if (r < 0) {
2381                         *exit_status = EXIT_USER;
2382                         *error_message = strdup("Failed to determine user credentials");
2383                         return r;
2384                 }
2385
2386                 r = get_fixed_group(context, &groupname, &gid);
2387                 if (r < 0) {
2388                         *exit_status = EXIT_GROUP;
2389                         *error_message = strdup("Failed to determine group credentials");
2390                         return r;
2391                 }
2392         }
2393
2394         /* Initialize user supplementary groups and get SupplementaryGroups= ones */
2395         r = get_supplementary_groups(context, username, groupname, gid,
2396                                      &supplementary_gids, &ngids);
2397         if (r < 0) {
2398                 *exit_status = EXIT_GROUP;
2399                 *error_message = strdup("Failed to determine supplementary groups");
2400                 return r;
2401         }
2402
2403         r = send_user_lookup(unit, user_lookup_fd, uid, gid);
2404         if (r < 0) {
2405                 *exit_status = EXIT_USER;
2406                 *error_message = strdup("Failed to send user credentials to PID1");
2407                 return r;
2408         }
2409
2410         user_lookup_fd = safe_close(user_lookup_fd);
2411
2412         r = acquire_home(context, uid, &home, &home_buffer);
2413         if (r < 0) {
2414                 *exit_status = EXIT_CHDIR;
2415                 *error_message = strdup("Failed to determine $HOME for user");
2416                 return r;
2417         }
2418
2419         /* If a socket is connected to STDIN/STDOUT/STDERR, we
2420          * must be sure to drop O_NONBLOCK */
2421         if (socket_fd >= 0)
2422                 (void) fd_nonblock(socket_fd, false);
2423
2424         r = setup_input(context, params, socket_fd, named_iofds);
2425         if (r < 0) {
2426                 *exit_status = EXIT_STDIN;
2427                 *error_message = strdup("Failed to set up stdin");
2428                 return r;
2429         }
2430
2431         r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
2432         if (r < 0) {
2433                 *exit_status = EXIT_STDOUT;
2434                 *error_message = strdup("Failed to set up stdout");
2435                 return r;
2436         }
2437
2438         r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
2439         if (r < 0) {
2440                 *exit_status = EXIT_STDERR;
2441                 *error_message = strdup("Failed to set up stderr");
2442                 return r;
2443         }
2444
2445         if (params->cgroup_path) {
2446                 r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
2447                 if (r < 0) {
2448                         *exit_status = EXIT_CGROUP;
2449                         (void) asprintf(error_message, "Failed to attach to cgroup %s", params->cgroup_path);
2450                         return r;
2451                 }
2452         }
2453
2454         if (context->oom_score_adjust_set) {
2455                 char t[DECIMAL_STR_MAX(context->oom_score_adjust)];
2456
2457                 /* When we can't make this change due to EPERM, then
2458                  * let's silently skip over it. User namespaces
2459                  * prohibit write access to this file, and we
2460                  * shouldn't trip up over that. */
2461
2462                 sprintf(t, "%i", context->oom_score_adjust);
2463                 r = write_string_file("/proc/self/oom_score_adj", t, 0);
2464                 if (r == -EPERM || r == -EACCES) {
2465                         log_open();
2466                         log_unit_debug_errno(unit, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
2467                         log_close();
2468                 } else if (r < 0) {
2469                         *exit_status = EXIT_OOM_ADJUST;
2470                         *error_message = strdup("Failed to write /proc/self/oom_score_adj");
2471                         return -errno;
2472                 }
2473         }
2474
2475         if (context->nice_set)
2476                 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
2477                         *exit_status = EXIT_NICE;
2478                         return -errno;
2479                 }
2480
2481         if (context->cpu_sched_set) {
2482                 struct sched_param param = {
2483                         .sched_priority = context->cpu_sched_priority,
2484                 };
2485
2486                 r = sched_setscheduler(0,
2487                                        context->cpu_sched_policy |
2488                                        (context->cpu_sched_reset_on_fork ?
2489                                         SCHED_RESET_ON_FORK : 0),
2490                                        &param);
2491                 if (r < 0) {
2492                         *exit_status = EXIT_SETSCHEDULER;
2493                         return -errno;
2494                 }
2495         }
2496
2497         if (context->cpuset)
2498                 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
2499                         *exit_status = EXIT_CPUAFFINITY;
2500                         return -errno;
2501                 }
2502
2503         if (context->ioprio_set)
2504                 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
2505                         *exit_status = EXIT_IOPRIO;
2506                         return -errno;
2507                 }
2508
2509         if (context->timer_slack_nsec != NSEC_INFINITY)
2510                 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
2511                         *exit_status = EXIT_TIMERSLACK;
2512                         return -errno;
2513                 }
2514
2515         if (context->personality != PERSONALITY_INVALID)
2516                 if (personality(context->personality) < 0) {
2517                         *exit_status = EXIT_PERSONALITY;
2518                         return -errno;
2519                 }
2520
2521         if (context->utmp_id)
2522                 utmp_put_init_process(context->utmp_id, getpid(), getsid(0),
2523                                       context->tty_path,
2524                                       context->utmp_mode == EXEC_UTMP_INIT  ? INIT_PROCESS :
2525                                       context->utmp_mode == EXEC_UTMP_LOGIN ? LOGIN_PROCESS :
2526                                       USER_PROCESS,
2527                                       username);
2528
2529         if (context->user) {
2530                 r = chown_terminal(STDIN_FILENO, uid);
2531                 if (r < 0) {
2532                         *exit_status = EXIT_STDIN;
2533                         return r;
2534                 }
2535         }
2536
2537         /* If delegation is enabled we'll pass ownership of the cgroup
2538          * (but only in systemd's own controller hierarchy!) to the
2539          * user of the new process. */
2540         if (params->cgroup_path && context->user && params->cgroup_delegate) {
2541                 r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
2542                 if (r < 0) {
2543                         *exit_status = EXIT_CGROUP;
2544                         return r;
2545                 }
2546
2547
2548                 r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
2549                 if (r < 0) {
2550                         *exit_status = EXIT_CGROUP;
2551                         return r;
2552                 }
2553         }
2554
2555         if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) {
2556                 r = setup_runtime_directory(context, params, uid, gid);
2557                 if (r < 0) {
2558                         *exit_status = EXIT_RUNTIME_DIRECTORY;
2559                         return r;
2560                 }
2561         }
2562
2563         r = build_environment(
2564                         unit,
2565                         context,
2566                         params,
2567                         n_fds,
2568                         home,
2569                         username,
2570                         shell,
2571                         journal_stream_dev,
2572                         journal_stream_ino,
2573                         &our_env);
2574         if (r < 0) {
2575                 *exit_status = EXIT_MEMORY;
2576                 return r;
2577         }
2578
2579         r = build_pass_environment(context, &pass_env);
2580         if (r < 0) {
2581                 *exit_status = EXIT_MEMORY;
2582                 return r;
2583         }
2584
2585         accum_env = strv_env_merge(5,
2586                                    params->environment,
2587                                    our_env,
2588                                    pass_env,
2589                                    context->environment,
2590                                    files_env,
2591                                    NULL);
2592         if (!accum_env) {
2593                 *exit_status = EXIT_MEMORY;
2594                 return -ENOMEM;
2595         }
2596         accum_env = strv_env_clean(accum_env);
2597
2598         (void) umask(context->umask);
2599
2600         r = setup_keyring(unit, params, uid, gid);
2601         if (r < 0) {
2602                 *exit_status = EXIT_KEYRING;
2603                 return r;
2604         }
2605
2606         if ((params->flags & EXEC_APPLY_PERMISSIONS) && !command->privileged) {
2607                 if (context->pam_name && username) {
2608                         r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds);
2609                         if (r < 0) {
2610                                 *exit_status = EXIT_PAM;
2611                                 return r;
2612                         }
2613                 }
2614         }
2615
2616         if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
2617                 r = setup_netns(runtime->netns_storage_socket);
2618                 if (r < 0) {
2619                         *exit_status = EXIT_NETWORK;
2620                         return r;
2621                 }
2622         }
2623
2624         needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
2625         if (needs_mount_namespace) {
2626                 r = apply_mount_namespace(unit, command, context, params, runtime);
2627                 if (r < 0) {
2628                         *exit_status = EXIT_NAMESPACE;
2629                         return r;
2630                 }
2631         }
2632
2633         /* Apply just after mount namespace setup */
2634         r = apply_working_directory(context, params, home, needs_mount_namespace, exit_status);
2635         if (r < 0)
2636                 return r;
2637
2638         /* Drop groups as early as possible */
2639         if ((params->flags & EXEC_APPLY_PERMISSIONS) && !command->privileged) {
2640                 r = enforce_groups(context, gid, supplementary_gids, ngids);
2641                 if (r < 0) {
2642                         *exit_status = EXIT_GROUP;
2643                         return r;
2644                 }
2645         }
2646
2647 #ifdef HAVE_SELINUX
2648         if ((params->flags & EXEC_APPLY_PERMISSIONS) &&
2649             mac_selinux_use() &&
2650             params->selinux_context_net &&
2651             socket_fd >= 0 &&
2652             !command->privileged) {
2653
2654                 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
2655                 if (r < 0) {
2656                         *exit_status = EXIT_SELINUX_CONTEXT;
2657                         return r;
2658                 }
2659         }
2660 #endif
2661
2662         if ((params->flags & EXEC_APPLY_PERMISSIONS) && context->private_users) {
2663                 r = setup_private_users(uid, gid);
2664                 if (r < 0) {
2665                         *exit_status = EXIT_USER;
2666                         return r;
2667                 }
2668         }
2669
2670         /* We repeat the fd closing here, to make sure that
2671          * nothing is leaked from the PAM modules. Note that
2672          * we are more aggressive this time since socket_fd
2673          * and the netns fds we don't need anymore. The custom
2674          * endpoint fd was needed to upload the policy and can
2675          * now be closed as well. */
2676         r = close_all_fds(fds, n_fds);
2677         if (r >= 0)
2678                 r = shift_fds(fds, n_fds);
2679         if (r >= 0)
2680                 r = flags_fds(fds, n_storage_fds, n_socket_fds, context->non_blocking);
2681         if (r < 0) {
2682                 *exit_status = EXIT_FDS;
2683                 return r;
2684         }
2685
2686         if ((params->flags & EXEC_APPLY_PERMISSIONS) && !command->privileged) {
2687
2688                 int secure_bits = context->secure_bits;
2689
2690                 for (i = 0; i < _RLIMIT_MAX; i++) {
2691
2692                         if (!context->rlimit[i])
2693                                 continue;
2694
2695                         r = setrlimit_closest(i, context->rlimit[i]);
2696                         if (r < 0) {
2697                                 *exit_status = EXIT_LIMITS;
2698                                 return r;
2699                         }
2700                 }
2701
2702                 /* Set the RTPRIO resource limit to 0, but only if nothing else was explicitly requested. */
2703                 if (context->restrict_realtime && !context->rlimit[RLIMIT_RTPRIO]) {
2704                         if (setrlimit(RLIMIT_RTPRIO, &RLIMIT_MAKE_CONST(0)) < 0) {
2705                                 *exit_status = EXIT_LIMITS;
2706                                 return -errno;
2707                         }
2708                 }
2709
2710                 if (!cap_test_all(context->capability_bounding_set)) {
2711                         r = capability_bounding_set_drop(context->capability_bounding_set, false);
2712                         if (r < 0) {
2713                                 *exit_status = EXIT_CAPABILITIES;
2714                                 *error_message = strdup("Failed to drop capabilities");
2715                                 return r;
2716                         }
2717                 }
2718
2719                 /* This is done before enforce_user, but ambient set
2720                  * does not survive over setresuid() if keep_caps is not set. */
2721                 if (context->capability_ambient_set != 0) {
2722                         r = capability_ambient_set_apply(context->capability_ambient_set, true);
2723                         if (r < 0) {
2724                                 *exit_status = EXIT_CAPABILITIES;
2725                                 *error_message = strdup("Failed to apply ambient capabilities (before UID change)");
2726                                 return r;
2727                         }
2728                 }
2729
2730                 if (context->user) {
2731                         r = enforce_user(context, uid);
2732                         if (r < 0) {
2733                                 *exit_status = EXIT_USER;
2734                                 (void) asprintf(error_message, "Failed to change UID to "UID_FMT, uid);
2735                                 return r;
2736                         }
2737                         if (context->capability_ambient_set != 0) {
2738
2739                                 /* Fix the ambient capabilities after user change. */
2740                                 r = capability_ambient_set_apply(context->capability_ambient_set, false);
2741                                 if (r < 0) {
2742                                         *exit_status = EXIT_CAPABILITIES;
2743                                         *error_message = strdup("Failed to apply ambient capabilities (after UID change)");
2744                                         return r;
2745                                 }
2746
2747                                 /* If we were asked to change user and ambient capabilities
2748                                  * were requested, we had to add keep-caps to the securebits
2749                                  * so that we would maintain the inherited capability set
2750                                  * through the setresuid(). Make sure that the bit is added
2751                                  * also to the context secure_bits so that we don't try to
2752                                  * drop the bit away next. */
2753
2754                                 secure_bits |= 1<<SECURE_KEEP_CAPS;
2755                         }
2756                 }
2757
2758                 /* Apply the MAC contexts late, but before seccomp syscall filtering, as those should really be last to
2759                  * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires
2760                  * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls
2761                  * are restricted. */
2762
2763 #ifdef HAVE_SELINUX
2764                 if (mac_selinux_use()) {
2765                         char *exec_context = mac_selinux_context_net ?: context->selinux_context;
2766
2767                         if (exec_context) {
2768                                 r = setexeccon(exec_context);
2769                                 if (r < 0) {
2770                                         *exit_status = EXIT_SELINUX_CONTEXT;
2771                                         (void) asprintf(error_message, "Failed to set SELinux context to %s", exec_context);
2772                                         return r;
2773                                 }
2774                         }
2775                 }
2776 #endif
2777
2778                 r = setup_smack(context, command);
2779                 if (r < 0) {
2780                         *exit_status = EXIT_SMACK_PROCESS_LABEL;
2781                         *error_message = strdup("Failed to set SMACK process label");
2782                         return r;
2783                 }
2784
2785 #ifdef HAVE_APPARMOR
2786                 if (context->apparmor_profile && mac_apparmor_use()) {
2787                         r = aa_change_onexec(context->apparmor_profile);
2788                         if (r < 0 && !context->apparmor_profile_ignore) {
2789                                 *exit_status = EXIT_APPARMOR_PROFILE;
2790                                 (void) asprintf(error_message,
2791                                                 "Failed to prepare AppArmor profile change to %s",
2792                                                 context->apparmor_profile);
2793                                 return -errno;
2794                         }
2795                 }
2796 #endif
2797
2798                 /* PR_GET_SECUREBITS is not privileged, while
2799                  * PR_SET_SECUREBITS is. So to suppress
2800                  * potential EPERMs we'll try not to call
2801                  * PR_SET_SECUREBITS unless necessary. */
2802                 if (prctl(PR_GET_SECUREBITS) != secure_bits)
2803                         if (prctl(PR_SET_SECUREBITS, secure_bits) < 0) {
2804                                 *exit_status = EXIT_SECUREBITS;
2805                                 *error_message = strdup("Failed to set secure bits");
2806                                 return -errno;
2807                         }
2808
2809                 if (context_has_no_new_privileges(context))
2810                         if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
2811                                 *exit_status = EXIT_NO_NEW_PRIVILEGES;
2812                                 *error_message = strdup("Failed to disable new privileges");
2813                                 return -errno;
2814                         }
2815
2816 #ifdef HAVE_SECCOMP
2817                 r = apply_address_families(unit, context);
2818                 if (r < 0) {
2819                         *exit_status = EXIT_ADDRESS_FAMILIES;
2820                         *error_message = strdup("Failed to restrict address families");
2821                         return r;
2822                 }
2823
2824                 r = apply_memory_deny_write_execute(unit, context);
2825                 if (r < 0) {
2826                         *exit_status = EXIT_SECCOMP;
2827                         *error_message = strdup("Failed to disable writing to executable memory");
2828                         return r;
2829                 }
2830
2831                 r = apply_restrict_realtime(unit, context);
2832                 if (r < 0) {
2833                         *exit_status = EXIT_SECCOMP;
2834                         *error_message = strdup("Failed to apply realtime restrictions");
2835                         return r;
2836                 }
2837
2838                 r = apply_restrict_namespaces(unit, context);
2839                 if (r < 0) {
2840                         *exit_status = EXIT_SECCOMP;
2841                         *error_message = strdup("Failed to apply namespace restrictions");
2842                         return r;
2843                 }
2844
2845                 r = apply_protect_sysctl(unit, context);
2846                 if (r < 0) {
2847                         *exit_status = EXIT_SECCOMP;
2848                         *error_message = strdup("Failed to apply sysctl restrictions");
2849                         return r;
2850                 }
2851
2852                 r = apply_protect_kernel_modules(unit, context);
2853                 if (r < 0) {
2854                         *exit_status = EXIT_SECCOMP;
2855                         *error_message = strdup("Failed to apply module loading restrictions");
2856                         return r;
2857                 }
2858
2859                 r = apply_private_devices(unit, context);
2860                 if (r < 0) {
2861                         *exit_status = EXIT_SECCOMP;
2862                         *error_message = strdup("Failed to set up private devices");
2863                         return r;
2864                 }
2865
2866                 r = apply_syscall_archs(unit, context);
2867                 if (r < 0) {
2868                         *exit_status = EXIT_SECCOMP;
2869                         *error_message = strdup("Failed to apply syscall architecture restrictions");
2870                         return r;
2871                 }
2872
2873                 /* This really should remain the last step before the execve(), to make sure our own code is unaffected
2874                  * by the filter as little as possible. */
2875                 r = apply_syscall_filter(unit, context);
2876                 if (r < 0) {
2877                         *exit_status = EXIT_SECCOMP;
2878                         *error_message = strdup("Failed to apply syscall filters");
2879                         return r;
2880                 }
2881 #endif
2882         }
2883
2884         final_argv = replace_env_argv(argv, accum_env);
2885         if (!final_argv) {
2886                 *exit_status = EXIT_MEMORY;
2887                 *error_message = strdup("Failed to prepare process arguments");
2888                 return -ENOMEM;
2889         }
2890
2891         if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) {
2892                 _cleanup_free_ char *line;
2893
2894                 line = exec_command_line(final_argv);
2895                 if (line) {
2896                         log_open();
2897                         log_struct(LOG_DEBUG,
2898                                    "EXECUTABLE=%s", command->path,
2899                                    LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
2900                                    LOG_UNIT_ID(unit),
2901                                    NULL);
2902                         log_close();
2903                 }
2904         }
2905
2906         execve(command->path, final_argv, accum_env);
2907         *exit_status = EXIT_EXEC;
2908         return -errno;
2909 }
2910
2911 int exec_spawn(Unit *unit,
2912                ExecCommand *command,
2913                const ExecContext *context,
2914                const ExecParameters *params,
2915                ExecRuntime *runtime,
2916                DynamicCreds *dcreds,
2917                pid_t *ret) {
2918
2919         _cleanup_strv_free_ char **files_env = NULL;
2920         int *fds = NULL;
2921         unsigned n_storage_fds = 0, n_socket_fds = 0;
2922         _cleanup_free_ char *line = NULL;
2923         int socket_fd, r;
2924         int named_iofds[3] = { -1, -1, -1 };
2925         char **argv;
2926         pid_t pid;
2927
2928         assert(unit);
2929         assert(command);
2930         assert(context);
2931         assert(ret);
2932         assert(params);
2933         assert(params->fds || (params->n_storage_fds + params->n_socket_fds <= 0));
2934
2935         if (context->std_input == EXEC_INPUT_SOCKET ||
2936             context->std_output == EXEC_OUTPUT_SOCKET ||
2937             context->std_error == EXEC_OUTPUT_SOCKET) {
2938
2939                 if (params->n_socket_fds > 1) {
2940                         log_unit_error(unit, "Got more than one socket.");
2941                         return -EINVAL;
2942                 }
2943
2944                 if (params->n_socket_fds == 0) {
2945                         log_unit_error(unit, "Got no socket.");
2946                         return -EINVAL;
2947                 }
2948
2949                 socket_fd = params->fds[0];
2950         } else {
2951                 socket_fd = -1;
2952                 fds = params->fds;
2953                 n_storage_fds = params->n_storage_fds;
2954                 n_socket_fds = params->n_socket_fds;
2955         }
2956
2957         r = exec_context_named_iofds(unit, context, params, named_iofds);
2958         if (r < 0)
2959                 return log_unit_error_errno(unit, r, "Failed to load a named file descriptor: %m");
2960
2961         r = exec_context_load_environment(unit, context, &files_env);
2962         if (r < 0)
2963                 return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
2964
2965         argv = params->argv ?: command->argv;
2966         line = exec_command_line(argv);
2967         if (!line)
2968                 return log_oom();
2969
2970         log_struct(LOG_DEBUG,
2971                    LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
2972                    "EXECUTABLE=%s", command->path,
2973                    LOG_UNIT_ID(unit),
2974                    NULL);
2975         pid = fork();
2976         if (pid < 0)
2977                 return log_unit_error_errno(unit, errno, "Failed to fork: %m");
2978
2979         if (pid == 0) {
2980                 int exit_status;
2981                 _cleanup_free_ char *error_message = NULL;
2982
2983                 r = exec_child(unit,
2984                                command,
2985                                context,
2986                                params,
2987                                runtime,
2988                                dcreds,
2989                                argv,
2990                                socket_fd,
2991                                named_iofds,
2992                                fds,
2993                                n_storage_fds,
2994                                n_socket_fds,
2995                                files_env,
2996                                unit->manager->user_lookup_fds[1],
2997                                &exit_status,
2998                                &error_message);
2999                 if (r < 0) {
3000                         log_open();
3001                         if (error_message)
3002                                 log_struct_errno(LOG_ERR, r,
3003                                                  "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3004                                                  LOG_UNIT_ID(unit),
3005                                                  LOG_UNIT_MESSAGE(unit, "%s: %m",
3006                                                                   error_message),
3007                                                  "EXECUTABLE=%s", command->path,
3008                                                  NULL);
3009                         else if (r == -ENOENT && command->ignore)
3010                                 log_struct_errno(LOG_INFO, r,
3011                                                  "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3012                                                  LOG_UNIT_ID(unit),
3013                                                  LOG_UNIT_MESSAGE(unit, "Skipped spawning %s: %m",
3014                                                                   command->path),
3015                                                  "EXECUTABLE=%s", command->path,
3016                                                  NULL);
3017                         else
3018                                 log_struct_errno(LOG_ERR, r,
3019                                                  "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
3020                                                  LOG_UNIT_ID(unit),
3021                                                  LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
3022                                                                   exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
3023                                                                   command->path),
3024                                                  "EXECUTABLE=%s", command->path,
3025                                                  NULL);
3026                 }
3027
3028                 _exit(exit_status);
3029         }
3030
3031         log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
3032
3033         /* We add the new process to the cgroup both in the child (so
3034          * that we can be sure that no user code is ever executed
3035          * outside of the cgroup) and in the parent (so that we can be
3036          * sure that when we kill the cgroup the process will be
3037          * killed too). */
3038         if (params->cgroup_path)
3039                 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
3040
3041         exec_status_start(&command->exec_status, pid);
3042
3043         *ret = pid;
3044         return 0;
3045 }
3046
3047 void exec_context_init(ExecContext *c) {
3048         assert(c);
3049
3050         c->umask = 0022;
3051         c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
3052         c->cpu_sched_policy = SCHED_OTHER;
3053         c->syslog_priority = LOG_DAEMON|LOG_INFO;
3054         c->syslog_level_prefix = true;
3055         c->ignore_sigpipe = true;
3056         c->timer_slack_nsec = NSEC_INFINITY;
3057         c->personality = PERSONALITY_INVALID;
3058         c->runtime_directory_mode = 0755;
3059         c->capability_bounding_set = CAP_ALL;
3060         c->restrict_namespaces = NAMESPACE_FLAGS_ALL;
3061 }
3062
3063 void exec_context_done(ExecContext *c) {
3064         unsigned l;
3065
3066         assert(c);
3067
3068         c->environment = strv_free(c->environment);
3069         c->environment_files = strv_free(c->environment_files);
3070         c->pass_environment = strv_free(c->pass_environment);
3071
3072         for (l = 0; l < ELEMENTSOF(c->rlimit); l++)
3073                 c->rlimit[l] = mfree(c->rlimit[l]);
3074
3075         for (l = 0; l < 3; l++)
3076                 c->stdio_fdname[l] = mfree(c->stdio_fdname[l]);
3077
3078         c->working_directory = mfree(c->working_directory);
3079         c->root_directory = mfree(c->root_directory);
3080         c->root_image = mfree(c->root_image);
3081         c->tty_path = mfree(c->tty_path);
3082         c->syslog_identifier = mfree(c->syslog_identifier);
3083         c->user = mfree(c->user);
3084         c->group = mfree(c->group);
3085
3086         c->supplementary_groups = strv_free(c->supplementary_groups);
3087
3088         c->pam_name = mfree(c->pam_name);
3089
3090         c->read_only_paths = strv_free(c->read_only_paths);
3091         c->read_write_paths = strv_free(c->read_write_paths);
3092         c->inaccessible_paths = strv_free(c->inaccessible_paths);
3093
3094         bind_mount_free_many(c->bind_mounts, c->n_bind_mounts);
3095
3096         if (c->cpuset)
3097                 CPU_FREE(c->cpuset);
3098
3099         c->utmp_id = mfree(c->utmp_id);
3100         c->selinux_context = mfree(c->selinux_context);
3101         c->apparmor_profile = mfree(c->apparmor_profile);
3102
3103         c->syscall_filter = set_free(c->syscall_filter);
3104         c->syscall_archs = set_free(c->syscall_archs);
3105         c->address_families = set_free(c->address_families);
3106
3107         c->runtime_directory = strv_free(c->runtime_directory);
3108 }
3109
3110 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
3111         char **i;
3112
3113         assert(c);
3114
3115         if (!runtime_prefix)
3116                 return 0;
3117
3118         STRV_FOREACH(i, c->runtime_directory) {
3119                 _cleanup_free_ char *p;
3120
3121                 p = strjoin(runtime_prefix, "/", *i);
3122                 if (!p)
3123                         return -ENOMEM;
3124
3125                 /* We execute this synchronously, since we need to be
3126                  * sure this is gone when we start the service
3127                  * next. */
3128                 (void) rm_rf(p, REMOVE_ROOT);
3129         }
3130
3131         return 0;
3132 }
3133
3134 void exec_command_done(ExecCommand *c) {
3135         assert(c);
3136
3137         c->path = mfree(c->path);
3138
3139         c->argv = strv_free(c->argv);
3140 }
3141
3142 void exec_command_done_array(ExecCommand *c, unsigned n) {
3143         unsigned i;
3144
3145         for (i = 0; i < n; i++)
3146                 exec_command_done(c+i);
3147 }
3148
3149 ExecCommand* exec_command_free_list(ExecCommand *c) {
3150         ExecCommand *i;
3151
3152         while ((i = c)) {
3153                 LIST_REMOVE(command, c, i);
3154                 exec_command_done(i);
3155                 free(i);
3156         }
3157
3158         return NULL;
3159 }
3160
3161 void exec_command_free_array(ExecCommand **c, unsigned n) {
3162         unsigned i;
3163
3164         for (i = 0; i < n; i++)
3165                 c[i] = exec_command_free_list(c[i]);
3166 }
3167
/* Context passed to the invalid_env() callback through its userdata pointer. */
typedef struct InvalidEnvInfo {
        Unit *unit;       /* unit the log message is attributed to */
        const char *path; /* logged after the rejected assignment; the environment file loader stores the file name here */
} InvalidEnvInfo;
3172
3173 static void invalid_env(const char *p, void *userdata) {
3174         InvalidEnvInfo *info = userdata;
3175
3176         log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
3177 }
3178
3179 const char* exec_context_fdname(const ExecContext *c, int fd_index) {
3180         assert(c);
3181
3182         switch (fd_index) {
3183         case STDIN_FILENO:
3184                 if (c->std_input != EXEC_INPUT_NAMED_FD)
3185                         return NULL;
3186                 return c->stdio_fdname[STDIN_FILENO] ?: "stdin";
3187         case STDOUT_FILENO:
3188                 if (c->std_output != EXEC_OUTPUT_NAMED_FD)
3189                         return NULL;
3190                 return c->stdio_fdname[STDOUT_FILENO] ?: "stdout";
3191         case STDERR_FILENO:
3192                 if (c->std_error != EXEC_OUTPUT_NAMED_FD)
3193                         return NULL;
3194                 return c->stdio_fdname[STDERR_FILENO] ?: "stderr";
3195         default:
3196                 return NULL;
3197         }
3198 }
3199
3200 int exec_context_named_iofds(Unit *unit, const ExecContext *c, const ExecParameters *p, int named_iofds[3]) {
3201         unsigned i, targets;
3202         const char* stdio_fdname[3];
3203         unsigned n_fds;
3204
3205         assert(c);
3206         assert(p);
3207
3208         targets = (c->std_input == EXEC_INPUT_NAMED_FD) +
3209                   (c->std_output == EXEC_OUTPUT_NAMED_FD) +
3210                   (c->std_error == EXEC_OUTPUT_NAMED_FD);
3211
3212         for (i = 0; i < 3; i++)
3213                 stdio_fdname[i] = exec_context_fdname(c, i);
3214
3215         n_fds = p->n_storage_fds + p->n_socket_fds;
3216
3217         for (i = 0; i < n_fds  && targets > 0; i++)
3218                 if (named_iofds[STDIN_FILENO] < 0 &&
3219                     c->std_input == EXEC_INPUT_NAMED_FD &&
3220                     stdio_fdname[STDIN_FILENO] &&
3221                     streq(p->fd_names[i], stdio_fdname[STDIN_FILENO])) {
3222
3223                         named_iofds[STDIN_FILENO] = p->fds[i];
3224                         targets--;
3225
3226                 } else if (named_iofds[STDOUT_FILENO] < 0 &&
3227                            c->std_output == EXEC_OUTPUT_NAMED_FD &&
3228                            stdio_fdname[STDOUT_FILENO] &&
3229                            streq(p->fd_names[i], stdio_fdname[STDOUT_FILENO])) {
3230
3231                         named_iofds[STDOUT_FILENO] = p->fds[i];
3232                         targets--;
3233
3234                 } else if (named_iofds[STDERR_FILENO] < 0 &&
3235                            c->std_error == EXEC_OUTPUT_NAMED_FD &&
3236                            stdio_fdname[STDERR_FILENO] &&
3237                            streq(p->fd_names[i], stdio_fdname[STDERR_FILENO])) {
3238
3239                         named_iofds[STDERR_FILENO] = p->fds[i];
3240                         targets--;
3241                 }
3242
3243         return targets == 0 ? 0 : -ENOENT;
3244 }
3245
3246 int exec_context_load_environment(Unit *unit, const ExecContext *c, char ***l) {
3247         char **i, **r = NULL;
3248
3249         assert(c);
3250         assert(l);
3251
3252         STRV_FOREACH(i, c->environment_files) {
3253                 char *fn;
3254                 int k;
3255                 unsigned n;
3256                 bool ignore = false;
3257                 char **p;
3258                 _cleanup_globfree_ glob_t pglob = {};
3259
3260                 fn = *i;
3261
3262                 if (fn[0] == '-') {
3263                         ignore = true;
3264                         fn++;
3265                 }
3266
3267                 if (!path_is_absolute(fn)) {
3268                         if (ignore)
3269                                 continue;
3270
3271                         strv_free(r);
3272                         return -EINVAL;
3273                 }
3274
3275                 /* Filename supports globbing, take all matching files */
3276                 k = safe_glob(fn, 0, &pglob);
3277                 if (k < 0) {
3278                         if (ignore)
3279                                 continue;
3280
3281                         strv_free(r);
3282                         return k;
3283                 }
3284
3285                 /* When we don't match anything, -ENOENT should be returned */
3286                 assert(pglob.gl_pathc > 0);
3287
3288                 for (n = 0; n < pglob.gl_pathc; n++) {
3289                         k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
3290                         if (k < 0) {
3291                                 if (ignore)
3292                                         continue;
3293
3294                                 strv_free(r);
3295                                 return k;
3296                         }
3297                         /* Log invalid environment variables with filename */
3298                         if (p) {
3299                                 InvalidEnvInfo info = {
3300                                         .unit = unit,
3301                                         .path = pglob.gl_pathv[n]
3302                                 };
3303
3304                                 p = strv_env_clean_with_callback(p, invalid_env, &info);
3305                         }
3306
3307                         if (r == NULL)
3308                                 r = p;
3309                         else {
3310                                 char **m;
3311
3312                                 m = strv_env_merge(2, r, p);
3313                                 strv_free(r);
3314                                 strv_free(p);
3315                                 if (!m)
3316                                         return -ENOMEM;
3317
3318                                 r = m;
3319                         }
3320                 }
3321         }
3322
3323         *l = r;
3324
3325         return 0;
3326 }
3327
3328 static bool tty_may_match_dev_console(const char *tty) {
3329         _cleanup_free_ char *active = NULL;
3330         char *console;
3331
3332         if (!tty)
3333                 return true;
3334
3335         if (startswith(tty, "/dev/"))
3336                 tty += 5;
3337
3338         /* trivial identity? */
3339         if (streq(tty, "console"))
3340                 return true;
3341
3342         console = resolve_dev_console(&active);
3343         /* if we could not resolve, assume it may */
3344         if (!console)
3345                 return true;
3346
3347         /* "tty0" means the active VC, so it may be the same sometimes */
3348         return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
3349 }
3350
3351 bool exec_context_may_touch_console(ExecContext *ec) {
3352
3353         return (ec->tty_reset ||
3354                 ec->tty_vhangup ||
3355                 ec->tty_vt_disallocate ||
3356                 is_terminal_input(ec->std_input) ||
3357                 is_terminal_output(ec->std_output) ||
3358                 is_terminal_output(ec->std_error)) &&
3359                tty_may_match_dev_console(exec_context_tty_path(ec));
3360 }
3361
3362 static void strv_fprintf(FILE *f, char **l) {
3363         char **g;
3364
3365         assert(f);
3366
3367         STRV_FOREACH(g, l)
3368                 fprintf(f, " %s", *g);
3369 }
3370
3371 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
3372         char **e, **d;
3373         unsigned i;
3374         int r;
3375
3376         assert(c);
3377         assert(f);
3378
3379         prefix = strempty(prefix);
3380
3381         fprintf(f,
3382                 "%sUMask: %04o\n"
3383                 "%sWorkingDirectory: %s\n"
3384                 "%sRootDirectory: %s\n"
3385                 "%sNonBlocking: %s\n"
3386                 "%sPrivateTmp: %s\n"
3387                 "%sPrivateDevices: %s\n"
3388                 "%sProtectKernelTunables: %s\n"
3389                 "%sProtectKernelModules: %s\n"
3390                 "%sProtectControlGroups: %s\n"
3391                 "%sPrivateNetwork: %s\n"
3392                 "%sPrivateUsers: %s\n"
3393                 "%sProtectHome: %s\n"
3394                 "%sProtectSystem: %s\n"
3395                 "%sMountAPIVFS: %s\n"
3396                 "%sIgnoreSIGPIPE: %s\n"
3397                 "%sMemoryDenyWriteExecute: %s\n"
3398                 "%sRestrictRealtime: %s\n",
3399                 prefix, c->umask,
3400                 prefix, c->working_directory ? c->working_directory : "/",
3401                 prefix, c->root_directory ? c->root_directory : "/",
3402                 prefix, yes_no(c->non_blocking),
3403                 prefix, yes_no(c->private_tmp),
3404                 prefix, yes_no(c->private_devices),
3405                 prefix, yes_no(c->protect_kernel_tunables),
3406                 prefix, yes_no(c->protect_kernel_modules),
3407                 prefix, yes_no(c->protect_control_groups),
3408                 prefix, yes_no(c->private_network),
3409                 prefix, yes_no(c->private_users),
3410                 prefix, protect_home_to_string(c->protect_home),
3411                 prefix, protect_system_to_string(c->protect_system),
3412                 prefix, yes_no(c->mount_apivfs),
3413                 prefix, yes_no(c->ignore_sigpipe),
3414                 prefix, yes_no(c->memory_deny_write_execute),
3415                 prefix, yes_no(c->restrict_realtime));
3416
3417         if (c->root_image)
3418                 fprintf(f, "%sRootImage: %s\n", prefix, c->root_image);
3419
3420         STRV_FOREACH(e, c->environment)
3421                 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
3422
3423         STRV_FOREACH(e, c->environment_files)
3424                 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
3425
3426         STRV_FOREACH(e, c->pass_environment)
3427                 fprintf(f, "%sPassEnvironment: %s\n", prefix, *e);
3428
3429         fprintf(f, "%sRuntimeDirectoryMode: %04o\n", prefix, c->runtime_directory_mode);
3430
3431         STRV_FOREACH(d, c->runtime_directory)
3432                 fprintf(f, "%sRuntimeDirectory: %s\n", prefix, *d);
3433
3434         if (c->nice_set)
3435                 fprintf(f,
3436                         "%sNice: %i\n",
3437                         prefix, c->nice);
3438
3439         if (c->oom_score_adjust_set)
3440                 fprintf(f,
3441                         "%sOOMScoreAdjust: %i\n",
3442                         prefix, c->oom_score_adjust);
3443
3444         for (i = 0; i < RLIM_NLIMITS; i++)
3445                 if (c->rlimit[i]) {
3446                         fprintf(f, "%s%s: " RLIM_FMT "\n",
3447                                 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
3448                         fprintf(f, "%s%sSoft: " RLIM_FMT "\n",
3449                                 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_cur);
3450                 }
3451
3452         if (c->ioprio_set) {
3453                 _cleanup_free_ char *class_str = NULL;
3454
3455                 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
3456                 fprintf(f,
3457                         "%sIOSchedulingClass: %s\n"
3458                         "%sIOPriority: %i\n",
3459                         prefix, strna(class_str),
3460                         prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
3461         }
3462
3463         if (c->cpu_sched_set) {
3464                 _cleanup_free_ char *policy_str = NULL;
3465
3466                 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
3467                 fprintf(f,
3468                         "%sCPUSchedulingPolicy: %s\n"
3469                         "%sCPUSchedulingPriority: %i\n"
3470                         "%sCPUSchedulingResetOnFork: %s\n",
3471                         prefix, strna(policy_str),
3472                         prefix, c->cpu_sched_priority,
3473                         prefix, yes_no(c->cpu_sched_reset_on_fork));
3474         }
3475
3476         if (c->cpuset) {
3477                 fprintf(f, "%sCPUAffinity:", prefix);
3478                 for (i = 0; i < c->cpuset_ncpus; i++)
3479                         if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
3480                                 fprintf(f, " %u", i);
3481                 fputs("\n", f);
3482         }
3483
3484         if (c->timer_slack_nsec != NSEC_INFINITY)
3485                 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
3486
3487         fprintf(f,
3488                 "%sStandardInput: %s\n"
3489                 "%sStandardOutput: %s\n"
3490                 "%sStandardError: %s\n",
3491                 prefix, exec_input_to_string(c->std_input),
3492                 prefix, exec_output_to_string(c->std_output),
3493                 prefix, exec_output_to_string(c->std_error));
3494
3495         if (c->tty_path)
3496                 fprintf(f,
3497                         "%sTTYPath: %s\n"
3498                         "%sTTYReset: %s\n"
3499                         "%sTTYVHangup: %s\n"
3500                         "%sTTYVTDisallocate: %s\n",
3501                         prefix, c->tty_path,
3502                         prefix, yes_no(c->tty_reset),
3503                         prefix, yes_no(c->tty_vhangup),
3504                         prefix, yes_no(c->tty_vt_disallocate));
3505
3506         if (c->std_output == EXEC_OUTPUT_SYSLOG ||
3507             c->std_output == EXEC_OUTPUT_KMSG ||
3508             c->std_output == EXEC_OUTPUT_JOURNAL ||
3509             c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
3510             c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
3511             c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
3512             c->std_error == EXEC_OUTPUT_SYSLOG ||
3513             c->std_error == EXEC_OUTPUT_KMSG ||
3514             c->std_error == EXEC_OUTPUT_JOURNAL ||
3515             c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
3516             c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
3517             c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
3518
3519                 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
3520
3521                 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
3522                 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
3523
3524                 fprintf(f,
3525                         "%sSyslogFacility: %s\n"
3526                         "%sSyslogLevel: %s\n",
3527                         prefix, strna(fac_str),
3528                         prefix, strna(lvl_str));
3529         }
3530
3531         if (c->secure_bits)
3532                 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
3533                         prefix,
3534                         (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
3535                         (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
3536                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
3537                         (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
3538                         (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
3539                         (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
3540
3541         if (c->capability_bounding_set != CAP_ALL) {
3542                 unsigned long l;
3543                 fprintf(f, "%sCapabilityBoundingSet:", prefix);
3544
3545                 for (l = 0; l <= cap_last_cap(); l++)
3546                         if (c->capability_bounding_set & (UINT64_C(1) << l))
3547                                 fprintf(f, " %s", strna(capability_to_name(l)));
3548
3549                 fputs("\n", f);
3550         }
3551
3552         if (c->capability_ambient_set != 0) {
3553                 unsigned long l;
3554                 fprintf(f, "%sAmbientCapabilities:", prefix);
3555
3556                 for (l = 0; l <= cap_last_cap(); l++)
3557                         if (c->capability_ambient_set & (UINT64_C(1) << l))
3558                                 fprintf(f, " %s", strna(capability_to_name(l)));
3559
3560                 fputs("\n", f);
3561         }
3562
3563         if (c->user)
3564                 fprintf(f, "%sUser: %s\n", prefix, c->user);
3565         if (c->group)
3566                 fprintf(f, "%sGroup: %s\n", prefix, c->group);
3567
3568         fprintf(f, "%sDynamicUser: %s\n", prefix, yes_no(c->dynamic_user));
3569
3570         if (strv_length(c->supplementary_groups) > 0) {
3571                 fprintf(f, "%sSupplementaryGroups:", prefix);
3572                 strv_fprintf(f, c->supplementary_groups);
3573                 fputs("\n", f);
3574         }
3575
3576         if (c->pam_name)
3577                 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
3578
3579         if (strv_length(c->read_write_paths) > 0) {
3580                 fprintf(f, "%sReadWritePaths:", prefix);
3581                 strv_fprintf(f, c->read_write_paths);
3582                 fputs("\n", f);
3583         }
3584
3585         if (strv_length(c->read_only_paths) > 0) {
3586                 fprintf(f, "%sReadOnlyPaths:", prefix);
3587                 strv_fprintf(f, c->read_only_paths);
3588                 fputs("\n", f);
3589         }
3590
3591         if (strv_length(c->inaccessible_paths) > 0) {
3592                 fprintf(f, "%sInaccessiblePaths:", prefix);
3593                 strv_fprintf(f, c->inaccessible_paths);
3594                 fputs("\n", f);
3595         }
3596
3597         if (c->n_bind_mounts > 0)
3598                 for (i = 0; i < c->n_bind_mounts; i++) {
3599                         fprintf(f, "%s%s: %s:%s:%s\n", prefix,
3600                                 c->bind_mounts[i].read_only ? "BindReadOnlyPaths" : "BindPaths",
3601                                 c->bind_mounts[i].source,
3602                                 c->bind_mounts[i].destination,
3603                                 c->bind_mounts[i].recursive ? "rbind" : "norbind");
3604                 }
3605
3606         if (c->utmp_id)
3607                 fprintf(f,
3608                         "%sUtmpIdentifier: %s\n",
3609                         prefix, c->utmp_id);
3610
3611         if (c->selinux_context)
3612                 fprintf(f,
3613                         "%sSELinuxContext: %s%s\n",
3614                         prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
3615
3616         if (c->personality != PERSONALITY_INVALID)
3617                 fprintf(f,
3618                         "%sPersonality: %s\n",
3619                         prefix, strna(personality_to_string(c->personality)));
3620
3621         if (c->syscall_filter) {
3622 #ifdef HAVE_SECCOMP
3623                 Iterator j;
3624                 void *id;
3625                 bool first = true;
3626 #endif
3627
3628                 fprintf(f,
3629                         "%sSystemCallFilter: ",
3630                         prefix);
3631
3632                 if (!c->syscall_whitelist)
3633                         fputc('~', f);
3634
3635 #ifdef HAVE_SECCOMP
3636                 SET_FOREACH(id, c->syscall_filter, j) {
3637                         _cleanup_free_ char *name = NULL;
3638
3639                         if (first)
3640                                 first = false;
3641                         else
3642                                 fputc(' ', f);
3643
3644                         name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
3645                         fputs(strna(name), f);
3646                 }
3647 #endif
3648
3649                 fputc('\n', f);
3650         }
3651
3652         if (c->syscall_archs) {
3653 #ifdef HAVE_SECCOMP
3654                 Iterator j;
3655                 void *id;
3656 #endif
3657
3658                 fprintf(f,
3659                         "%sSystemCallArchitectures:",
3660                         prefix);
3661
3662 #ifdef HAVE_SECCOMP
3663                 SET_FOREACH(id, c->syscall_archs, j)
3664                         fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
3665 #endif
3666                 fputc('\n', f);
3667         }
3668
3669         if (exec_context_restrict_namespaces_set(c)) {
3670                 _cleanup_free_ char *s = NULL;
3671
3672                 r = namespace_flag_to_string_many(c->restrict_namespaces, &s);
3673                 if (r >= 0)
3674                         fprintf(f, "%sRestrictNamespaces: %s\n",
3675                                 prefix, s);
3676         }
3677
3678         if (c->syscall_errno > 0)
3679                 fprintf(f,
3680                         "%sSystemCallErrorNumber: %s\n",
3681                         prefix, strna(errno_to_name(c->syscall_errno)));
3682
3683         if (c->apparmor_profile)
3684                 fprintf(f,
3685                         "%sAppArmorProfile: %s%s\n",
3686                         prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
3687 }
3688
3689 bool exec_context_maintains_privileges(ExecContext *c) {
3690         assert(c);
3691
3692         /* Returns true if the process forked off would run under
3693          * an unchanged UID or as root. */
3694
3695         if (!c->user)
3696                 return true;
3697
3698         if (streq(c->user, "root") || streq(c->user, "0"))
3699                 return true;
3700
3701         return false;
3702 }
3703
3704 int exec_context_get_effective_ioprio(ExecContext *c) {
3705         int p;
3706
3707         assert(c);
3708
3709         if (c->ioprio_set)
3710                 return c->ioprio;
3711
3712         p = ioprio_get(IOPRIO_WHO_PROCESS, 0);
3713         if (p < 0)
3714                 return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 4);
3715
3716         return p;
3717 }
3718
3719 void exec_status_start(ExecStatus *s, pid_t pid) {
3720         assert(s);
3721
3722         zero(*s);
3723         s->pid = pid;
3724         dual_timestamp_get(&s->start_timestamp);
3725 }
3726
3727 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
3728         assert(s);
3729
3730         if (s->pid && s->pid != pid)
3731                 zero(*s);
3732
3733         s->pid = pid;
3734         dual_timestamp_get(&s->exit_timestamp);
3735
3736         s->code = code;
3737         s->status = status;
3738
3739         if (context) {
3740                 if (context->utmp_id)
3741                         utmp_put_dead_process(context->utmp_id, pid, code, status);
3742
3743                 exec_context_tty_reset(context, NULL);
3744         }
3745 }
3746
3747 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
3748         char buf[FORMAT_TIMESTAMP_MAX];
3749
3750         assert(s);
3751         assert(f);
3752
3753         if (s->pid <= 0)
3754                 return;
3755
3756         prefix = strempty(prefix);
3757
3758         fprintf(f,
3759                 "%sPID: "PID_FMT"\n",
3760                 prefix, s->pid);
3761
3762         if (dual_timestamp_is_set(&s->start_timestamp))
3763                 fprintf(f,
3764                         "%sStart Timestamp: %s\n",
3765                         prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
3766
3767         if (dual_timestamp_is_set(&s->exit_timestamp))
3768                 fprintf(f,
3769                         "%sExit Timestamp: %s\n"
3770                         "%sExit Code: %s\n"
3771                         "%sExit Status: %i\n",
3772                         prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
3773                         prefix, sigchld_code_to_string(s->code),
3774                         prefix, s->status);
3775 }
3776
3777 char *exec_command_line(char **argv) {
3778         size_t k;
3779         char *n, *p, **a;
3780         bool first = true;
3781
3782         assert(argv);
3783
3784         k = 1;
3785         STRV_FOREACH(a, argv)
3786                 k += strlen(*a)+3;
3787
3788         n = new(char, k);
3789         if (!n)
3790                 return NULL;
3791
3792         p = n;
3793         STRV_FOREACH(a, argv) {
3794
3795                 if (!first)
3796                         *(p++) = ' ';
3797                 else
3798                         first = false;
3799
3800                 if (strpbrk(*a, WHITESPACE)) {
3801                         *(p++) = '\'';
3802                         p = stpcpy(p, *a);
3803                         *(p++) = '\'';
3804                 } else
3805                         p = stpcpy(p, *a);
3806
3807         }
3808
3809         *p = 0;
3810
3811         /* FIXME: this doesn't really handle arguments that have
3812          * spaces and ticks in them */
3813
3814         return n;
3815 }
3816
3817 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
3818         _cleanup_free_ char *cmd = NULL;
3819         const char *prefix2;
3820
3821         assert(c);
3822         assert(f);
3823
3824         prefix = strempty(prefix);
3825         prefix2 = strjoina(prefix, "\t");
3826
3827         cmd = exec_command_line(c->argv);
3828         fprintf(f,
3829                 "%sCommand Line: %s\n",
3830                 prefix, cmd ? cmd : strerror(ENOMEM));
3831
3832         exec_status_dump(&c->exec_status, f, prefix2);
3833 }
3834
3835 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
3836         assert(f);
3837
3838         prefix = strempty(prefix);
3839
3840         LIST_FOREACH(command, c, c)
3841                 exec_command_dump(c, f, prefix);
3842 }
3843
3844 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
3845         ExecCommand *end;
3846
3847         assert(l);
3848         assert(e);
3849
3850         if (*l) {
3851                 /* It's kind of important, that we keep the order here */
3852                 LIST_FIND_TAIL(command, *l, end);
3853                 LIST_INSERT_AFTER(command, *l, end, e);
3854         } else
3855               *l = e;
3856 }
3857
3858 int exec_command_set(ExecCommand *c, const char *path, ...) {
3859         va_list ap;
3860         char **l, *p;
3861
3862         assert(c);
3863         assert(path);
3864
3865         va_start(ap, path);
3866         l = strv_new_ap(path, ap);
3867         va_end(ap);
3868
3869         if (!l)
3870                 return -ENOMEM;
3871
3872         p = strdup(path);
3873         if (!p) {
3874                 strv_free(l);
3875                 return -ENOMEM;
3876         }
3877
3878         free(c->path);
3879         c->path = p;
3880
3881         strv_free(c->argv);
3882         c->argv = l;
3883
3884         return 0;
3885 }
3886
3887 int exec_command_append(ExecCommand *c, const char *path, ...) {
3888         _cleanup_strv_free_ char **l = NULL;
3889         va_list ap;
3890         int r;
3891
3892         assert(c);
3893         assert(path);
3894
3895         va_start(ap, path);
3896         l = strv_new_ap(path, ap);
3897         va_end(ap);
3898
3899         if (!l)
3900                 return -ENOMEM;
3901
3902         r = strv_extend_strv(&c->argv, l, false);
3903         if (r < 0)
3904                 return r;
3905
3906         return 0;
3907 }
3908
3909
3910 static int exec_runtime_allocate(ExecRuntime **rt) {
3911
3912         if (*rt)
3913                 return 0;
3914
3915         *rt = new0(ExecRuntime, 1);
3916         if (!*rt)
3917                 return -ENOMEM;
3918
3919         (*rt)->n_ref = 1;
3920         (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
3921
3922         return 0;
3923 }
3924
3925 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
3926         int r;
3927
3928         assert(rt);
3929         assert(c);
3930         assert(id);
3931
3932         if (*rt)
3933                 return 1;
3934
3935         if (!c->private_network && !c->private_tmp)
3936                 return 0;
3937
3938         r = exec_runtime_allocate(rt);
3939         if (r < 0)
3940                 return r;
3941
3942         if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
3943                 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, (*rt)->netns_storage_socket) < 0)
3944                         return -errno;
3945         }
3946
3947         if (c->private_tmp && !(*rt)->tmp_dir) {
3948                 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
3949                 if (r < 0)
3950                         return r;
3951         }
3952
3953         return 1;
3954 }
3955
3956 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
3957         assert(r);
3958         assert(r->n_ref > 0);
3959
3960         r->n_ref++;
3961         return r;
3962 }
3963
3964 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
3965
3966         if (!r)
3967                 return NULL;
3968
3969         assert(r->n_ref > 0);
3970
3971         r->n_ref--;
3972         if (r->n_ref > 0)
3973                 return NULL;
3974
3975         free(r->tmp_dir);
3976         free(r->var_tmp_dir);
3977         safe_close_pair(r->netns_storage_socket);
3978         return mfree(r);
3979 }
3980
3981 int exec_runtime_serialize(Unit *u, ExecRuntime *rt, FILE *f, FDSet *fds) {
3982         assert(u);
3983         assert(f);
3984         assert(fds);
3985
3986         if (!rt)
3987                 return 0;
3988
3989         if (rt->tmp_dir)
3990                 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
3991
3992         if (rt->var_tmp_dir)
3993                 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
3994
3995         if (rt->netns_storage_socket[0] >= 0) {
3996                 int copy;
3997
3998                 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
3999                 if (copy < 0)
4000                         return copy;
4001
4002                 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
4003         }
4004
4005         if (rt->netns_storage_socket[1] >= 0) {
4006                 int copy;
4007
4008                 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
4009                 if (copy < 0)
4010                         return copy;
4011
4012                 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
4013         }
4014
4015         return 0;
4016 }
4017
4018 int exec_runtime_deserialize_item(Unit *u, ExecRuntime **rt, const char *key, const char *value, FDSet *fds) {
4019         int r;
4020
4021         assert(rt);
4022         assert(key);
4023         assert(value);
4024
4025         if (streq(key, "tmp-dir")) {
4026                 char *copy;
4027
4028                 r = exec_runtime_allocate(rt);
4029                 if (r < 0)
4030                         return log_oom();
4031
4032                 copy = strdup(value);
4033                 if (!copy)
4034                         return log_oom();
4035
4036                 free((*rt)->tmp_dir);
4037                 (*rt)->tmp_dir = copy;
4038
4039         } else if (streq(key, "var-tmp-dir")) {
4040                 char *copy;
4041
4042                 r = exec_runtime_allocate(rt);
4043                 if (r < 0)
4044                         return log_oom();
4045
4046                 copy = strdup(value);
4047                 if (!copy)
4048                         return log_oom();
4049
4050                 free((*rt)->var_tmp_dir);
4051                 (*rt)->var_tmp_dir = copy;
4052
4053         } else if (streq(key, "netns-socket-0")) {
4054                 int fd;
4055
4056                 r = exec_runtime_allocate(rt);
4057                 if (r < 0)
4058                         return log_oom();
4059
4060                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
4061                         log_unit_debug(u, "Failed to parse netns socket value: %s", value);
4062                 else {
4063                         safe_close((*rt)->netns_storage_socket[0]);
4064                         (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
4065                 }
4066         } else if (streq(key, "netns-socket-1")) {
4067                 int fd;
4068
4069                 r = exec_runtime_allocate(rt);
4070                 if (r < 0)
4071                         return log_oom();
4072
4073                 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
4074                         log_unit_debug(u, "Failed to parse netns socket value: %s", value);
4075                 else {
4076                         safe_close((*rt)->netns_storage_socket[1]);
4077                         (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
4078                 }
4079         } else
4080                 return 0;
4081
4082         return 1;
4083 }
4084
4085 static void *remove_tmpdir_thread(void *p) {
4086         _cleanup_free_ char *path = p;
4087
4088         (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
4089         return NULL;
4090 }
4091
4092 void exec_runtime_destroy(ExecRuntime *rt) {
4093         int r;
4094
4095         if (!rt)
4096                 return;
4097
4098         /* If there are multiple users of this, let's leave the stuff around */
4099         if (rt->n_ref > 1)
4100                 return;
4101
4102         if (rt->tmp_dir) {
4103                 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
4104
4105                 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
4106                 if (r < 0) {
4107                         log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
4108                         free(rt->tmp_dir);
4109                 }
4110
4111                 rt->tmp_dir = NULL;
4112         }
4113
4114         if (rt->var_tmp_dir) {
4115                 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
4116
4117                 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
4118                 if (r < 0) {
4119                         log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
4120                         free(rt->var_tmp_dir);
4121                 }
4122
4123                 rt->var_tmp_dir = NULL;
4124         }
4125
4126         safe_close_pair(rt->netns_storage_socket);
4127 }
4128
/* String names of the ExecInput values, indexed by enum value. */
static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
        [EXEC_INPUT_NULL] = "null",
        [EXEC_INPUT_TTY] = "tty",
        [EXEC_INPUT_TTY_FORCE] = "tty-force",
        [EXEC_INPUT_TTY_FAIL] = "tty-fail",
        [EXEC_INPUT_SOCKET] = "socket",
        [EXEC_INPUT_NAMED_FD] = "fd",
};

/* Defines the lookup helpers for the table above, among them exec_input_to_string() */
DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
4139
/* String names of the ExecOutput values, indexed by enum value. */
static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
        [EXEC_OUTPUT_INHERIT] = "inherit",
        [EXEC_OUTPUT_NULL] = "null",
        [EXEC_OUTPUT_TTY] = "tty",
        [EXEC_OUTPUT_SYSLOG] = "syslog",
        [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
        [EXEC_OUTPUT_KMSG] = "kmsg",
        [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
        [EXEC_OUTPUT_JOURNAL] = "journal",
        [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
        [EXEC_OUTPUT_SOCKET] = "socket",
        [EXEC_OUTPUT_NAMED_FD] = "fd",
};

/* Defines the lookup helpers for the table above, among them exec_output_to_string() */
DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
4155
/* String names of the ExecUtmpMode values, indexed by enum value. */
static const char* const exec_utmp_mode_table[_EXEC_UTMP_MODE_MAX] = {
        [EXEC_UTMP_INIT] = "init",
        [EXEC_UTMP_LOGIN] = "login",
        [EXEC_UTMP_USER] = "user",
};

/* Defines the string lookup helpers for the table above */
DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode, ExecUtmpMode);