]> git.proxmox.com Git - systemd.git/blame - src/core/execute.c
New upstream version 245.7
[systemd.git] / src / core / execute.c
CommitLineData
52ad194e 1/* SPDX-License-Identifier: LGPL-2.1+ */
663996b3 2
663996b3
MS
3#include <errno.h>
4#include <fcntl.h>
6300502b 5#include <poll.h>
8a584da2 6#include <sys/eventfd.h>
e1f67bc7 7#include <sys/ioctl.h>
5a920b42 8#include <sys/mman.h>
6300502b 9#include <sys/personality.h>
663996b3 10#include <sys/prctl.h>
8a584da2 11#include <sys/shm.h>
8a584da2 12#include <sys/types.h>
6300502b
MP
13#include <sys/un.h>
14#include <unistd.h>
13d276d0 15#include <utmpx.h>
663996b3 16
f5e65279 17#if HAVE_PAM
663996b3
MS
18#include <security/pam_appl.h>
19#endif
20
f5e65279 21#if HAVE_SELINUX
60f067b4
JS
22#include <selinux/selinux.h>
23#endif
24
f5e65279 25#if HAVE_SECCOMP
60f067b4
JS
26#include <seccomp.h>
27#endif
28
f5e65279 29#if HAVE_APPARMOR
60f067b4
JS
30#include <sys/apparmor.h>
31#endif
32
86f210e9 33#include "sd-messages.h"
6300502b
MP
34
35#include "af-list.h"
db2df898 36#include "alloc-util.h"
f5e65279 37#if HAVE_APPARMOR
db2df898
MP
38#include "apparmor-util.h"
39#endif
6300502b
MP
40#include "async.h"
41#include "barrier.h"
6300502b 42#include "cap-list.h"
db2df898 43#include "capability-util.h"
f5e65279 44#include "chown-recursive.h"
e1f67bc7 45#include "cgroup-setup.h"
52ad194e 46#include "cpu-set-util.h"
663996b3 47#include "def.h"
6e866b33 48#include "env-file.h"
663996b3 49#include "env-util.h"
60f067b4 50#include "errno-list.h"
db2df898 51#include "execute.h"
6300502b 52#include "exit-status.h"
db2df898 53#include "fd-util.h"
2897b343 54#include "format-util.h"
db2df898
MP
55#include "fs-util.h"
56#include "glob-util.h"
57#include "io-util.h"
6300502b 58#include "ioprio.h"
f5e65279 59#include "label.h"
6300502b
MP
60#include "log.h"
61#include "macro.h"
98393f85 62#include "manager.h"
bb4f798a 63#include "memory-util.h"
e1f67bc7 64#include "missing_fs.h"
6300502b
MP
65#include "mkdir.h"
66#include "namespace.h"
db2df898 67#include "parse-util.h"
6300502b 68#include "path-util.h"
e3bff60a 69#include "process-util.h"
db2df898 70#include "rlimit-util.h"
6300502b 71#include "rm-rf.h"
f5e65279 72#if HAVE_SECCOMP
db2df898
MP
73#include "seccomp-util.h"
74#endif
f5e65279 75#include "securebits-util.h"
6300502b 76#include "selinux-util.h"
86f210e9 77#include "signal-util.h"
6300502b 78#include "smack-util.h"
b012e921 79#include "socket-util.h"
8a584da2 80#include "special.h"
52ad194e 81#include "stat-util.h"
db2df898
MP
82#include "string-table.h"
83#include "string-util.h"
6300502b 84#include "strv.h"
db2df898 85#include "syslog-util.h"
6300502b 86#include "terminal-util.h"
6e866b33 87#include "umask-util.h"
6300502b 88#include "unit.h"
db2df898 89#include "user-util.h"
6300502b 90#include "utmp-wtmp.h"
e3bff60a 91
663996b3 92#define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
14228c0d 93#define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
663996b3 94
60f067b4
JS
95#define SNDBUF_SIZE (8*1024*1024)
96
/* Moves the passed fds so that fds[i] ends up being fd number i+3, for every index i. The array is updated
 * in place as fds get duplicated and their old numbers closed. Returns 0 on success, -errno if
 * fcntl(F_DUPFD) fails. */
static int shift_fds(int fds[], size_t n_fds) {
        int first = 0;

        if (n_fds == 0)
                return 0;

        assert(fds);

        for (;;) {
                int retry_at = -1, idx;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int target = idx + 3, dup_fd;

                        /* Nothing to do if the fd already sits in its final slot */
                        if (fds[idx] == target)
                                continue;

                        dup_fd = fcntl(fds[idx], F_DUPFD, target);
                        if (dup_fd < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = dup_fd;

                        /* F_DUPFD hands out the lowest free fd >= target. If that wasn't target itself
                         * the slot is still occupied; remember the first such index so that we can make
                         * another pass starting there. */
                        if (dup_fd != target && retry_at < 0)
                                retry_at = idx;
                }

                if (retry_at < 0)
                        return 0;

                first = retry_at;
        }
}
141
/* Adjusts the file flags of the fds we are going to pass on: the first n_socket_fds entries get
 * O_NONBLOCK set or cleared according to 'nonblock', and all entries (socket and storage fds alike) get
 * FD_CLOEXEC dropped. Returns 0 on success, or the negative value returned by the first failing
 * fd_nonblock()/fd_cloexec() call. */
static int flags_fds(const int fds[], size_t n_socket_fds, size_t n_storage_fds, bool nonblock) {
        size_t total = n_socket_fds + n_storage_fds, k;
        int r;

        if (total == 0)
                return 0;

        assert(fds);

        for (k = 0; k < total; k++) {
                bool is_socket = k < n_socket_fds;

                /* O_NONBLOCK is only relevant for socket activation fds */
                if (is_socket) {
                        r = fd_nonblock(fds[k], nonblock);
                        if (r < 0)
                                return r;
                }

                /* FD_CLOEXEC always goes away, since these fds are supposed to survive into our
                 * children */
                r = fd_cloexec(fds[k], false);
                if (r < 0)
                        return r;
        }

        return 0;
}
174
4c89c718 175static const char *exec_context_tty_path(const ExecContext *context) {
663996b3
MS
176 assert(context);
177
4c89c718
MP
178 if (context->stdio_as_fds)
179 return NULL;
180
663996b3
MS
181 if (context->tty_path)
182 return context->tty_path;
183
184 return "/dev/console";
185}
186
4c89c718
MP
187static void exec_context_tty_reset(const ExecContext *context, const ExecParameters *p) {
188 const char *path;
189
663996b3
MS
190 assert(context);
191
4c89c718 192 path = exec_context_tty_path(context);
663996b3 193
4c89c718
MP
194 if (context->tty_vhangup) {
195 if (p && p->stdin_fd >= 0)
196 (void) terminal_vhangup_fd(p->stdin_fd);
197 else if (path)
198 (void) terminal_vhangup(path);
199 }
200
201 if (context->tty_reset) {
202 if (p && p->stdin_fd >= 0)
203 (void) reset_terminal_fd(p->stdin_fd, true);
204 else if (path)
205 (void) reset_terminal(path);
206 }
663996b3 207
4c89c718
MP
208 if (context->tty_vt_disallocate && path)
209 (void) vt_disallocate(path);
663996b3
MS
210}
211
8a584da2
MP
212static bool is_terminal_input(ExecInput i) {
213 return IN_SET(i,
214 EXEC_INPUT_TTY,
215 EXEC_INPUT_TTY_FORCE,
216 EXEC_INPUT_TTY_FAIL);
217}
218
663996b3 219static bool is_terminal_output(ExecOutput o) {
8a584da2
MP
220 return IN_SET(o,
221 EXEC_OUTPUT_TTY,
222 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
223 EXEC_OUTPUT_KMSG_AND_CONSOLE,
224 EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
225}
226
f5e65279
MB
227static bool is_syslog_output(ExecOutput o) {
228 return IN_SET(o,
229 EXEC_OUTPUT_SYSLOG,
230 EXEC_OUTPUT_SYSLOG_AND_CONSOLE);
231}
232
233static bool is_kmsg_output(ExecOutput o) {
234 return IN_SET(o,
235 EXEC_OUTPUT_KMSG,
236 EXEC_OUTPUT_KMSG_AND_CONSOLE);
237}
238
8a584da2
MP
239static bool exec_context_needs_term(const ExecContext *c) {
240 assert(c);
241
242 /* Return true if the execution context suggests we should set $TERM to something useful. */
243
244 if (is_terminal_input(c->std_input))
245 return true;
246
247 if (is_terminal_output(c->std_output))
248 return true;
249
250 if (is_terminal_output(c->std_error))
251 return true;
252
253 return !!c->tty_path;
663996b3
MS
254}
255
/* Opens /dev/null with the specified flags (plus O_NOCTTY) and moves the resulting fd to 'nfd' via
 * move_fd(). Returns -errno if the open() fails, otherwise whatever move_fd() returns. */
static int open_null_as(int flags, int nfd) {
        int null_fd;

        assert(nfd >= 0);

        null_fd = open("/dev/null", flags|O_NOCTTY);

        return null_fd < 0 ? -errno : move_fd(null_fd, nfd, false);
}
267
46cdbd49
BR
268static int connect_journal_socket(
269 int fd,
270 const char *log_namespace,
271 uid_t uid,
272 gid_t gid) {
273
274 union sockaddr_union sa;
275 socklen_t sa_len;
e735f4d4
MP
276 uid_t olduid = UID_INVALID;
277 gid_t oldgid = GID_INVALID;
46cdbd49 278 const char *j;
e735f4d4
MP
279 int r;
280
46cdbd49
BR
281 j = log_namespace ?
282 strjoina("/run/systemd/journal.", log_namespace, "/stdout") :
283 "/run/systemd/journal/stdout";
284 r = sockaddr_un_set_path(&sa.un, j);
285 if (r < 0)
286 return r;
287 sa_len = r;
288
f5e65279 289 if (gid_is_valid(gid)) {
e735f4d4
MP
290 oldgid = getgid();
291
f5e65279 292 if (setegid(gid) < 0)
e735f4d4
MP
293 return -errno;
294 }
295
f5e65279 296 if (uid_is_valid(uid)) {
e735f4d4
MP
297 olduid = getuid();
298
f5e65279 299 if (seteuid(uid) < 0) {
e735f4d4
MP
300 r = -errno;
301 goto restore_gid;
302 }
303 }
304
46cdbd49 305 r = connect(fd, &sa.sa, sa_len) < 0 ? -errno : 0;
e735f4d4
MP
306
307 /* If we fail to restore the uid or gid, things will likely
308 fail later on. This should only happen if an LSM interferes. */
309
f5e65279 310 if (uid_is_valid(uid))
e735f4d4
MP
311 (void) seteuid(olduid);
312
313 restore_gid:
f5e65279 314 if (gid_is_valid(gid))
e735f4d4
MP
315 (void) setegid(oldgid);
316
317 return r;
318}
319
5a920b42 320static int connect_logger_as(
98393f85 321 const Unit *unit,
5a920b42 322 const ExecContext *context,
f5e65279 323 const ExecParameters *params,
5a920b42
MP
324 ExecOutput output,
325 const char *ident,
326 int nfd,
327 uid_t uid,
328 gid_t gid) {
329
6e866b33
MB
330 _cleanup_close_ int fd = -1;
331 int r;
663996b3
MS
332
333 assert(context);
f5e65279 334 assert(params);
663996b3
MS
335 assert(output < _EXEC_OUTPUT_MAX);
336 assert(ident);
337 assert(nfd >= 0);
338
339 fd = socket(AF_UNIX, SOCK_STREAM, 0);
340 if (fd < 0)
341 return -errno;
342
46cdbd49 343 r = connect_journal_socket(fd, context->log_namespace, uid, gid);
e735f4d4
MP
344 if (r < 0)
345 return r;
663996b3 346
6e866b33 347 if (shutdown(fd, SHUT_RD) < 0)
663996b3 348 return -errno;
663996b3 349
5a920b42 350 (void) fd_inc_sndbuf(fd, SNDBUF_SIZE);
60f067b4 351
6e866b33 352 if (dprintf(fd,
663996b3
MS
353 "%s\n"
354 "%s\n"
355 "%i\n"
356 "%i\n"
357 "%i\n"
358 "%i\n"
359 "%i\n",
f5e65279
MB
360 context->syslog_identifier ?: ident,
361 params->flags & EXEC_PASS_LOG_UNIT ? unit->id : "",
663996b3
MS
362 context->syslog_priority,
363 !!context->syslog_level_prefix,
f5e65279
MB
364 is_syslog_output(output),
365 is_kmsg_output(output),
6e866b33
MB
366 is_terminal_output(output)) < 0)
367 return -errno;
663996b3 368
6e866b33 369 return move_fd(TAKE_FD(fd), nfd, false);
52ad194e 370}
6e866b33 371
52ad194e
MB
/* Opens the terminal 'path' with the specified flags (plus O_NOCTTY) and moves the resulting fd to 'nfd'
 * via move_fd(). A negative return value of open_terminal() is propagated as is. */
static int open_terminal_as(const char *path, int flags, int nfd) {
        int tty_fd;

        assert(path);
        assert(nfd >= 0);

        tty_fd = open_terminal(path, flags | O_NOCTTY);

        return tty_fd < 0 ? tty_fd : move_fd(tty_fd, nfd, false);
}
52ad194e
MB
384
385static int acquire_path(const char *path, int flags, mode_t mode) {
46cdbd49
BR
386 union sockaddr_union sa;
387 socklen_t sa_len;
6e866b33 388 _cleanup_close_ int fd = -1;
46cdbd49 389 int r;
663996b3
MS
390
391 assert(path);
663996b3 392
52ad194e
MB
393 if (IN_SET(flags & O_ACCMODE, O_WRONLY, O_RDWR))
394 flags |= O_CREAT;
395
396 fd = open(path, flags|O_NOCTTY, mode);
397 if (fd >= 0)
6e866b33 398 return TAKE_FD(fd);
663996b3 399
52ad194e
MB
400 if (errno != ENXIO) /* ENXIO is returned when we try to open() an AF_UNIX file system socket on Linux */
401 return -errno;
52ad194e
MB
402
403 /* So, it appears the specified path could be an AF_UNIX socket. Let's see if we can connect to it. */
404
46cdbd49
BR
405 r = sockaddr_un_set_path(&sa.un, path);
406 if (r < 0)
407 return r == -EINVAL ? -ENXIO : r;
408 sa_len = r;
409
52ad194e
MB
410 fd = socket(AF_UNIX, SOCK_STREAM, 0);
411 if (fd < 0)
412 return -errno;
413
46cdbd49 414 if (connect(fd, &sa.sa, sa_len) < 0)
52ad194e
MB
415 return errno == EINVAL ? -ENXIO : -errno; /* Propagate initial error if we get EINVAL, i.e. we have
416 * indication that his wasn't an AF_UNIX socket after all */
663996b3 417
52ad194e
MB
418 if ((flags & O_ACCMODE) == O_RDONLY)
419 r = shutdown(fd, SHUT_WR);
420 else if ((flags & O_ACCMODE) == O_WRONLY)
421 r = shutdown(fd, SHUT_RD);
422 else
46cdbd49 423 r = 0;
6e866b33 424 if (r < 0)
52ad194e 425 return -errno;
52ad194e 426
6e866b33 427 return TAKE_FD(fd);
663996b3
MS
428}
429
52ad194e
MB
430static int fixup_input(
431 const ExecContext *context,
432 int socket_fd,
433 bool apply_tty_stdin) {
434
435 ExecInput std_input;
436
437 assert(context);
438
439 std_input = context->std_input;
663996b3
MS
440
441 if (is_terminal_input(std_input) && !apply_tty_stdin)
442 return EXEC_INPUT_NULL;
443
444 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
445 return EXEC_INPUT_NULL;
446
52ad194e
MB
447 if (std_input == EXEC_INPUT_DATA && context->stdin_data_size == 0)
448 return EXEC_INPUT_NULL;
449
663996b3
MS
450 return std_input;
451}
452
453static int fixup_output(ExecOutput std_output, int socket_fd) {
454
455 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
456 return EXEC_OUTPUT_INHERIT;
457
458 return std_output;
459}
460
db2df898
MP
461static int setup_input(
462 const ExecContext *context,
463 const ExecParameters *params,
8a584da2 464 int socket_fd,
f2dec872 465 const int named_iofds[static 3]) {
db2df898 466
663996b3
MS
467 ExecInput i;
468
469 assert(context);
db2df898 470 assert(params);
f2dec872 471 assert(named_iofds);
db2df898
MP
472
473 if (params->stdin_fd >= 0) {
474 if (dup2(params->stdin_fd, STDIN_FILENO) < 0)
475 return -errno;
476
477 /* Try to make this the controlling tty, if it is a tty, and reset it */
52ad194e
MB
478 if (isatty(STDIN_FILENO)) {
479 (void) ioctl(STDIN_FILENO, TIOCSCTTY, context->std_input == EXEC_INPUT_TTY_FORCE);
480 (void) reset_terminal_fd(STDIN_FILENO, true);
481 }
db2df898
MP
482
483 return STDIN_FILENO;
484 }
663996b3 485
52ad194e 486 i = fixup_input(context, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
663996b3
MS
487
488 switch (i) {
489
490 case EXEC_INPUT_NULL:
491 return open_null_as(O_RDONLY, STDIN_FILENO);
492
493 case EXEC_INPUT_TTY:
494 case EXEC_INPUT_TTY_FORCE:
495 case EXEC_INPUT_TTY_FAIL: {
52ad194e 496 int fd;
663996b3 497
4c89c718 498 fd = acquire_terminal(exec_context_tty_path(context),
98393f85
MB
499 i == EXEC_INPUT_TTY_FAIL ? ACQUIRE_TERMINAL_TRY :
500 i == EXEC_INPUT_TTY_FORCE ? ACQUIRE_TERMINAL_FORCE :
501 ACQUIRE_TERMINAL_WAIT,
5eef597e 502 USEC_INFINITY);
60f067b4 503 if (fd < 0)
663996b3
MS
504 return fd;
505
52ad194e 506 return move_fd(fd, STDIN_FILENO, false);
663996b3
MS
507 }
508
509 case EXEC_INPUT_SOCKET:
52ad194e
MB
510 assert(socket_fd >= 0);
511
663996b3
MS
512 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
513
8a584da2 514 case EXEC_INPUT_NAMED_FD:
52ad194e
MB
515 assert(named_iofds[STDIN_FILENO] >= 0);
516
8a584da2
MP
517 (void) fd_nonblock(named_iofds[STDIN_FILENO], false);
518 return dup2(named_iofds[STDIN_FILENO], STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
519
52ad194e
MB
520 case EXEC_INPUT_DATA: {
521 int fd;
522
523 fd = acquire_data_fd(context->stdin_data, context->stdin_data_size, 0);
524 if (fd < 0)
525 return fd;
526
527 return move_fd(fd, STDIN_FILENO, false);
528 }
529
530 case EXEC_INPUT_FILE: {
531 bool rw;
532 int fd;
533
534 assert(context->stdio_file[STDIN_FILENO]);
535
536 rw = (context->std_output == EXEC_OUTPUT_FILE && streq_ptr(context->stdio_file[STDIN_FILENO], context->stdio_file[STDOUT_FILENO])) ||
537 (context->std_error == EXEC_OUTPUT_FILE && streq_ptr(context->stdio_file[STDIN_FILENO], context->stdio_file[STDERR_FILENO]));
538
539 fd = acquire_path(context->stdio_file[STDIN_FILENO], rw ? O_RDWR : O_RDONLY, 0666 & ~context->umask);
540 if (fd < 0)
541 return fd;
542
543 return move_fd(fd, STDIN_FILENO, false);
544 }
545
663996b3
MS
546 default:
547 assert_not_reached("Unknown input type");
548 }
549}
550
6e866b33
MB
551static bool can_inherit_stderr_from_stdout(
552 const ExecContext *context,
553 ExecOutput o,
554 ExecOutput e) {
555
556 assert(context);
557
558 /* Returns true, if given the specified STDERR and STDOUT output we can directly dup() the stdout fd to the
559 * stderr fd */
560
561 if (e == EXEC_OUTPUT_INHERIT)
562 return true;
563 if (e != o)
564 return false;
565
566 if (e == EXEC_OUTPUT_NAMED_FD)
567 return streq_ptr(context->stdio_fdname[STDOUT_FILENO], context->stdio_fdname[STDERR_FILENO]);
568
569 if (IN_SET(e, EXEC_OUTPUT_FILE, EXEC_OUTPUT_FILE_APPEND))
570 return streq_ptr(context->stdio_file[STDOUT_FILENO], context->stdio_file[STDERR_FILENO]);
571
572 return true;
573}
574
db2df898 575static int setup_output(
98393f85 576 const Unit *unit,
db2df898
MP
577 const ExecContext *context,
578 const ExecParameters *params,
579 int fileno,
580 int socket_fd,
f2dec872 581 const int named_iofds[static 3],
db2df898 582 const char *ident,
5a920b42
MP
583 uid_t uid,
584 gid_t gid,
585 dev_t *journal_stream_dev,
586 ino_t *journal_stream_ino) {
db2df898 587
663996b3
MS
588 ExecOutput o;
589 ExecInput i;
590 int r;
591
e3bff60a 592 assert(unit);
663996b3 593 assert(context);
db2df898 594 assert(params);
663996b3 595 assert(ident);
5a920b42
MP
596 assert(journal_stream_dev);
597 assert(journal_stream_ino);
663996b3 598
db2df898
MP
599 if (fileno == STDOUT_FILENO && params->stdout_fd >= 0) {
600
601 if (dup2(params->stdout_fd, STDOUT_FILENO) < 0)
602 return -errno;
603
604 return STDOUT_FILENO;
605 }
606
607 if (fileno == STDERR_FILENO && params->stderr_fd >= 0) {
608 if (dup2(params->stderr_fd, STDERR_FILENO) < 0)
609 return -errno;
610
611 return STDERR_FILENO;
612 }
613
52ad194e 614 i = fixup_input(context, socket_fd, params->flags & EXEC_APPLY_TTY_STDIN);
663996b3
MS
615 o = fixup_output(context->std_output, socket_fd);
616
617 if (fileno == STDERR_FILENO) {
618 ExecOutput e;
619 e = fixup_output(context->std_error, socket_fd);
620
621 /* This expects the input and output are already set up */
622
623 /* Don't change the stderr file descriptor if we inherit all
624 * the way and are not on a tty */
625 if (e == EXEC_OUTPUT_INHERIT &&
626 o == EXEC_OUTPUT_INHERIT &&
627 i == EXEC_INPUT_NULL &&
628 !is_terminal_input(context->std_input) &&
629 getppid () != 1)
630 return fileno;
631
632 /* Duplicate from stdout if possible */
6e866b33 633 if (can_inherit_stderr_from_stdout(context, o, e))
663996b3
MS
634 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
635
636 o = e;
637
638 } else if (o == EXEC_OUTPUT_INHERIT) {
639 /* If input got downgraded, inherit the original value */
640 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
4c89c718 641 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
663996b3 642
52ad194e
MB
643 /* If the input is connected to anything that's not a /dev/null or a data fd, inherit that... */
644 if (!IN_SET(i, EXEC_INPUT_NULL, EXEC_INPUT_DATA))
663996b3
MS
645 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
646
647 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
648 if (getppid() != 1)
649 return fileno;
650
651 /* We need to open /dev/null here anew, to get the right access mode. */
652 return open_null_as(O_WRONLY, fileno);
653 }
654
655 switch (o) {
656
657 case EXEC_OUTPUT_NULL:
658 return open_null_as(O_WRONLY, fileno);
659
660 case EXEC_OUTPUT_TTY:
661 if (is_terminal_input(i))
662 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
663
664 /* We don't reset the terminal if this is just about output */
4c89c718 665 return open_terminal_as(exec_context_tty_path(context), O_WRONLY, fileno);
663996b3
MS
666
667 case EXEC_OUTPUT_SYSLOG:
668 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
669 case EXEC_OUTPUT_KMSG:
670 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
671 case EXEC_OUTPUT_JOURNAL:
672 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
f5e65279 673 r = connect_logger_as(unit, context, params, o, ident, fileno, uid, gid);
663996b3 674 if (r < 0) {
f5e65279 675 log_unit_warning_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
663996b3 676 r = open_null_as(O_WRONLY, fileno);
5a920b42
MP
677 } else {
678 struct stat st;
679
680 /* If we connected this fd to the journal via a stream, patch the device/inode into the passed
681 * parameters, but only then. This is useful so that we can set $JOURNAL_STREAM that permits
f5e65279
MB
682 * services to detect whether they are connected to the journal or not.
683 *
684 * If both stdout and stderr are connected to a stream then let's make sure to store the data
685 * about STDERR as that's usually the best way to do logging. */
5a920b42 686
f5e65279
MB
687 if (fstat(fileno, &st) >= 0 &&
688 (*journal_stream_ino == 0 || fileno == STDERR_FILENO)) {
5a920b42
MP
689 *journal_stream_dev = st.st_dev;
690 *journal_stream_ino = st.st_ino;
691 }
663996b3
MS
692 }
693 return r;
694
695 case EXEC_OUTPUT_SOCKET:
696 assert(socket_fd >= 0);
52ad194e 697
663996b3
MS
698 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
699
8a584da2 700 case EXEC_OUTPUT_NAMED_FD:
52ad194e
MB
701 assert(named_iofds[fileno] >= 0);
702
8a584da2
MP
703 (void) fd_nonblock(named_iofds[fileno], false);
704 return dup2(named_iofds[fileno], fileno) < 0 ? -errno : fileno;
705
6e866b33
MB
706 case EXEC_OUTPUT_FILE:
707 case EXEC_OUTPUT_FILE_APPEND: {
52ad194e 708 bool rw;
6e866b33 709 int fd, flags;
52ad194e
MB
710
711 assert(context->stdio_file[fileno]);
712
713 rw = context->std_input == EXEC_INPUT_FILE &&
714 streq_ptr(context->stdio_file[fileno], context->stdio_file[STDIN_FILENO]);
715
716 if (rw)
717 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
718
6e866b33
MB
719 flags = O_WRONLY;
720 if (o == EXEC_OUTPUT_FILE_APPEND)
721 flags |= O_APPEND;
722
723 fd = acquire_path(context->stdio_file[fileno], flags, 0666 & ~context->umask);
52ad194e
MB
724 if (fd < 0)
725 return fd;
726
6e866b33 727 return move_fd(fd, fileno, 0);
52ad194e
MB
728 }
729
663996b3
MS
730 default:
731 assert_not_reached("Unknown error type");
732 }
733}
734
735static int chown_terminal(int fd, uid_t uid) {
f2dec872 736 int r;
663996b3
MS
737
738 assert(fd >= 0);
739
5a920b42 740 /* Before we chown/chmod the TTY, let's ensure this is actually a tty */
f2dec872
BR
741 if (isatty(fd) < 1) {
742 if (IN_SET(errno, EINVAL, ENOTTY))
743 return 0; /* not a tty */
663996b3 744
663996b3 745 return -errno;
f2dec872 746 }
663996b3 747
f2dec872
BR
748 /* This might fail. What matters are the results. */
749 r = fchmod_and_chown(fd, TTY_MODE, uid, -1);
750 if (r < 0)
751 return r;
663996b3 752
f2dec872 753 return 1;
663996b3
MS
754}
755
2897b343 756static int setup_confirm_stdio(const char *vc, int *_saved_stdin, int *_saved_stdout) {
db2df898
MP
757 _cleanup_close_ int fd = -1, saved_stdin = -1, saved_stdout = -1;
758 int r;
663996b3
MS
759
760 assert(_saved_stdin);
761 assert(_saved_stdout);
762
763 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
764 if (saved_stdin < 0)
765 return -errno;
766
767 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
db2df898
MP
768 if (saved_stdout < 0)
769 return -errno;
663996b3 770
98393f85 771 fd = acquire_terminal(vc, ACQUIRE_TERMINAL_WAIT, DEFAULT_CONFIRM_USEC);
db2df898
MP
772 if (fd < 0)
773 return fd;
663996b3
MS
774
775 r = chown_terminal(fd, getuid());
776 if (r < 0)
db2df898 777 return r;
663996b3 778
db2df898
MP
779 r = reset_terminal_fd(fd, true);
780 if (r < 0)
781 return r;
663996b3 782
98393f85 783 r = rearrange_stdio(fd, fd, STDERR_FILENO);
db2df898 784 fd = -1;
98393f85
MB
785 if (r < 0)
786 return r;
663996b3
MS
787
788 *_saved_stdin = saved_stdin;
789 *_saved_stdout = saved_stdout;
790
db2df898 791 saved_stdin = saved_stdout = -1;
663996b3 792
db2df898 793 return 0;
663996b3
MS
794}
795
2897b343
MP
796static void write_confirm_error_fd(int err, int fd, const Unit *u) {
797 assert(err < 0);
798
799 if (err == -ETIMEDOUT)
800 dprintf(fd, "Confirmation question timed out for %s, assuming positive response.\n", u->id);
801 else {
802 errno = -err;
803 dprintf(fd, "Couldn't ask confirmation for %s: %m, assuming positive response.\n", u->id);
804 }
805}
806
807static void write_confirm_error(int err, const char *vc, const Unit *u) {
60f067b4 808 _cleanup_close_ int fd = -1;
663996b3 809
2897b343 810 assert(vc);
663996b3 811
2897b343 812 fd = open_terminal(vc, O_WRONLY|O_NOCTTY|O_CLOEXEC);
663996b3 813 if (fd < 0)
2897b343 814 return;
663996b3 815
2897b343 816 write_confirm_error_fd(err, fd, u);
663996b3
MS
817}
818
/* Releases the terminal and puts the saved stdin/stdout fds back in place (where set), then closes the
 * saved copies and invalidates the caller's variables. Returns 0 on success, or -errno of the last
 * dup2() that failed. */
static int restore_confirm_stdio(int *saved_stdin, int *saved_stdout) {
        int r = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                r = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                r = -errno;

        *saved_stdin = safe_close(*saved_stdin);
        *saved_stdout = safe_close(*saved_stdout);

        return r;
}
840
2897b343
MP
/* Possible results of ask_for_confirmation() */
enum {
        CONFIRM_PRETEND_FAILURE = -1, /* don't execute the command, pretend it failed */
        CONFIRM_PRETEND_SUCCESS =  0, /* don't execute the command, pretend it succeeded */
        CONFIRM_EXECUTE         =  1, /* execute the command */
};
846
847static int ask_for_confirmation(const char *vc, Unit *u, const char *cmdline) {
663996b3 848 int saved_stdout = -1, saved_stdin = -1, r;
2897b343
MP
849 _cleanup_free_ char *e = NULL;
850 char c;
663996b3 851
2897b343
MP
852 /* For any internal errors, assume a positive response. */
853 r = setup_confirm_stdio(vc, &saved_stdin, &saved_stdout);
854 if (r < 0) {
855 write_confirm_error(r, vc, u);
856 return CONFIRM_EXECUTE;
857 }
663996b3 858
2897b343
MP
859 /* confirm_spawn might have been disabled while we were sleeping. */
860 if (manager_is_confirm_spawn_disabled(u->manager)) {
861 r = 1;
862 goto restore_stdio;
863 }
663996b3 864
2897b343
MP
865 e = ellipsize(cmdline, 60, 100);
866 if (!e) {
867 log_oom();
868 r = CONFIRM_EXECUTE;
869 goto restore_stdio;
870 }
663996b3 871
2897b343
MP
872 for (;;) {
873 r = ask_char(&c, "yfshiDjcn", "Execute %s? [y, f, s – h for help] ", e);
874 if (r < 0) {
875 write_confirm_error_fd(r, STDOUT_FILENO, u);
876 r = CONFIRM_EXECUTE;
877 goto restore_stdio;
878 }
879
880 switch (c) {
881 case 'c':
882 printf("Resuming normal execution.\n");
883 manager_disable_confirm_spawn();
884 r = 1;
885 break;
886 case 'D':
887 unit_dump(u, stdout, " ");
888 continue; /* ask again */
889 case 'f':
890 printf("Failing execution.\n");
891 r = CONFIRM_PRETEND_FAILURE;
892 break;
893 case 'h':
894 printf(" c - continue, proceed without asking anymore\n"
895 " D - dump, show the state of the unit\n"
896 " f - fail, don't execute the command and pretend it failed\n"
897 " h - help\n"
898 " i - info, show a short summary of the unit\n"
899 " j - jobs, show jobs that are in progress\n"
900 " s - skip, don't execute the command and pretend it succeeded\n"
901 " y - yes, execute the command\n");
902 continue; /* ask again */
903 case 'i':
904 printf(" Description: %s\n"
905 " Unit: %s\n"
906 " Command: %s\n",
907 u->id, u->description, cmdline);
908 continue; /* ask again */
909 case 'j':
910 manager_dump_jobs(u->manager, stdout, " ");
911 continue; /* ask again */
912 case 'n':
913 /* 'n' was removed in favor of 'f'. */
914 printf("Didn't understand 'n', did you mean 'f'?\n");
915 continue; /* ask again */
916 case 's':
917 printf("Skipping execution.\n");
918 r = CONFIRM_PRETEND_SUCCESS;
919 break;
920 case 'y':
921 r = CONFIRM_EXECUTE;
922 break;
923 default:
924 assert_not_reached("Unhandled choice");
925 }
926 break;
927 }
663996b3 928
2897b343
MP
929restore_stdio:
930 restore_confirm_stdio(&saved_stdin, &saved_stdout);
663996b3
MS
931 return r;
932}
933
8a584da2
MP
934static int get_fixed_user(const ExecContext *c, const char **user,
935 uid_t *uid, gid_t *gid,
936 const char **home, const char **shell) {
663996b3 937 int r;
8a584da2 938 const char *name;
663996b3 939
8a584da2 940 assert(c);
663996b3 941
8a584da2
MP
942 if (!c->user)
943 return 0;
944
945 /* Note that we don't set $HOME or $SHELL if they are not particularly enlightening anyway
946 * (i.e. are "/" or "/bin/nologin"). */
947
948 name = c->user;
6e866b33 949 r = get_user_creds(&name, uid, gid, home, shell, USER_CREDS_CLEAN);
8a584da2
MP
950 if (r < 0)
951 return r;
663996b3 952
8a584da2
MP
953 *user = name;
954 return 0;
955}
956
957static int get_fixed_group(const ExecContext *c, const char **group, gid_t *gid) {
958 int r;
959 const char *name;
960
961 assert(c);
962
963 if (!c->group)
964 return 0;
965
966 name = c->group;
6e866b33 967 r = get_group_creds(&name, gid, 0);
8a584da2
MP
968 if (r < 0)
969 return r;
970
971 *group = name;
972 return 0;
973}
974
975static int get_supplementary_groups(const ExecContext *c, const char *user,
976 const char *group, gid_t gid,
977 gid_t **supplementary_gids, int *ngids) {
978 char **i;
979 int r, k = 0;
980 int ngroups_max;
981 bool keep_groups = false;
982 gid_t *groups = NULL;
983 _cleanup_free_ gid_t *l_gids = NULL;
984
985 assert(c);
986
987 /*
988 * If user is given, then lookup GID and supplementary groups list.
989 * We avoid NSS lookups for gid=0. Also we have to initialize groups
990 * here and as early as possible so we keep the list of supplementary
991 * groups of the caller.
992 */
993 if (user && gid_is_valid(gid) && gid != 0) {
663996b3 994 /* First step, initialize groups from /etc/groups */
8a584da2
MP
995 if (initgroups(user, gid) < 0)
996 return -errno;
663996b3 997
8a584da2
MP
998 keep_groups = true;
999 }
663996b3 1000
f5e65279 1001 if (strv_isempty(c->supplementary_groups))
8a584da2
MP
1002 return 0;
1003
1004 /*
1005 * If SupplementaryGroups= was passed then NGROUPS_MAX has to
1006 * be positive, otherwise fail.
1007 */
1008 errno = 0;
1009 ngroups_max = (int) sysconf(_SC_NGROUPS_MAX);
f2dec872
BR
1010 if (ngroups_max <= 0)
1011 return errno_or_else(EOPNOTSUPP);
663996b3 1012
8a584da2
MP
1013 l_gids = new(gid_t, ngroups_max);
1014 if (!l_gids)
1015 return -ENOMEM;
663996b3 1016
8a584da2
MP
1017 if (keep_groups) {
1018 /*
1019 * Lookup the list of groups that the user belongs to, we
1020 * avoid NSS lookups here too for gid=0.
1021 */
1022 k = ngroups_max;
1023 if (getgrouplist(user, gid, l_gids, &k) < 0)
1024 return -EINVAL;
1025 } else
1026 k = 0;
663996b3 1027
8a584da2
MP
1028 STRV_FOREACH(i, c->supplementary_groups) {
1029 const char *g;
663996b3 1030
8a584da2
MP
1031 if (k >= ngroups_max)
1032 return -E2BIG;
663996b3 1033
8a584da2 1034 g = *i;
6e866b33 1035 r = get_group_creds(&g, l_gids+k, 0);
8a584da2
MP
1036 if (r < 0)
1037 return r;
663996b3 1038
8a584da2
MP
1039 k++;
1040 }
663996b3 1041
8a584da2
MP
1042 /*
1043 * Sets ngids to zero to drop all supplementary groups, happens
1044 * when we are under root and SupplementaryGroups= is empty.
1045 */
1046 if (k == 0) {
1047 *ngids = 0;
1048 return 0;
1049 }
663996b3 1050
8a584da2
MP
1051 /* Otherwise get the final list of supplementary groups */
1052 groups = memdup(l_gids, sizeof(gid_t) * k);
1053 if (!groups)
1054 return -ENOMEM;
663996b3 1055
8a584da2
MP
1056 *supplementary_gids = groups;
1057 *ngids = k;
1058
1059 groups = NULL;
663996b3 1060
8a584da2
MP
1061 return 0;
1062}
1063
/* Applies the supplementary group list (if it is not empty) and then the GID (if it is valid) to the
 * calling process. Returns 0 on success, a negative errno-style value on failure. */
static int enforce_groups(gid_t gid, const gid_t *supplementary_gids, int ngids) {

        /* Handle SupplementaryGroups= if it is not empty */
        if (ngids > 0) {
                int r;

                r = maybe_setgroups(ngids, supplementary_gids);
                if (r < 0)
                        return r;
        }

        /* Then set our gids */
        if (gid_is_valid(gid) && setresgid(gid, gid, gid) < 0)
                return -errno;

        return 0;
}
1082
1083static int enforce_user(const ExecContext *context, uid_t uid) {
663996b3
MS
1084 assert(context);
1085
8a584da2
MP
1086 if (!uid_is_valid(uid))
1087 return 0;
1088
aa27b158 1089 /* Sets (but doesn't look up) the uid and make sure we keep the
663996b3
MS
1090 * capabilities while doing so. */
1091
aa27b158 1092 if (context->capability_ambient_set != 0) {
663996b3
MS
1093
1094 /* First step: If we need to keep capabilities but
1095 * drop privileges we need to make sure we keep our
1096 * caps, while we drop privileges. */
1097 if (uid != 0) {
1098 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
1099
1100 if (prctl(PR_GET_SECUREBITS) != sb)
1101 if (prctl(PR_SET_SECUREBITS, sb) < 0)
1102 return -errno;
1103 }
663996b3
MS
1104 }
1105
aa27b158 1106 /* Second step: actually set the uids */
663996b3
MS
1107 if (setresuid(uid, uid, uid) < 0)
1108 return -errno;
1109
1110 /* At this point we should have all necessary capabilities but
1111 are otherwise a normal user. However, the caps might got
1112 corrupted due to the setresuid() so we need clean them up
1113 later. This is done outside of this call. */
1114
1115 return 0;
1116}
1117
f5e65279 1118#if HAVE_PAM
663996b3
MS
1119
1120static int null_conv(
1121 int num_msg,
1122 const struct pam_message **msg,
1123 struct pam_response **resp,
1124 void *appdata_ptr) {
1125
1126 /* We don't support conversations */
1127
1128 return PAM_CONV_ERR;
1129}
1130
8a584da2
MP
1131#endif
1132
663996b3
MS
static int setup_pam(
                const char *name,
                const char *user,
                uid_t uid,
                gid_t gid,
                const char *tty,
                char ***env,
                const int fds[], size_t n_fds) {

#if HAVE_PAM

        static const struct pam_conv conv = {
                .conv = null_conv,
                .appdata_ptr = NULL
        };

        _cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
        pam_handle_t *handle = NULL;
        sigset_t saved_mask;
        int pam_code = PAM_SUCCESS, r;
        char **var, **pam_env = NULL;
        bool session_open = false;
        pid_t helper_pid = 0, parent_pid;
        int flags = 0;

        assert(name);
        assert(user);
        assert(env);

        /* The PAM session is opened here, in the parent, and then a helper process is forked off. The
         * helper lingers until it is killed, either through PR_SET_PDEATHSIG or by systemd via the cgroup
         * logic, and then tears the PAM session down again. The parent goes on to exec() the actual
         * daemon. Doing it this way around guarantees that the daemon's main PID is the one that was
         * originally fork()ed.
         *
         * On success the caller's *env is swapped for the environment list obtained from PAM. Without PAM
         * support compiled in this function does nothing and returns 0. */

        r = barrier_create(&barrier);
        if (r < 0)
                goto fail;

        /* Keep PAM quiet unless debug logging is enabled */
        if (log_get_max_level() < LOG_DEBUG)
                flags |= PAM_SILENT;

        pam_code = pam_start(name, user, &conv, &handle);
        if (pam_code != PAM_SUCCESS) {
                handle = NULL;
                goto fail;
        }

        if (!tty) {
                _cleanup_free_ char *stdin_tty = NULL;

                /* No TTY was passed explicitly, but the fd we got as stdin might still be one. If so,
                 * derive the TTY path from it. */

                if (getttyname_malloc(STDIN_FILENO, &stdin_tty) >= 0)
                        tty = strjoina("/dev/", stdin_tty);
        }

        if (tty) {
                pam_code = pam_set_item(handle, PAM_TTY, tty);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        /* Hand the current environment block over to PAM */
        STRV_FOREACH(var, *env) {
                pam_code = pam_putenv(handle, *var);
                if (pam_code != PAM_SUCCESS)
                        goto fail;
        }

        pam_code = pam_acct_mgmt(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        /* Failing to establish credentials is not fatal, it is merely logged */
        pam_code = pam_setcred(handle, PAM_ESTABLISH_CRED | flags);
        if (pam_code != PAM_SUCCESS)
                log_debug("pam_setcred() failed, ignoring: %s", pam_strerror(handle, pam_code));

        pam_code = pam_open_session(handle, flags);
        if (pam_code != PAM_SUCCESS)
                goto fail;

        session_open = true;

        pam_env = pam_getenvlist(handle);
        if (!pam_env) {
                pam_code = PAM_BUF_ERR;
                goto fail;
        }

        /* Block SIGTERM before forking, so that it cannot get lost in the child */

        assert_se(sigprocmask_many(SIG_BLOCK, &saved_mask, SIGTERM, -1) >= 0);

        parent_pid = getpid_cached();

        r = safe_fork("(sd-pam)", 0, &helper_pid);
        if (r < 0)
                goto fail;
        if (r == 0) {
                int received, exit_code = EXIT_PAM;

                /* This is the helper: all it has to do is reset the PAM session on termination */
                barrier_set_role(&barrier, BARRIER_CHILD);

                /* Do not keep the fds that were passed in open in the helper. The assumption is that
                 * apart from those, only fds opened by PAM itself are open at this point. */
                (void) close_many(fds, n_fds);

                /* Drop privileges: none are needed for pam_close_session(), and doing so makes
                 * PR_SET_PDEATHSIG work in most cases. Failures are only logged, but then sd-pam
                 * should be expected not to exit normally. */

                r = maybe_setgroups(0, NULL);
                if (r < 0)
                        log_warning_errno(r, "Failed to setgroups() in sd-pam: %m");
                if (setresgid(gid, gid, gid) < 0)
                        log_warning_errno(errno, "Failed to setresgid() in sd-pam: %m");
                if (setresuid(uid, uid, uid) < 0)
                        log_warning_errno(errno, "Failed to setresuid() in sd-pam: %m");

                (void) ignore_signals(SIGPIPE, -1);

                /* Ask to be sent SIGTERM once the parent dies. This only works if the setresuid()
                 * above succeeded, since the kernel won't let an unprivileged parent kill a privileged
                 * child this way. The control group kill logic is relied upon for the rest. */
                if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
                        goto child_finish;

                /* Report to the parent that setup is complete. This matters in particular for the
                 * privilege dropping, as unit setup could otherwise race against the setresuid(2)
                 * call above.
                 *
                 * A parent that has aborted is detected right below, hence the result is ignored. */
                (void) barrier_place(&barrier);

                /* Only wait for the signal if the parent has not gone away already */
                if (getppid() == parent_pid) {
                        sigset_t wait_set;

                        assert_se(sigemptyset(&wait_set) >= 0);
                        assert_se(sigaddset(&wait_set, SIGTERM) >= 0);

                        /* Retry when interrupted, give up on any other failure */
                        while (sigwait(&wait_set, &received) < 0)
                                if (errno != EINTR)
                                        goto child_finish;

                        assert(received == SIGTERM);
                }

                pam_code = pam_setcred(handle, PAM_DELETE_CRED | flags);
                if (pam_code != PAM_SUCCESS)
                        goto child_finish;

                /* The session is ended only if the parent is gone */
                if (getppid() != parent_pid) {
                        pam_code = pam_close_session(handle, flags);
                        if (pam_code != PAM_SUCCESS)
                                goto child_finish;
                }

                exit_code = 0;

        child_finish:
                pam_end(handle, pam_code | flags);
                _exit(exit_code);
        }

        barrier_set_role(&barrier, BARRIER_PARENT);

        /* The helper was forked off successfully and takes care of all cleanup, hence the parent lets
         * go of the handle. */
        handle = NULL;

        /* Restore the parent's original signal mask, unblocking SIGTERM again */
        assert_se(sigprocmask(SIG_SETMASK, &saved_mask, NULL) >= 0);

        /* PAM modules may have opened the log; close it explicitly as we don't want that fd around */
        closelog();

        /* Wait synchronously until the helper is initialized. There is no way to recover from a
         * failure here, so it is only reported, loudly. */
        if (!barrier_place_and_sync(&barrier))
                log_error("PAM initialization failed");

        return strv_free_and_replace(*env, pam_env);

fail:
        if (pam_code == PAM_SUCCESS)
                log_error_errno(r, "PAM failed: %m");
        else {
                log_error("PAM failed: %s", pam_strerror(handle, pam_code));
                r = -EPERM; /* PAM errors do not map to errno */
        }

        if (handle) {
                if (session_open)
                        pam_code = pam_close_session(handle, flags);

                pam_end(handle, pam_code | flags);
        }

        strv_free(pam_env);
        closelog();

        return r;
#else
        return 0;
#endif
}
663996b3
MS
1356
static void rename_process_from_path(const char *path) {
        char label[11];
        const char *leaf;
        size_t len;

        /* Renames the process to "(<name>)", where <name> is taken from the last component of 'path'.
         * The result has to fit into 10 characters (i.e. the length of "/sbin/init") so that it looks
         * pretty in /bin/ps; with the two parentheses that leaves room for 8 characters of the name. */

        leaf = basename(path);
        if (isempty(leaf)) {
                rename_process("(...)");
                return;
        }

        len = strlen(leaf);
        if (len > 8) {
                /* Keep the tail of the name: it is usually the more interesting part, since the
                 * beginning might just be "systemd-" */
                leaf += len - 8;
                len = 8;
        }

        label[0] = '(';
        memcpy(label + 1, leaf, len);
        label[len + 1] = ')';
        label[len + 2] = 0;

        rename_process(label);
}
1387
2897b343
MP
1388static bool context_has_address_families(const ExecContext *c) {
1389 assert(c);
1390
1391 return c->address_families_whitelist ||
1392 !set_isempty(c->address_families);
1393}
1394
1395static bool context_has_syscall_filters(const ExecContext *c) {
1396 assert(c);
1397
1398 return c->syscall_whitelist ||
52ad194e 1399 !hashmap_isempty(c->syscall_filter);
2897b343
MP
1400}
1401
1402static bool context_has_no_new_privileges(const ExecContext *c) {
1403 assert(c);
1404
1405 if (c->no_new_privileges)
1406 return true;
1407
1408 if (have_effective_cap(CAP_SYS_ADMIN)) /* if we are privileged, we don't need NNP */
1409 return false;
1410
1411 /* We need NNP if we have any form of seccomp and are unprivileged */
1412 return context_has_address_families(c) ||
1413 c->memory_deny_write_execute ||
1414 c->restrict_realtime ||
bb4f798a 1415 c->restrict_suid_sgid ||
2897b343 1416 exec_context_restrict_namespaces_set(c) ||
46cdbd49 1417 c->protect_clock ||
2897b343
MP
1418 c->protect_kernel_tunables ||
1419 c->protect_kernel_modules ||
e1f67bc7 1420 c->protect_kernel_logs ||
2897b343
MP
1421 c->private_devices ||
1422 context_has_syscall_filters(c) ||
f5e65279 1423 !set_isempty(c->syscall_archs) ||
bb4f798a
MB
1424 c->lock_personality ||
1425 c->protect_hostname;
2897b343
MP
1426}
1427
f5e65279 1428#if HAVE_SECCOMP
60f067b4 1429
8a584da2
MP
1430static bool skip_seccomp_unavailable(const Unit* u, const char* msg) {
1431
1432 if (is_seccomp_available())
1433 return false;
1434
8a584da2 1435 log_unit_debug(u, "SECCOMP features not detected in the kernel, skipping %s", msg);
8a584da2
MP
1436 return true;
1437}
1438
f5e65279 1439static int apply_syscall_filter(const Unit* u, const ExecContext *c, bool needs_ambient_hack) {
2897b343 1440 uint32_t negative_action, default_action, action;
f5e65279 1441 int r;
663996b3 1442
2897b343 1443 assert(u);
60f067b4 1444 assert(c);
663996b3 1445
2897b343 1446 if (!context_has_syscall_filters(c))
8a584da2
MP
1447 return 0;
1448
2897b343
MP
1449 if (skip_seccomp_unavailable(u, "SystemCallFilter="))
1450 return 0;
663996b3 1451
f2dec872 1452 negative_action = c->syscall_errno == 0 ? scmp_act_kill_process() : SCMP_ACT_ERRNO(c->syscall_errno);
663996b3 1453
2897b343
MP
1454 if (c->syscall_whitelist) {
1455 default_action = negative_action;
1456 action = SCMP_ACT_ALLOW;
60f067b4 1457 } else {
2897b343
MP
1458 default_action = SCMP_ACT_ALLOW;
1459 action = negative_action;
60f067b4 1460 }
663996b3 1461
f5e65279
MB
1462 if (needs_ambient_hack) {
1463 r = seccomp_filter_set_add(c->syscall_filter, c->syscall_whitelist, syscall_filter_sets + SYSCALL_FILTER_SET_SETUID);
1464 if (r < 0)
1465 return r;
1466 }
1467
6e866b33 1468 return seccomp_load_syscall_filter_set_raw(default_action, c->syscall_filter, action, false);
60f067b4
JS
1469}
1470
2897b343
MP
1471static int apply_syscall_archs(const Unit *u, const ExecContext *c) {
1472 assert(u);
60f067b4
JS
1473 assert(c);
1474
2897b343 1475 if (set_isempty(c->syscall_archs))
8a584da2 1476 return 0;
60f067b4 1477
2897b343
MP
1478 if (skip_seccomp_unavailable(u, "SystemCallArchitectures="))
1479 return 0;
60f067b4 1480
2897b343
MP
1481 return seccomp_restrict_archs(c->syscall_archs);
1482}
60f067b4 1483
2897b343
MP
1484static int apply_address_families(const Unit* u, const ExecContext *c) {
1485 assert(u);
1486 assert(c);
60f067b4 1487
2897b343
MP
1488 if (!context_has_address_families(c))
1489 return 0;
60f067b4 1490
2897b343
MP
1491 if (skip_seccomp_unavailable(u, "RestrictAddressFamilies="))
1492 return 0;
60f067b4 1493
2897b343 1494 return seccomp_restrict_address_families(c->address_families, c->address_families_whitelist);
663996b3
MS
1495}
1496
8a584da2 1497static int apply_memory_deny_write_execute(const Unit* u, const ExecContext *c) {
2897b343 1498 assert(u);
5a920b42
MP
1499 assert(c);
1500
2897b343 1501 if (!c->memory_deny_write_execute)
8a584da2
MP
1502 return 0;
1503
2897b343
MP
1504 if (skip_seccomp_unavailable(u, "MemoryDenyWriteExecute="))
1505 return 0;
5a920b42 1506
2897b343 1507 return seccomp_memory_deny_write_execute();
5a920b42
MP
1508}
1509
8a584da2 1510static int apply_restrict_realtime(const Unit* u, const ExecContext *c) {
2897b343 1511 assert(u);
5a920b42
MP
1512 assert(c);
1513
2897b343 1514 if (!c->restrict_realtime)
8a584da2
MP
1515 return 0;
1516
2897b343
MP
1517 if (skip_seccomp_unavailable(u, "RestrictRealtime="))
1518 return 0;
8a584da2 1519
2897b343 1520 return seccomp_restrict_realtime();
8a584da2
MP
1521}
1522
bb4f798a
MB
1523static int apply_restrict_suid_sgid(const Unit* u, const ExecContext *c) {
1524 assert(u);
1525 assert(c);
1526
1527 if (!c->restrict_suid_sgid)
1528 return 0;
1529
1530 if (skip_seccomp_unavailable(u, "RestrictSUIDSGID="))
1531 return 0;
1532
1533 return seccomp_restrict_suid_sgid();
1534}
1535
8a584da2 1536static int apply_protect_sysctl(const Unit *u, const ExecContext *c) {
2897b343 1537 assert(u);
8a584da2
MP
1538 assert(c);
1539
1540 /* Turn off the legacy sysctl() system call. Many distributions turn this off while building the kernel, but
1541 * let's protect even those systems where this is left on in the kernel. */
1542
2897b343 1543 if (!c->protect_kernel_tunables)
8a584da2
MP
1544 return 0;
1545
2897b343
MP
1546 if (skip_seccomp_unavailable(u, "ProtectKernelTunables="))
1547 return 0;
5a920b42 1548
2897b343 1549 return seccomp_protect_sysctl();
5a920b42
MP
1550}
1551
8a584da2 1552static int apply_protect_kernel_modules(const Unit *u, const ExecContext *c) {
2897b343 1553 assert(u);
8a584da2
MP
1554 assert(c);
1555
1556 /* Turn off module syscalls on ProtectKernelModules=yes */
1557
2897b343
MP
1558 if (!c->protect_kernel_modules)
1559 return 0;
1560
8a584da2
MP
1561 if (skip_seccomp_unavailable(u, "ProtectKernelModules="))
1562 return 0;
1563
6e866b33 1564 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM), false);
8a584da2
MP
1565}
1566
e1f67bc7
MB
1567static int apply_protect_kernel_logs(const Unit *u, const ExecContext *c) {
1568 assert(u);
1569 assert(c);
1570
1571 if (!c->protect_kernel_logs)
1572 return 0;
1573
1574 if (skip_seccomp_unavailable(u, "ProtectKernelLogs="))
1575 return 0;
1576
1577 return seccomp_protect_syslog();
1578}
1579
d0648cfe 1580static int apply_protect_clock(const Unit *u, const ExecContext *c) {
46cdbd49
BR
1581 assert(u);
1582 assert(c);
1583
1584 if (!c->protect_clock)
1585 return 0;
1586
1587 if (skip_seccomp_unavailable(u, "ProtectClock="))
1588 return 0;
1589
1590 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_CLOCK, SCMP_ACT_ERRNO(EPERM), false);
1591}
1592
8a584da2 1593static int apply_private_devices(const Unit *u, const ExecContext *c) {
2897b343 1594 assert(u);
8a584da2
MP
1595 assert(c);
1596
1597 /* If PrivateDevices= is set, also turn off iopl and all @raw-io syscalls. */
1598
2897b343
MP
1599 if (!c->private_devices)
1600 return 0;
1601
8a584da2
MP
1602 if (skip_seccomp_unavailable(u, "PrivateDevices="))
1603 return 0;
1604
6e866b33 1605 return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_RAW_IO, SCMP_ACT_ERRNO(EPERM), false);
2897b343
MP
1606}
1607
98393f85 1608static int apply_restrict_namespaces(const Unit *u, const ExecContext *c) {
2897b343
MP
1609 assert(u);
1610 assert(c);
1611
1612 if (!exec_context_restrict_namespaces_set(c))
1613 return 0;
1614
1615 if (skip_seccomp_unavailable(u, "RestrictNamespaces="))
1616 return 0;
1617
1618 return seccomp_restrict_namespaces(c->restrict_namespaces);
8a584da2
MP
1619}
1620
f5e65279
MB
1621static int apply_lock_personality(const Unit* u, const ExecContext *c) {
1622 unsigned long personality;
1623 int r;
1624
1625 assert(u);
1626 assert(c);
1627
1628 if (!c->lock_personality)
1629 return 0;
1630
1631 if (skip_seccomp_unavailable(u, "LockPersonality="))
1632 return 0;
1633
1634 personality = c->personality;
1635
1636 /* If personality is not specified, use either PER_LINUX or PER_LINUX32 depending on what is currently set. */
1637 if (personality == PERSONALITY_INVALID) {
1638
1639 r = opinionated_personality(&personality);
1640 if (r < 0)
1641 return r;
1642 }
1643
1644 return seccomp_lock_personality(personality);
1645}
1646
60f067b4
JS
1647#endif
1648
d0648cfe 1649static int apply_protect_hostname(const Unit *u, const ExecContext *c, int *ret_exit_status) {
d0648cfe
MB
1650 assert(u);
1651 assert(c);
1652
1653 if (!c->protect_hostname)
1654 return 0;
1655
1656 if (ns_type_supported(NAMESPACE_UTS)) {
1657 if (unshare(CLONE_NEWUTS) < 0) {
1658 if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno)) {
1659 *ret_exit_status = EXIT_NAMESPACE;
1660 return log_unit_error_errno(u, errno, "Failed to set up UTS namespacing: %m");
1661 }
1662
1663 log_unit_warning(u, "ProtectHostname=yes is configured, but UTS namespace setup is prohibited (container manager?), ignoring namespace setup.");
1664 }
1665 } else
1666 log_unit_warning(u, "ProtectHostname=yes is configured, but the kernel does not support UTS namespaces, ignoring namespace setup.");
1667
1668#if HAVE_SECCOMP
20a6e51f
MB
1669 int r;
1670
d0648cfe
MB
1671 if (skip_seccomp_unavailable(u, "ProtectHostname="))
1672 return 0;
1673
1674 r = seccomp_protect_hostname();
1675 if (r < 0) {
1676 *ret_exit_status = EXIT_SECCOMP;
1677 return log_unit_error_errno(u, r, "Failed to apply hostname restrictions: %m");
1678 }
1679#endif
1680
1681 return 0;
1682}
1683
7c20daf6 1684static void do_idle_pipe_dance(int idle_pipe[static 4]) {
14228c0d
MB
1685 assert(idle_pipe);
1686
6300502b
MP
1687 idle_pipe[1] = safe_close(idle_pipe[1]);
1688 idle_pipe[2] = safe_close(idle_pipe[2]);
14228c0d
MB
1689
1690 if (idle_pipe[0] >= 0) {
1691 int r;
1692
1693 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1694
1695 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
6300502b
MP
1696 ssize_t n;
1697
14228c0d 1698 /* Signal systemd that we are bored and want to continue. */
6300502b
MP
1699 n = write(idle_pipe[3], "x", 1);
1700 if (n > 0)
e3bff60a 1701 /* Wait for systemd to react to the signal above. */
e1f67bc7 1702 (void) fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
14228c0d
MB
1703 }
1704
6300502b 1705 idle_pipe[0] = safe_close(idle_pipe[0]);
60f067b4
JS
1706
1707 }
1708
6300502b 1709 idle_pipe[3] = safe_close(idle_pipe[3]);
60f067b4
JS
1710}
1711
6e866b33
MB
1712static const char *exec_directory_env_name_to_string(ExecDirectoryType t);
1713
60f067b4 1714static int build_environment(
98393f85 1715 const Unit *u,
5eef597e 1716 const ExecContext *c,
4c89c718 1717 const ExecParameters *p,
b012e921 1718 size_t n_fds,
60f067b4
JS
1719 const char *home,
1720 const char *username,
1721 const char *shell,
5a920b42
MP
1722 dev_t journal_stream_dev,
1723 ino_t journal_stream_ino,
60f067b4
JS
1724 char ***ret) {
1725
1726 _cleanup_strv_free_ char **our_env = NULL;
6e866b33 1727 ExecDirectoryType t;
b012e921 1728 size_t n_env = 0;
60f067b4 1729 char *x;
14228c0d 1730
8a584da2 1731 assert(u);
60f067b4 1732 assert(c);
6e866b33 1733 assert(p);
60f067b4
JS
1734 assert(ret);
1735
478ed938
MB
1736#define N_ENV_VARS 15
1737 our_env = new0(char*, N_ENV_VARS + _EXEC_DIRECTORY_TYPE_MAX);
60f067b4
JS
1738 if (!our_env)
1739 return -ENOMEM;
1740
1741 if (n_fds > 0) {
6300502b
MP
1742 _cleanup_free_ char *joined = NULL;
1743
f5e65279 1744 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid_cached()) < 0)
60f067b4
JS
1745 return -ENOMEM;
1746 our_env[n_env++] = x;
1747
b012e921 1748 if (asprintf(&x, "LISTEN_FDS=%zu", n_fds) < 0)
60f067b4
JS
1749 return -ENOMEM;
1750 our_env[n_env++] = x;
6300502b 1751
4c89c718 1752 joined = strv_join(p->fd_names, ":");
6300502b
MP
1753 if (!joined)
1754 return -ENOMEM;
1755
2897b343 1756 x = strjoin("LISTEN_FDNAMES=", joined);
6300502b
MP
1757 if (!x)
1758 return -ENOMEM;
1759 our_env[n_env++] = x;
60f067b4
JS
1760 }
1761
8a584da2 1762 if ((p->flags & EXEC_SET_WATCHDOG) && p->watchdog_usec > 0) {
f5e65279 1763 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid_cached()) < 0)
60f067b4
JS
1764 return -ENOMEM;
1765 our_env[n_env++] = x;
1766
4c89c718 1767 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, p->watchdog_usec) < 0)
60f067b4
JS
1768 return -ENOMEM;
1769 our_env[n_env++] = x;
1770 }
1771
8a584da2
MP
1772 /* If this is D-Bus, tell the nss-systemd module, since it relies on being able to use D-Bus look up dynamic
1773 * users via PID 1, possibly dead-locking the dbus daemon. This way it will not use D-Bus to resolve names, but
1774 * check the database directly. */
f5e65279 1775 if (p->flags & EXEC_NSS_BYPASS_BUS) {
8a584da2
MP
1776 x = strdup("SYSTEMD_NSS_BYPASS_BUS=1");
1777 if (!x)
1778 return -ENOMEM;
1779 our_env[n_env++] = x;
1780 }
1781
60f067b4 1782 if (home) {
f2dec872 1783 x = strjoin("HOME=", home);
60f067b4
JS
1784 if (!x)
1785 return -ENOMEM;
bb4f798a
MB
1786
1787 path_simplify(x + 5, true);
60f067b4
JS
1788 our_env[n_env++] = x;
1789 }
1790
1791 if (username) {
f2dec872 1792 x = strjoin("LOGNAME=", username);
60f067b4
JS
1793 if (!x)
1794 return -ENOMEM;
1795 our_env[n_env++] = x;
1796
f2dec872 1797 x = strjoin("USER=", username);
60f067b4
JS
1798 if (!x)
1799 return -ENOMEM;
1800 our_env[n_env++] = x;
1801 }
1802
1803 if (shell) {
f2dec872 1804 x = strjoin("SHELL=", shell);
60f067b4
JS
1805 if (!x)
1806 return -ENOMEM;
bb4f798a
MB
1807
1808 path_simplify(x + 6, true);
60f067b4
JS
1809 our_env[n_env++] = x;
1810 }
1811
8a584da2
MP
1812 if (!sd_id128_is_null(u->invocation_id)) {
1813 if (asprintf(&x, "INVOCATION_ID=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(u->invocation_id)) < 0)
1814 return -ENOMEM;
1815
1816 our_env[n_env++] = x;
1817 }
1818
1819 if (exec_context_needs_term(c)) {
1820 const char *tty_path, *term = NULL;
1821
1822 tty_path = exec_context_tty_path(c);
60f067b4 1823
20a6e51f
MB
1824 /* If we are forked off PID 1 and we are supposed to operate on /dev/console, then let's try
1825 * to inherit the $TERM set for PID 1. This is useful for containers so that the $TERM the
1826 * container manager passes to PID 1 ends up all the way in the console login shown. */
8a584da2 1827
20a6e51f 1828 if (path_equal_ptr(tty_path, "/dev/console") && getppid() == 1)
8a584da2 1829 term = getenv("TERM");
20a6e51f 1830
8a584da2
MP
1831 if (!term)
1832 term = default_term_for_tty(tty_path);
1833
f2dec872 1834 x = strjoin("TERM=", term);
60f067b4
JS
1835 if (!x)
1836 return -ENOMEM;
1837 our_env[n_env++] = x;
14228c0d
MB
1838 }
1839
5a920b42
MP
1840 if (journal_stream_dev != 0 && journal_stream_ino != 0) {
1841 if (asprintf(&x, "JOURNAL_STREAM=" DEV_FMT ":" INO_FMT, journal_stream_dev, journal_stream_ino) < 0)
1842 return -ENOMEM;
1843
1844 our_env[n_env++] = x;
1845 }
1846
46cdbd49
BR
1847 if (c->log_namespace) {
1848 x = strjoin("LOG_NAMESPACE=", c->log_namespace);
1849 if (!x)
1850 return -ENOMEM;
1851
1852 our_env[n_env++] = x;
1853 }
1854
6e866b33
MB
1855 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
1856 _cleanup_free_ char *pre = NULL, *joined = NULL;
1857 const char *n;
1858
1859 if (!p->prefix[t])
1860 continue;
1861
1862 if (strv_isempty(c->directories[t].paths))
1863 continue;
1864
1865 n = exec_directory_env_name_to_string(t);
1866 if (!n)
1867 continue;
1868
1869 pre = strjoin(p->prefix[t], "/");
1870 if (!pre)
1871 return -ENOMEM;
1872
1873 joined = strv_join_prefix(c->directories[t].paths, ":", pre);
1874 if (!joined)
1875 return -ENOMEM;
1876
1877 x = strjoin(n, "=", joined);
1878 if (!x)
1879 return -ENOMEM;
1880
1881 our_env[n_env++] = x;
1882 }
1883
60f067b4 1884 our_env[n_env++] = NULL;
478ed938
MB
1885 assert(n_env <= N_ENV_VARS + _EXEC_DIRECTORY_TYPE_MAX);
1886#undef N_ENV_VARS
60f067b4 1887
b012e921 1888 *ret = TAKE_PTR(our_env);
60f067b4
JS
1889
1890 return 0;
14228c0d
MB
1891}
1892
db2df898
MP
1893static int build_pass_environment(const ExecContext *c, char ***ret) {
1894 _cleanup_strv_free_ char **pass_env = NULL;
1895 size_t n_env = 0, n_bufsize = 0;
1896 char **i;
1897
1898 STRV_FOREACH(i, c->pass_environment) {
1899 _cleanup_free_ char *x = NULL;
1900 char *v;
1901
1902 v = getenv(*i);
1903 if (!v)
1904 continue;
2897b343 1905 x = strjoin(*i, "=", v);
db2df898
MP
1906 if (!x)
1907 return -ENOMEM;
f5e65279 1908
db2df898
MP
1909 if (!GREEDY_REALLOC(pass_env, n_bufsize, n_env + 2))
1910 return -ENOMEM;
f5e65279 1911
b012e921 1912 pass_env[n_env++] = TAKE_PTR(x);
db2df898 1913 pass_env[n_env] = NULL;
db2df898
MP
1914 }
1915
b012e921 1916 *ret = TAKE_PTR(pass_env);
8a584da2
MP
1917
1918 return 0;
1919}
1920
1921static bool exec_needs_mount_namespace(
1922 const ExecContext *context,
1923 const ExecParameters *params,
1d42b86d 1924 const ExecRuntime *runtime) {
8a584da2
MP
1925
1926 assert(context);
1927 assert(params);
1928
2897b343
MP
1929 if (context->root_image)
1930 return true;
1931
8a584da2
MP
1932 if (!strv_isempty(context->read_write_paths) ||
1933 !strv_isempty(context->read_only_paths) ||
1934 !strv_isempty(context->inaccessible_paths))
1935 return true;
1936
1d42b86d 1937 if (context->n_bind_mounts > 0)
2897b343
MP
1938 return true;
1939
98393f85
MB
1940 if (context->n_temporary_filesystems > 0)
1941 return true;
1942
bb4f798a 1943 if (!IN_SET(context->mount_flags, 0, MS_SHARED))
8a584da2
MP
1944 return true;
1945
1946 if (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))
1947 return true;
1948
1949 if (context->private_devices ||
b012e921 1950 context->private_mounts ||
8a584da2
MP
1951 context->protect_system != PROTECT_SYSTEM_NO ||
1952 context->protect_home != PROTECT_HOME_NO ||
1953 context->protect_kernel_tunables ||
1954 context->protect_kernel_modules ||
e1f67bc7 1955 context->protect_kernel_logs ||
8a584da2
MP
1956 context->protect_control_groups)
1957 return true;
1958
b012e921
MB
1959 if (context->root_directory) {
1960 ExecDirectoryType t;
1961
1962 if (context->mount_apivfs)
1963 return true;
1964
1965 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
1966 if (!params->prefix[t])
1967 continue;
1968
1969 if (!strv_isempty(context->directories[t].paths))
1970 return true;
1971 }
1972 }
2897b343 1973
1d42b86d
MB
1974 if (context->dynamic_user &&
1975 (!strv_isempty(context->directories[EXEC_DIRECTORY_STATE].paths) ||
1976 !strv_isempty(context->directories[EXEC_DIRECTORY_CACHE].paths) ||
1977 !strv_isempty(context->directories[EXEC_DIRECTORY_LOGS].paths)))
1978 return true;
1979
46cdbd49
BR
1980 if (context->log_namespace)
1981 return true;
1982
8a584da2
MP
1983 return false;
1984}
1985
46cdbd49 1986static int setup_private_users(uid_t ouid, gid_t ogid, uid_t uid, gid_t gid) {
8a584da2
MP
1987 _cleanup_free_ char *uid_map = NULL, *gid_map = NULL;
1988 _cleanup_close_pair_ int errno_pipe[2] = { -1, -1 };
1989 _cleanup_close_ int unshare_ready_fd = -1;
1990 _cleanup_(sigkill_waitp) pid_t pid = 0;
1991 uint64_t c = 1;
8a584da2
MP
1992 ssize_t n;
1993 int r;
1994
46cdbd49
BR
1995 /* Set up a user namespace and map the original UID/GID (IDs from before any user or group changes, i.e.
1996 * the IDs from the user or system manager(s)) to itself, the selected UID/GID to itself, and everything else to
8a584da2
MP
1997 * nobody. In order to be able to write this mapping we need CAP_SETUID in the original user namespace, which
1998 * we however lack after opening the user namespace. To work around this we fork() a temporary child process,
1999 * which waits for the parent to create the new user namespace while staying in the original namespace. The
2000 * child then writes the UID mapping, under full privileges. The parent waits for the child to finish and
46cdbd49
BR
2001 * continues execution normally.
2002 * For unprivileged users (i.e. without capabilities), the root to root mapping is excluded. As such, it
2003 * does not need CAP_SETUID to write the single line mapping to itself. */
8a584da2 2004
46cdbd49
BR
2005 /* Can only set up multiple mappings with CAP_SETUID. */
2006 if (have_effective_cap(CAP_SETUID) && uid != ouid && uid_is_valid(uid))
2897b343 2007 r = asprintf(&uid_map,
46cdbd49 2008 UID_FMT " " UID_FMT " 1\n" /* Map $OUID → $OUID */
2897b343 2009 UID_FMT " " UID_FMT " 1\n", /* Map $UID → $UID */
46cdbd49
BR
2010 ouid, ouid, uid, uid);
2011 else
2012 r = asprintf(&uid_map,
2013 UID_FMT " " UID_FMT " 1\n", /* Map $OUID → $OUID */
2014 ouid, ouid);
8a584da2 2015
46cdbd49
BR
2016 if (r < 0)
2017 return -ENOMEM;
2018
2019 /* Can only set up multiple mappings with CAP_SETGID. */
2020 if (have_effective_cap(CAP_SETGID) && gid != ogid && gid_is_valid(gid))
2897b343 2021 r = asprintf(&gid_map,
46cdbd49 2022 GID_FMT " " GID_FMT " 1\n" /* Map $OGID → $OGID */
2897b343 2023 GID_FMT " " GID_FMT " 1\n", /* Map $GID → $GID */
46cdbd49
BR
2024 ogid, ogid, gid, gid);
2025 else
2026 r = asprintf(&gid_map,
2027 GID_FMT " " GID_FMT " 1\n", /* Map $OGID -> $OGID */
2028 ogid, ogid);
2029
2030 if (r < 0)
2031 return -ENOMEM;
8a584da2
MP
2032
2033 /* Create a communication channel so that the parent can tell the child when it finished creating the user
2034 * namespace. */
2035 unshare_ready_fd = eventfd(0, EFD_CLOEXEC);
2036 if (unshare_ready_fd < 0)
2037 return -errno;
2038
2039 /* Create a communication channel so that the child can tell the parent a proper error code in case it
2040 * failed. */
2041 if (pipe2(errno_pipe, O_CLOEXEC) < 0)
2042 return -errno;
2043
1d42b86d
MB
2044 r = safe_fork("(sd-userns)", FORK_RESET_SIGNALS|FORK_DEATHSIG, &pid);
2045 if (r < 0)
2046 return r;
2047 if (r == 0) {
8a584da2
MP
2048 _cleanup_close_ int fd = -1;
2049 const char *a;
2050 pid_t ppid;
2051
2052 /* Child process, running in the original user namespace. Let's update the parent's UID/GID map from
2053 * here, after the parent opened its own user namespace. */
2054
2055 ppid = getppid();
2056 errno_pipe[0] = safe_close(errno_pipe[0]);
2057
2058 /* Wait until the parent unshared the user namespace */
2059 if (read(unshare_ready_fd, &c, sizeof(c)) < 0) {
2060 r = -errno;
2061 goto child_fail;
2062 }
2063
2064 /* Disable the setgroups() system call in the child user namespace, for good. */
2065 a = procfs_file_alloca(ppid, "setgroups");
2066 fd = open(a, O_WRONLY|O_CLOEXEC);
2067 if (fd < 0) {
2068 if (errno != ENOENT) {
2069 r = -errno;
2070 goto child_fail;
2071 }
2072
2073 /* If the file is missing the kernel is too old, let's continue anyway. */
2074 } else {
2075 if (write(fd, "deny\n", 5) < 0) {
2076 r = -errno;
2077 goto child_fail;
2078 }
2079
2080 fd = safe_close(fd);
2081 }
2082
2083 /* First write the GID map */
2084 a = procfs_file_alloca(ppid, "gid_map");
2085 fd = open(a, O_WRONLY|O_CLOEXEC);
2086 if (fd < 0) {
2087 r = -errno;
2088 goto child_fail;
2089 }
2090 if (write(fd, gid_map, strlen(gid_map)) < 0) {
2091 r = -errno;
2092 goto child_fail;
2093 }
2094 fd = safe_close(fd);
2095
2096 /* The write the UID map */
2097 a = procfs_file_alloca(ppid, "uid_map");
2098 fd = open(a, O_WRONLY|O_CLOEXEC);
2099 if (fd < 0) {
2100 r = -errno;
2101 goto child_fail;
2102 }
2103 if (write(fd, uid_map, strlen(uid_map)) < 0) {
2104 r = -errno;
2105 goto child_fail;
2106 }
2107
2108 _exit(EXIT_SUCCESS);
2109
2110 child_fail:
2111 (void) write(errno_pipe[1], &r, sizeof(r));
2112 _exit(EXIT_FAILURE);
2113 }
2114
2115 errno_pipe[1] = safe_close(errno_pipe[1]);
2116
2117 if (unshare(CLONE_NEWUSER) < 0)
2118 return -errno;
2119
2120 /* Let the child know that the namespace is ready now */
2121 if (write(unshare_ready_fd, &c, sizeof(c)) < 0)
2122 return -errno;
2123
2124 /* Try to read an error code from the child */
2125 n = read(errno_pipe[0], &r, sizeof(r));
2126 if (n < 0)
2127 return -errno;
2128 if (n == sizeof(r)) { /* an error code was sent to us */
2129 if (r < 0)
2130 return r;
2131 return -EIO;
2132 }
2133 if (n != 0) /* on success we should have read 0 bytes */
2134 return -EIO;
2135
1d42b86d
MB
2136 r = wait_for_terminate_and_check("(sd-userns)", pid, 0);
2137 pid = 0;
8a584da2
MP
2138 if (r < 0)
2139 return r;
1d42b86d 2140 if (r != EXIT_SUCCESS) /* If something strange happened with the child, let's consider this fatal, too */
8a584da2
MP
2141 return -EIO;
2142
2143 return 0;
2144}
2145
812752cc
MB
2146static bool exec_directory_is_private(const ExecContext *context, ExecDirectoryType type) {
2147 if (!context->dynamic_user)
2148 return false;
2149
2150 if (type == EXEC_DIRECTORY_CONFIGURATION)
2151 return false;
2152
2153 if (type == EXEC_DIRECTORY_RUNTIME && context->runtime_directory_preserve_mode == EXEC_PRESERVE_NO)
2154 return false;
2155
2156 return true;
2157}
2158
f5e65279 2159static int setup_exec_directory(
8a584da2
MP
2160 const ExecContext *context,
2161 const ExecParameters *params,
2162 uid_t uid,
f5e65279
MB
2163 gid_t gid,
2164 ExecDirectoryType type,
2165 int *exit_status) {
8a584da2 2166
f5e65279
MB
2167 static const int exit_status_table[_EXEC_DIRECTORY_TYPE_MAX] = {
2168 [EXEC_DIRECTORY_RUNTIME] = EXIT_RUNTIME_DIRECTORY,
2169 [EXEC_DIRECTORY_STATE] = EXIT_STATE_DIRECTORY,
2170 [EXEC_DIRECTORY_CACHE] = EXIT_CACHE_DIRECTORY,
2171 [EXEC_DIRECTORY_LOGS] = EXIT_LOGS_DIRECTORY,
2172 [EXEC_DIRECTORY_CONFIGURATION] = EXIT_CONFIGURATION_DIRECTORY,
2173 };
8a584da2
MP
2174 char **rt;
2175 int r;
2176
2177 assert(context);
2178 assert(params);
f5e65279
MB
2179 assert(type >= 0 && type < _EXEC_DIRECTORY_TYPE_MAX);
2180 assert(exit_status);
8a584da2 2181
f5e65279
MB
2182 if (!params->prefix[type])
2183 return 0;
8a584da2 2184
f5e65279
MB
2185 if (params->flags & EXEC_CHOWN_DIRECTORIES) {
2186 if (!uid_is_valid(uid))
2187 uid = 0;
2188 if (!gid_is_valid(gid))
2189 gid = 0;
2190 }
2191
2192 STRV_FOREACH(rt, context->directories[type].paths) {
2193 _cleanup_free_ char *p = NULL, *pp = NULL;
f5e65279 2194
bb4f798a 2195 p = path_join(params->prefix[type], *rt);
f5e65279
MB
2196 if (!p) {
2197 r = -ENOMEM;
2198 goto fail;
2199 }
8a584da2 2200
f5e65279 2201 r = mkdir_parents_label(p, 0755);
8a584da2 2202 if (r < 0)
f5e65279
MB
2203 goto fail;
2204
812752cc 2205 if (exec_directory_is_private(context, type)) {
6e866b33 2206 _cleanup_free_ char *private_root = NULL;
f5e65279 2207
f2dec872
BR
2208 /* So, here's one extra complication when dealing with DynamicUser=1 units. In that
2209 * case we want to avoid leaving a directory around fully accessible that is owned by
2210 * a dynamic user whose UID is later on reused. To lock this down we use the same
2211 * trick used by container managers to prohibit host users to get access to files of
2212 * the same UID in containers: we place everything inside a directory that has an
2213 * access mode of 0700 and is owned root:root, so that it acts as security boundary
2214 * for unprivileged host code. We then use fs namespacing to make this directory
2215 * permeable for the service itself.
f5e65279 2216 *
f2dec872
BR
2217 * Specifically: for a service which wants a special directory "foo/" we first create
2218 * a directory "private/" with access mode 0700 owned by root:root. Then we place
2219 * "foo" inside of that directory (i.e. "private/foo/"), and make "foo" a symlink to
2220 * "private/foo". This way, privileged host users can access "foo/" as usual, but
2221 * unprivileged host users can't look into it. Inside of the namespace of the unit
2222 * "private/" is replaced by a more liberally accessible tmpfs, into which the host's
2223 * "private/foo/" is mounted under the same name, thus disabling the access boundary
2224 * for the service and making sure it only gets access to the dirs it needs but no
2225 * others. Tricky? Yes, absolutely, but it works!
f5e65279 2226 *
f2dec872
BR
2227 * Note that we don't do this for EXEC_DIRECTORY_CONFIGURATION as that's assumed not
2228 * to be owned by the service itself.
2229 *
2230 * Also, note that we don't do this for EXEC_DIRECTORY_RUNTIME as that's often used
2231 * for sharing files or sockets with other services. */
f5e65279 2232
bb4f798a 2233 private_root = path_join(params->prefix[type], "private");
f5e65279
MB
2234 if (!private_root) {
2235 r = -ENOMEM;
2236 goto fail;
2237 }
2238
2239 /* First set up private root if it doesn't exist yet, with access mode 0700 and owned by root:root */
b012e921 2240 r = mkdir_safe_label(private_root, 0700, 0, 0, MKDIR_WARN_MODE);
f5e65279
MB
2241 if (r < 0)
2242 goto fail;
2243
bb4f798a 2244 pp = path_join(private_root, *rt);
f5e65279
MB
2245 if (!pp) {
2246 r = -ENOMEM;
2247 goto fail;
2248 }
2249
2250 /* Create all directories between the configured directory and this private root, and mark them 0755 */
2251 r = mkdir_parents_label(pp, 0755);
2252 if (r < 0)
2253 goto fail;
2254
52ad194e
MB
2255 if (is_dir(p, false) > 0 &&
2256 (laccess(pp, F_OK) < 0 && errno == ENOENT)) {
2257
2258 /* Hmm, the private directory doesn't exist yet, but the normal one exists? If so, move
2259 * it over. Most likely the service has been upgraded from one that didn't use
2260 * DynamicUser=1, to one that does. */
2261
f2dec872
BR
2262 log_info("Found pre-existing public %s= directory %s, migrating to %s.\n"
2263 "Apparently, service previously had DynamicUser= turned off, and has now turned it on.",
2264 exec_directory_type_to_string(type), p, pp);
2265
52ad194e
MB
2266 if (rename(p, pp) < 0) {
2267 r = -errno;
2268 goto fail;
2269 }
2270 } else {
2271 /* Otherwise, create the actual directory for the service */
2272
2273 r = mkdir_label(pp, context->directories[type].mode);
2274 if (r < 0 && r != -EEXIST)
2275 goto fail;
2276 }
f5e65279 2277
f5e65279 2278 /* And link it up from the original place */
6e866b33 2279 r = symlink_idempotent(pp, p, true);
f5e65279
MB
2280 if (r < 0)
2281 goto fail;
2282
f5e65279 2283 } else {
f2dec872
BR
2284 _cleanup_free_ char *target = NULL;
2285
2286 if (type != EXEC_DIRECTORY_CONFIGURATION &&
2287 readlink_and_make_absolute(p, &target) >= 0) {
cb695f0e 2288 _cleanup_free_ char *q = NULL, *q_resolved = NULL, *target_resolved = NULL;
f2dec872
BR
2289
2290 /* This already exists and is a symlink? Interesting. Maybe it's one created
2291 * by DynamicUser=1 (see above)?
2292 *
2293 * We do this for all directory types except for ConfigurationDirectory=,
2294 * since they all support the private/ symlink logic at least in some
2295 * configurations, see above. */
2296
cb695f0e
MB
2297 r = chase_symlinks(target, NULL, 0, &target_resolved, NULL);
2298 if (r < 0)
2299 goto fail;
2300
f2dec872
BR
2301 q = path_join(params->prefix[type], "private", *rt);
2302 if (!q) {
2303 r = -ENOMEM;
2304 goto fail;
2305 }
2306
cb695f0e
MB
2307 /* /var/lib or friends may be symlinks. So, let's chase them also. */
2308 r = chase_symlinks(q, NULL, CHASE_NONEXISTENT, &q_resolved, NULL);
2309 if (r < 0)
2310 goto fail;
2311
2312 if (path_equal(q_resolved, target_resolved)) {
f2dec872
BR
2313
2314 /* Hmm, apparently DynamicUser= was once turned on for this service,
2315 * but is no longer. Let's move the directory back up. */
2316
2317 log_info("Found pre-existing private %s= directory %s, migrating to %s.\n"
2318 "Apparently, service previously had DynamicUser= turned on, and has now turned it off.",
2319 exec_directory_type_to_string(type), q, p);
2320
2321 if (unlink(p) < 0) {
2322 r = -errno;
2323 goto fail;
2324 }
2325
2326 if (rename(q, p) < 0) {
2327 r = -errno;
2328 goto fail;
2329 }
2330 }
2331 }
2332
f5e65279 2333 r = mkdir_label(p, context->directories[type].mode);
bb4f798a
MB
2334 if (r < 0) {
2335 if (r != -EEXIST)
7c20daf6 2336 goto fail;
bb4f798a
MB
2337
2338 if (type == EXEC_DIRECTORY_CONFIGURATION) {
2339 struct stat st;
2340
2341 /* Don't change the owner/access mode of the configuration directory,
2342 * as in the common case it is not written to by a service, and shall
2343 * not be writable. */
2344
2345 if (stat(p, &st) < 0) {
2346 r = -errno;
2347 goto fail;
2348 }
2349
2350 /* Still complain if the access mode doesn't match */
2351 if (((st.st_mode ^ context->directories[type].mode) & 07777) != 0)
2352 log_warning("%s \'%s\' already exists but the mode is different. "
2353 "(File system: %o %sMode: %o)",
2354 exec_directory_type_to_string(type), *rt,
2355 st.st_mode & 07777, exec_directory_type_to_string(type), context->directories[type].mode & 07777);
2356
7c20daf6 2357 continue;
bb4f798a 2358 }
7c20daf6 2359 }
f5e65279
MB
2360 }
2361
bb4f798a 2362 /* Lock down the access mode (we use chmod_and_chown() to make this idempotent. We don't
f2dec872 2363 * specify UID/GID here, so that path_chown_recursive() can optimize things depending on the
bb4f798a
MB
2364 * current UID/GID ownership.) */
2365 r = chmod_and_chown(pp ?: p, context->directories[type].mode, UID_INVALID, GID_INVALID);
2366 if (r < 0)
2367 goto fail;
f5e65279 2368
bb4f798a
MB
2369 /* Then, change the ownership of the whole tree, if necessary. When dynamic users are used we
2370 * drop the suid/sgid bits, since we really don't want SUID/SGID files for dynamic UID/GID
2371 * assignments to exist.*/
2372 r = path_chown_recursive(pp ?: p, uid, gid, context->dynamic_user ? 01777 : 07777);
8a584da2 2373 if (r < 0)
f5e65279 2374 goto fail;
8a584da2
MP
2375 }
2376
2377 return 0;
f5e65279
MB
2378
2379fail:
2380 *exit_status = exit_status_table[type];
2381 return r;
8a584da2
MP
2382}
2383
#if ENABLE_SMACK
/* Applies the SMACK process label to the calling process (PID 0 is passed to mac_smack_apply_pid()).
 * An explicitly configured label wins. Otherwise, if SMACK_DEFAULT_PROCESS_LABEL is defined at build
 * time, the exec label read from the command's binary is used, falling back to that default.
 * Returns 0 on success or a negative errno-style value. */
static int setup_smack(
                const ExecContext *context,
                const ExecCommand *command) {

        int r;

        assert(context);
        assert(command);

        if (context->smack_process_label) {
                r = mac_smack_apply_pid(0, context->smack_process_label);
                return r < 0 ? r : 0;
        }

#ifdef SMACK_DEFAULT_PROCESS_LABEL
        {
                _cleanup_free_ char *file_label = NULL;

                /* A binary without exec label, or no support for reading it, is not an error */
                r = mac_smack_read(command->path, SMACK_ATTR_EXEC, &file_label);
                if (r < 0 && r != -ENODATA && r != -EOPNOTSUPP)
                        return r;

                r = mac_smack_apply_pid(0, file_label ? file_label : SMACK_DEFAULT_PROCESS_LABEL);
                if (r < 0)
                        return r;
        }
#endif

        return 0;
}
#endif
8a584da2 2416
f5e65279
MB
2417static int compile_bind_mounts(
2418 const ExecContext *context,
2419 const ExecParameters *params,
2420 BindMount **ret_bind_mounts,
b012e921 2421 size_t *ret_n_bind_mounts,
f5e65279
MB
2422 char ***ret_empty_directories) {
2423
2424 _cleanup_strv_free_ char **empty_directories = NULL;
2425 BindMount *bind_mounts;
b012e921 2426 size_t n, h = 0, i;
f5e65279
MB
2427 ExecDirectoryType t;
2428 int r;
2429
2430 assert(context);
2431 assert(params);
2432 assert(ret_bind_mounts);
2433 assert(ret_n_bind_mounts);
2434 assert(ret_empty_directories);
2435
2436 n = context->n_bind_mounts;
2437 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2438 if (!params->prefix[t])
2439 continue;
2440
2441 n += strv_length(context->directories[t].paths);
2442 }
2443
2444 if (n <= 0) {
2445 *ret_bind_mounts = NULL;
2446 *ret_n_bind_mounts = 0;
2447 *ret_empty_directories = NULL;
2448 return 0;
2449 }
2450
2451 bind_mounts = new(BindMount, n);
2452 if (!bind_mounts)
2453 return -ENOMEM;
2454
52ad194e 2455 for (i = 0; i < context->n_bind_mounts; i++) {
f5e65279
MB
2456 BindMount *item = context->bind_mounts + i;
2457 char *s, *d;
2458
2459 s = strdup(item->source);
2460 if (!s) {
2461 r = -ENOMEM;
2462 goto finish;
2463 }
2464
2465 d = strdup(item->destination);
2466 if (!d) {
2467 free(s);
2468 r = -ENOMEM;
2469 goto finish;
2470 }
2471
2472 bind_mounts[h++] = (BindMount) {
2473 .source = s,
2474 .destination = d,
2475 .read_only = item->read_only,
2476 .recursive = item->recursive,
2477 .ignore_enoent = item->ignore_enoent,
2478 };
2479 }
2480
2481 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
2482 char **suffix;
2483
2484 if (!params->prefix[t])
2485 continue;
2486
2487 if (strv_isempty(context->directories[t].paths))
2488 continue;
2489
812752cc 2490 if (exec_directory_is_private(context, t) &&
b012e921 2491 !(context->root_directory || context->root_image)) {
f5e65279
MB
2492 char *private_root;
2493
2494 /* So this is for a dynamic user, and we need to make sure the process can access its own
2495 * directory. For that we overmount the usually inaccessible "private" subdirectory with a
2496 * tmpfs that makes it accessible and is empty except for the submounts we do this for. */
2497
f2dec872 2498 private_root = path_join(params->prefix[t], "private");
f5e65279
MB
2499 if (!private_root) {
2500 r = -ENOMEM;
2501 goto finish;
2502 }
2503
2504 r = strv_consume(&empty_directories, private_root);
98393f85 2505 if (r < 0)
f5e65279 2506 goto finish;
f5e65279
MB
2507 }
2508
2509 STRV_FOREACH(suffix, context->directories[t].paths) {
2510 char *s, *d;
2511
812752cc 2512 if (exec_directory_is_private(context, t))
f2dec872 2513 s = path_join(params->prefix[t], "private", *suffix);
f5e65279 2514 else
f2dec872 2515 s = path_join(params->prefix[t], *suffix);
f5e65279
MB
2516 if (!s) {
2517 r = -ENOMEM;
2518 goto finish;
2519 }
2520
812752cc 2521 if (exec_directory_is_private(context, t) &&
b012e921
MB
2522 (context->root_directory || context->root_image))
2523 /* When RootDirectory= or RootImage= are set, then the symbolic link to the private
2524 * directory is not created on the root directory. So, let's bind-mount the directory
2525 * on the 'non-private' place. */
f2dec872 2526 d = path_join(params->prefix[t], *suffix);
b012e921
MB
2527 else
2528 d = strdup(s);
f5e65279
MB
2529 if (!d) {
2530 free(s);
2531 r = -ENOMEM;
2532 goto finish;
2533 }
2534
2535 bind_mounts[h++] = (BindMount) {
2536 .source = s,
2537 .destination = d,
2538 .read_only = false,
bb4f798a 2539 .nosuid = context->dynamic_user, /* don't allow suid/sgid when DynamicUser= is on */
f5e65279
MB
2540 .recursive = true,
2541 .ignore_enoent = false,
2542 };
2543 }
2544 }
2545
2546 assert(h == n);
2547
2548 *ret_bind_mounts = bind_mounts;
2549 *ret_n_bind_mounts = n;
b012e921 2550 *ret_empty_directories = TAKE_PTR(empty_directories);
f5e65279
MB
2551
2552 return (int) n;
2553
2554finish:
2555 bind_mount_free_many(bind_mounts, h);
2556 return r;
2557}
2558
e1f67bc7
MB
2559static bool insist_on_sandboxing(
2560 const ExecContext *context,
2561 const char *root_dir,
2562 const char *root_image,
2563 const BindMount *bind_mounts,
2564 size_t n_bind_mounts) {
2565
2566 size_t i;
2567
2568 assert(context);
2569 assert(n_bind_mounts == 0 || bind_mounts);
2570
2571 /* Checks whether we need to insist on fs namespacing. i.e. whether we have settings configured that
2572 * would alter the view on the file system beyond making things read-only or invisble, i.e. would
2573 * rearrange stuff in a way we cannot ignore gracefully. */
2574
2575 if (context->n_temporary_filesystems > 0)
2576 return true;
2577
2578 if (root_dir || root_image)
2579 return true;
2580
2581 if (context->dynamic_user)
2582 return true;
2583
2584 /* If there are any bind mounts set that don't map back onto themselves, fs namespacing becomes
2585 * essential. */
2586 for (i = 0; i < n_bind_mounts; i++)
2587 if (!path_equal(bind_mounts[i].source, bind_mounts[i].destination))
2588 return true;
2589
46cdbd49
BR
2590 if (context->log_namespace)
2591 return true;
2592
e1f67bc7
MB
2593 return false;
2594}
2595
2897b343 2596static int apply_mount_namespace(
98393f85
MB
2597 const Unit *u,
2598 const ExecCommand *command,
2897b343
MP
2599 const ExecContext *context,
2600 const ExecParameters *params,
f2dec872
BR
2601 const ExecRuntime *runtime,
2602 char **error_path) {
2897b343 2603
52ad194e 2604 _cleanup_strv_free_ char **empty_directories = NULL;
8a584da2 2605 char *tmp = NULL, *var = NULL;
2897b343 2606 const char *root_dir = NULL, *root_image = NULL;
b012e921 2607 NamespaceInfo ns_info;
f5e65279
MB
2608 bool needs_sandboxing;
2609 BindMount *bind_mounts = NULL;
b012e921 2610 size_t n_bind_mounts = 0;
2897b343 2611 int r;
8a584da2
MP
2612
2613 assert(context);
2614
2897b343
MP
2615 if (params->flags & EXEC_APPLY_CHROOT) {
2616 root_image = context->root_image;
2617
2618 if (!root_image)
2619 root_dir = context->root_directory;
2620 }
2621
f5e65279
MB
2622 r = compile_bind_mounts(context, params, &bind_mounts, &n_bind_mounts, &empty_directories);
2623 if (r < 0)
2624 return r;
2625
f5e65279 2626 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
46cdbd49
BR
2627 if (needs_sandboxing) {
2628 /* The runtime struct only contains the parent of the private /tmp,
2629 * which is non-accessible to world users. Inside of it there's a /tmp
2630 * that is sticky, and that's the one we want to use here. */
2631
2632 if (context->private_tmp && runtime) {
2633 if (runtime->tmp_dir)
2634 tmp = strjoina(runtime->tmp_dir, "/tmp");
2635 if (runtime->var_tmp_dir)
2636 var = strjoina(runtime->var_tmp_dir, "/tmp");
2637 }
2638
b012e921
MB
2639 ns_info = (NamespaceInfo) {
2640 .ignore_protect_paths = false,
2641 .private_dev = context->private_devices,
2642 .protect_control_groups = context->protect_control_groups,
2643 .protect_kernel_tunables = context->protect_kernel_tunables,
2644 .protect_kernel_modules = context->protect_kernel_modules,
e1f67bc7 2645 .protect_kernel_logs = context->protect_kernel_logs,
bb4f798a 2646 .protect_hostname = context->protect_hostname,
b012e921
MB
2647 .mount_apivfs = context->mount_apivfs,
2648 .private_mounts = context->private_mounts,
2649 };
46cdbd49 2650 } else if (!context->dynamic_user && root_dir)
b012e921
MB
2651 /*
2652 * If DynamicUser=no and RootDirectory= is set then lets pass a relaxed
2653 * sandbox info, otherwise enforce it, don't ignore protected paths and
2654 * fail if we are enable to apply the sandbox inside the mount namespace.
2655 */
2656 ns_info = (NamespaceInfo) {
2657 .ignore_protect_paths = true,
2658 };
2659 else
2660 ns_info = (NamespaceInfo) {};
2897b343 2661
bb4f798a
MB
2662 if (context->mount_flags == MS_SHARED)
2663 log_unit_debug(u, "shared mount propagation hidden by other fs namespacing unit settings: ignoring");
2664
2897b343 2665 r = setup_namespace(root_dir, root_image,
52ad194e 2666 &ns_info, context->read_write_paths,
f5e65279
MB
2667 needs_sandboxing ? context->read_only_paths : NULL,
2668 needs_sandboxing ? context->inaccessible_paths : NULL,
2669 empty_directories,
2670 bind_mounts,
2671 n_bind_mounts,
98393f85
MB
2672 context->temporary_filesystems,
2673 context->n_temporary_filesystems,
8a584da2
MP
2674 tmp,
2675 var,
46cdbd49 2676 context->log_namespace,
f5e65279
MB
2677 needs_sandboxing ? context->protect_home : PROTECT_HOME_NO,
2678 needs_sandboxing ? context->protect_system : PROTECT_SYSTEM_NO,
2897b343 2679 context->mount_flags,
46cdbd49 2680 DISSECT_IMAGE_DISCARD_ON_LOOP|DISSECT_IMAGE_RELAX_VAR_CHECK|DISSECT_IMAGE_FSCK,
f2dec872 2681 error_path);
8a584da2 2682
6e866b33 2683 /* If we couldn't set up the namespace this is probably due to a missing capability. setup_namespace() reports
f2dec872 2684 * that with a special, recognizable error ENOANO. In this case, silently proceed, but only if exclusively
6e866b33
MB
2685 * sandboxing options were used, i.e. nothing such as RootDirectory= or BindMount= that would result in a
2686 * completely different execution environment. */
2687 if (r == -ENOANO) {
e1f67bc7
MB
2688 if (insist_on_sandboxing(
2689 context,
2690 root_dir, root_image,
2691 bind_mounts,
2692 n_bind_mounts)) {
2693 log_unit_debug(u, "Failed to set up namespace, and refusing to continue since the selected namespacing options alter mount environment non-trivially.\n"
2694 "Bind mounts: %zu, temporary filesystems: %zu, root directory: %s, root image: %s, dynamic user: %s",
2695 n_bind_mounts, context->n_temporary_filesystems, yes_no(root_dir), yes_no(root_image), yes_no(context->dynamic_user));
2696
2697 r = -EOPNOTSUPP;
2698 } else {
6e866b33 2699 log_unit_debug(u, "Failed to set up namespace, assuming containerized execution and ignoring.");
e1f67bc7 2700 r = 0;
6e866b33 2701 }
8a584da2
MP
2702 }
2703
e1f67bc7 2704 bind_mount_free_many(bind_mounts, n_bind_mounts);
8a584da2 2705 return r;
db2df898
MP
2706}
2707
2897b343
MP
2708static int apply_working_directory(
2709 const ExecContext *context,
2710 const ExecParameters *params,
2711 const char *home,
2897b343
MP
2712 int *exit_status) {
2713
2714 const char *d, *wd;
e3bff60a
MP
2715
2716 assert(context);
2897b343
MP
2717 assert(exit_status);
2718
2719 if (context->working_directory_home) {
2720
2721 if (!home) {
2722 *exit_status = EXIT_CHDIR;
2723 return -ENXIO;
2724 }
e3bff60a 2725
8a584da2 2726 wd = home;
2897b343
MP
2727
2728 } else if (context->working_directory)
8a584da2
MP
2729 wd = context->working_directory;
2730 else
2731 wd = "/";
e3bff60a 2732
f2dec872 2733 if (params->flags & EXEC_APPLY_CHROOT)
8a584da2 2734 d = wd;
f2dec872 2735 else
2897b343 2736 d = prefix_roota(context->root_directory, wd);
e3bff60a 2737
2897b343
MP
2738 if (chdir(d) < 0 && !context->working_directory_missing_ok) {
2739 *exit_status = EXIT_CHDIR;
8a584da2 2740 return -errno;
2897b343
MP
2741 }
2742
2743 return 0;
2744}
2745
f2dec872
BR
2746static int apply_root_directory(
2747 const ExecContext *context,
2748 const ExecParameters *params,
2749 const bool needs_mount_ns,
2750 int *exit_status) {
2751
2752 assert(context);
2753 assert(exit_status);
2754
2755 if (params->flags & EXEC_APPLY_CHROOT) {
2756 if (!needs_mount_ns && context->root_directory)
2757 if (chroot(context->root_directory) < 0) {
2758 *exit_status = EXIT_CHROOT;
2759 return -errno;
2760 }
2761 }
2762
2763 return 0;
2764}
2765
f5e65279 2766static int setup_keyring(
98393f85 2767 const Unit *u,
f5e65279
MB
2768 const ExecContext *context,
2769 const ExecParameters *p,
2770 uid_t uid, gid_t gid) {
2771
2897b343 2772 key_serial_t keyring;
b012e921
MB
2773 int r = 0;
2774 uid_t saved_uid;
2775 gid_t saved_gid;
2897b343
MP
2776
2777 assert(u);
f5e65279 2778 assert(context);
2897b343
MP
2779 assert(p);
2780
2781 /* Let's set up a new per-service "session" kernel keyring for each system service. This has the benefit that
2782 * each service runs with its own keyring shared among all processes of the service, but with no hook-up beyond
2783 * that scope, and in particular no link to the per-UID keyring. If we don't do this the keyring will be
2784 * automatically created on-demand and then linked to the per-UID keyring, by the kernel. The kernel's built-in
2785 * on-demand behaviour is very appropriate for login users, but probably not so much for system services, where
2786 * UIDs are not necessarily specific to a service but reused (at least in the case of UID 0). */
2787
f5e65279
MB
2788 if (context->keyring_mode == EXEC_KEYRING_INHERIT)
2789 return 0;
2790
b012e921
MB
2791 /* Acquiring a reference to the user keyring is nasty. We briefly change identity in order to get things set up
2792 * properly by the kernel. If we don't do that then we can't create it atomically, and that sucks for parallel
2793 * execution. This mimics what pam_keyinit does, too. Setting up session keyring, to be owned by the right user
2794 * & group is just as nasty as acquiring a reference to the user keyring. */
2795
2796 saved_uid = getuid();
2797 saved_gid = getgid();
2798
2799 if (gid_is_valid(gid) && gid != saved_gid) {
2800 if (setregid(gid, -1) < 0)
2801 return log_unit_error_errno(u, errno, "Failed to change GID for user keyring: %m");
2802 }
2803
2804 if (uid_is_valid(uid) && uid != saved_uid) {
2805 if (setreuid(uid, -1) < 0) {
2806 r = log_unit_error_errno(u, errno, "Failed to change UID for user keyring: %m");
2807 goto out;
2808 }
2809 }
2810
2897b343
MP
2811 keyring = keyctl(KEYCTL_JOIN_SESSION_KEYRING, 0, 0, 0, 0);
2812 if (keyring == -1) {
2813 if (errno == ENOSYS)
f5e65279 2814 log_unit_debug_errno(u, errno, "Kernel keyring not supported, ignoring.");
2897b343 2815 else if (IN_SET(errno, EACCES, EPERM))
f5e65279 2816 log_unit_debug_errno(u, errno, "Kernel keyring access prohibited, ignoring.");
2897b343 2817 else if (errno == EDQUOT)
f5e65279 2818 log_unit_debug_errno(u, errno, "Out of kernel keyrings to allocate, ignoring.");
2897b343 2819 else
b012e921 2820 r = log_unit_error_errno(u, errno, "Setting up kernel keyring failed: %m");
2897b343 2821
b012e921
MB
2822 goto out;
2823 }
2824
2825 /* When requested link the user keyring into the session keyring. */
2826 if (context->keyring_mode == EXEC_KEYRING_SHARED) {
2827
2828 if (keyctl(KEYCTL_LINK,
2829 KEY_SPEC_USER_KEYRING,
2830 KEY_SPEC_SESSION_KEYRING, 0, 0) < 0) {
2831 r = log_unit_error_errno(u, errno, "Failed to link user keyring into session keyring: %m");
2832 goto out;
2833 }
2834 }
2835
2836 /* Restore uid/gid back */
2837 if (uid_is_valid(uid) && uid != saved_uid) {
2838 if (setreuid(saved_uid, -1) < 0) {
2839 r = log_unit_error_errno(u, errno, "Failed to change UID back for user keyring: %m");
2840 goto out;
2841 }
2897b343
MP
2842 }
2843
b012e921
MB
2844 if (gid_is_valid(gid) && gid != saved_gid) {
2845 if (setregid(saved_gid, -1) < 0)
2846 return log_unit_error_errno(u, errno, "Failed to change GID back for user keyring: %m");
2847 }
2848
2849 /* Populate they keyring with the invocation ID by default, as original saved_uid. */
2897b343
MP
2850 if (!sd_id128_is_null(u->invocation_id)) {
2851 key_serial_t key;
2852
2853 key = add_key("user", "invocation_id", &u->invocation_id, sizeof(u->invocation_id), KEY_SPEC_SESSION_KEYRING);
2854 if (key == -1)
f5e65279 2855 log_unit_debug_errno(u, errno, "Failed to add invocation ID to keyring, ignoring: %m");
2897b343
MP
2856 else {
2857 if (keyctl(KEYCTL_SETPERM, key,
2858 KEY_POS_VIEW|KEY_POS_READ|KEY_POS_SEARCH|
2859 KEY_USR_VIEW|KEY_USR_READ|KEY_USR_SEARCH, 0, 0) < 0)
b012e921 2860 r = log_unit_error_errno(u, errno, "Failed to restrict invocation ID permission: %m");
2897b343
MP
2861 }
2862 }
2863
b012e921
MB
2864out:
2865 /* Revert back uid & gid for the the last time, and exit */
2866 /* no extra logging, as only the first already reported error matters */
2867 if (getuid() != saved_uid)
2868 (void) setreuid(saved_uid, -1);
f5e65279 2869
b012e921
MB
2870 if (getgid() != saved_gid)
2871 (void) setregid(saved_gid, -1);
f5e65279 2872
b012e921 2873 return r;
8a584da2
MP
2874}
2875
/* Appends those of the two file descriptors in 'pair' that are valid (i.e. >= 0) to 'array', in
 * order, advancing the element counter *n for each one added. The caller has to make sure the array
 * has room for up to two more entries. */
static void append_socket_pair(int *array, size_t *n, const int pair[static 2]) {
        size_t k;

        assert(array);
        assert(n);
        assert(pair);

        for (k = 0; k < 2; k++) {
                if (pair[k] < 0)
                        continue;

                array[*n] = pair[k];
                (*n)++;
        }
}
2886
db2df898
MP
2887static int close_remaining_fds(
2888 const ExecParameters *params,
98393f85
MB
2889 const ExecRuntime *runtime,
2890 const DynamicCreds *dcreds,
8a584da2 2891 int user_lookup_fd,
db2df898 2892 int socket_fd,
6e866b33 2893 int exec_fd,
46cdbd49 2894 const int *fds, size_t n_fds) {
db2df898 2895
b012e921 2896 size_t n_dont_close = 0;
8a584da2 2897 int dont_close[n_fds + 12];
db2df898
MP
2898
2899 assert(params);
2900
2901 if (params->stdin_fd >= 0)
2902 dont_close[n_dont_close++] = params->stdin_fd;
2903 if (params->stdout_fd >= 0)
2904 dont_close[n_dont_close++] = params->stdout_fd;
2905 if (params->stderr_fd >= 0)
2906 dont_close[n_dont_close++] = params->stderr_fd;
2907
2908 if (socket_fd >= 0)
2909 dont_close[n_dont_close++] = socket_fd;
6e866b33
MB
2910 if (exec_fd >= 0)
2911 dont_close[n_dont_close++] = exec_fd;
db2df898
MP
2912 if (n_fds > 0) {
2913 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
2914 n_dont_close += n_fds;
2915 }
2916
8a584da2
MP
2917 if (runtime)
2918 append_socket_pair(dont_close, &n_dont_close, runtime->netns_storage_socket);
2919
2920 if (dcreds) {
2921 if (dcreds->user)
2922 append_socket_pair(dont_close, &n_dont_close, dcreds->user->storage_socket);
2923 if (dcreds->group)
2924 append_socket_pair(dont_close, &n_dont_close, dcreds->group->storage_socket);
db2df898
MP
2925 }
2926
8a584da2
MP
2927 if (user_lookup_fd >= 0)
2928 dont_close[n_dont_close++] = user_lookup_fd;
2929
db2df898
MP
2930 return close_all_fds(dont_close, n_dont_close);
2931}
2932
8a584da2
MP
2933static int send_user_lookup(
2934 Unit *unit,
2935 int user_lookup_fd,
2936 uid_t uid,
2937 gid_t gid) {
2938
2939 assert(unit);
2940
2941 /* Send the resolved UID/GID to PID 1 after we learnt it. We send a single datagram, containing the UID/GID
2942 * data as well as the unit name. Note that we suppress sending this if no user/group to resolve was
2943 * specified. */
2944
2945 if (user_lookup_fd < 0)
2946 return 0;
2947
2948 if (!uid_is_valid(uid) && !gid_is_valid(gid))
2949 return 0;
2950
2951 if (writev(user_lookup_fd,
2952 (struct iovec[]) {
f5e65279
MB
2953 IOVEC_INIT(&uid, sizeof(uid)),
2954 IOVEC_INIT(&gid, sizeof(gid)),
2955 IOVEC_INIT_STRING(unit->id) }, 3) < 0)
8a584da2
MP
2956 return -errno;
2957
2958 return 0;
2959}
2960
2897b343
MP
2961static int acquire_home(const ExecContext *c, uid_t uid, const char** home, char **buf) {
2962 int r;
2963
2964 assert(c);
2965 assert(home);
2966 assert(buf);
2967
2968 /* If WorkingDirectory=~ is set, try to acquire a usable home directory. */
2969
2970 if (*home)
2971 return 0;
2972
2973 if (!c->working_directory_home)
2974 return 0;
2975
2897b343
MP
2976 r = get_home_dir(buf);
2977 if (r < 0)
2978 return r;
2979
2980 *home = *buf;
2981 return 1;
2982}
2983
f5e65279
MB
2984static int compile_suggested_paths(const ExecContext *c, const ExecParameters *p, char ***ret) {
2985 _cleanup_strv_free_ char ** list = NULL;
2986 ExecDirectoryType t;
2987 int r;
2988
2989 assert(c);
2990 assert(p);
2991 assert(ret);
2992
2993 assert(c->dynamic_user);
2994
2995 /* Compile a list of paths that it might make sense to read the owning UID from to use as initial candidate for
2996 * dynamic UID allocation, in order to save us from doing costly recursive chown()s of the special
2997 * directories. */
2998
2999 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
3000 char **i;
3001
3002 if (t == EXEC_DIRECTORY_CONFIGURATION)
3003 continue;
3004
3005 if (!p->prefix[t])
3006 continue;
3007
3008 STRV_FOREACH(i, c->directories[t].paths) {
3009 char *e;
3010
812752cc 3011 if (exec_directory_is_private(c, t))
f2dec872 3012 e = path_join(p->prefix[t], "private", *i);
812752cc
MB
3013 else
3014 e = path_join(p->prefix[t], *i);
f5e65279
MB
3015 if (!e)
3016 return -ENOMEM;
3017
3018 r = strv_consume(&list, e);
3019 if (r < 0)
3020 return r;
3021 }
3022 }
3023
b012e921 3024 *ret = TAKE_PTR(list);
f5e65279
MB
3025
3026 return 0;
3027}
3028
98393f85
MB
3029static char *exec_command_line(char **argv);
3030
6e866b33
MB
3031static int exec_parameters_get_cgroup_path(const ExecParameters *params, char **ret) {
3032 bool using_subcgroup;
3033 char *p;
3034
3035 assert(params);
3036 assert(ret);
3037
3038 if (!params->cgroup_path)
3039 return -EINVAL;
3040
3041 /* If we are called for a unit where cgroup delegation is on, and the payload created its own populated
3042 * subcgroup (which we expect it to do, after all it asked for delegation), then we cannot place the control
3043 * processes started after the main unit's process in the unit's main cgroup because it is now an inner one,
3044 * and inner cgroups may not contain processes. Hence, if delegation is on, and this is a control process,
3045 * let's use ".control" as subcgroup instead. Note that we do so only for ExecStartPost=, ExecReload=,
3046 * ExecStop=, ExecStopPost=, i.e. for the commands where the main process is already forked. For ExecStartPre=
3047 * this is not necessary, the cgroup is still empty. We distinguish these cases with the EXEC_CONTROL_CGROUP
3048 * flag, which is only passed for the former statements, not for the latter. */
3049
3050 using_subcgroup = FLAGS_SET(params->flags, EXEC_CONTROL_CGROUP|EXEC_CGROUP_DELEGATE|EXEC_IS_CONTROL);
3051 if (using_subcgroup)
f2dec872 3052 p = path_join(params->cgroup_path, ".control");
6e866b33
MB
3053 else
3054 p = strdup(params->cgroup_path);
3055 if (!p)
3056 return -ENOMEM;
3057
3058 *ret = p;
3059 return using_subcgroup;
3060}
3061
7d4b9ad6
MB
3062static int exec_context_cpu_affinity_from_numa(const ExecContext *c, CPUSet *ret) {
3063 _cleanup_(cpu_set_reset) CPUSet s = {};
3064 int r;
3065
3066 assert(c);
3067 assert(ret);
3068
3069 if (!c->numa_policy.nodes.set) {
3070 log_debug("Can't derive CPU affinity mask from NUMA mask because NUMA mask is not set, ignoring");
3071 return 0;
3072 }
3073
3074 r = numa_to_cpu_set(&c->numa_policy, &s);
3075 if (r < 0)
3076 return r;
3077
3078 cpu_set_reset(ret);
3079
3080 return cpu_set_add_all(ret, &s);
3081}
3082
3083bool exec_context_get_cpu_affinity_from_numa(const ExecContext *c) {
3084 assert(c);
3085
3086 return c->cpu_affinity_from_numa;
3087}
3088
e735f4d4 3089static int exec_child(
e3bff60a 3090 Unit *unit,
98393f85 3091 const ExecCommand *command,
e735f4d4
MP
3092 const ExecContext *context,
3093 const ExecParameters *params,
3094 ExecRuntime *runtime,
8a584da2 3095 DynamicCreds *dcreds,
e735f4d4 3096 int socket_fd,
f2dec872 3097 const int named_iofds[static 3],
81c58355 3098 int *fds,
b012e921 3099 size_t n_socket_fds,
6e866b33 3100 size_t n_storage_fds,
e735f4d4 3101 char **files_env,
8a584da2 3102 int user_lookup_fd,
f5e65279 3103 int *exit_status) {
5eef597e 3104
bb4f798a 3105 _cleanup_strv_free_ char **our_env = NULL, **pass_env = NULL, **accum_env = NULL, **replaced_argv = NULL;
6e866b33 3106 int *fds_with_exec_fd, n_fds_with_exec_fd, r, ngids = 0, exec_fd = -1;
8a584da2
MP
3107 _cleanup_free_ gid_t *supplementary_gids = NULL;
3108 const char *username = NULL, *groupname = NULL;
6e866b33 3109 _cleanup_free_ char *home_buffer = NULL;
8a584da2 3110 const char *home = NULL, *shell = NULL;
bb4f798a 3111 char **final_argv = NULL;
5a920b42
MP
3112 dev_t journal_stream_dev = 0;
3113 ino_t journal_stream_ino = 0;
46cdbd49 3114 bool userns_set_up = false;
f5e65279
MB
3115 bool needs_sandboxing, /* Do we need to set up full sandboxing? (i.e. all namespacing, all MAC stuff, caps, yadda yadda */
3116 needs_setuid, /* Do we need to do the actual setresuid()/setresgid() calls? */
3117 needs_mount_namespace, /* Do we need to set up a mount namespace for this kernel? */
3118 needs_ambient_hack; /* Do we need to apply the ambient capabilities hack? */
3119#if HAVE_SELINUX
52ad194e 3120 _cleanup_free_ char *mac_selinux_context_net = NULL;
f5e65279
MB
3121 bool use_selinux = false;
3122#endif
3123#if ENABLE_SMACK
3124 bool use_smack = false;
3125#endif
3126#if HAVE_APPARMOR
3127 bool use_apparmor = false;
3128#endif
46cdbd49
BR
3129 uid_t saved_uid = getuid();
3130 gid_t saved_gid = getgid();
f47781d8
MP
3131 uid_t uid = UID_INVALID;
3132 gid_t gid = GID_INVALID;
b012e921 3133 size_t n_fds;
f5e65279
MB
3134 ExecDirectoryType dt;
3135 int secure_bits;
46cdbd49
BR
3136 _cleanup_free_ gid_t *gids_after_pam = NULL;
3137 int ngids_after_pam = 0;
663996b3 3138
e3bff60a 3139 assert(unit);
663996b3
MS
3140 assert(command);
3141 assert(context);
5eef597e 3142 assert(params);
e735f4d4 3143 assert(exit_status);
5eef597e
MP
3144
3145 rename_process_from_path(command->path);
3146
3147 /* We reset exactly these signals, since they are the
3148 * only ones we set to SIG_IGN in the main daemon. All
3149 * others we leave untouched because we set them to
3150 * SIG_DFL or a valid handler initially, both of which
3151 * will be demoted to SIG_DFL. */
86f210e9
MP
3152 (void) default_signals(SIGNALS_CRASH_HANDLER,
3153 SIGNALS_IGNORE, -1);
5eef597e
MP
3154
3155 if (context->ignore_sigpipe)
86f210e9 3156 (void) ignore_signals(SIGPIPE, -1);
5eef597e 3157
e735f4d4
MP
3158 r = reset_signal_mask();
3159 if (r < 0) {
3160 *exit_status = EXIT_SIGNAL_MASK;
f5e65279 3161 return log_unit_error_errno(unit, r, "Failed to set process signal mask: %m");
5eef597e 3162 }
663996b3 3163
5eef597e
MP
3164 if (params->idle_pipe)
3165 do_idle_pipe_dance(params->idle_pipe);
663996b3 3166
f5e65279
MB
3167 /* Close fds we don't need very early to make sure we don't block init reexecution because it cannot bind its
3168 * sockets. Among the fds we close are the logging fds, and we want to keep them closed, so that we don't have
3169 * any fds open we don't really want open during the transition. In order to make logging work, we switch the
3170 * log subsystem into open_when_needed mode, so that it reopens the logs on every single log call. */
e735f4d4 3171
5eef597e 3172 log_forget_fds();
f5e65279
MB
3173 log_set_open_when_needed(true);
3174
3175 /* In case anything used libc syslog(), close this here, too */
3176 closelog();
663996b3 3177
6e866b33
MB
3178 n_fds = n_socket_fds + n_storage_fds;
3179 r = close_remaining_fds(params, runtime, dcreds, user_lookup_fd, socket_fd, params->exec_fd, fds, n_fds);
e735f4d4
MP
3180 if (r < 0) {
3181 *exit_status = EXIT_FDS;
f5e65279 3182 return log_unit_error_errno(unit, r, "Failed to close unwanted file descriptors: %m");
5eef597e 3183 }
663996b3 3184
5eef597e
MP
3185 if (!context->same_pgrp)
3186 if (setsid() < 0) {
e735f4d4 3187 *exit_status = EXIT_SETSID;
f5e65279 3188 return log_unit_error_errno(unit, errno, "Failed to create new process session: %m");
5eef597e
MP
3189 }
3190
4c89c718 3191 exec_context_tty_reset(context, params);
5eef597e 3192
2897b343
MP
3193 if (unit_shall_confirm_spawn(unit)) {
3194 const char *vc = params->confirm_spawn;
3195 _cleanup_free_ char *cmdline = NULL;
3196
6e866b33 3197 cmdline = exec_command_line(command->argv);
2897b343 3198 if (!cmdline) {
f5e65279
MB
3199 *exit_status = EXIT_MEMORY;
3200 return log_oom();
2897b343 3201 }
5eef597e 3202
2897b343
MP
3203 r = ask_for_confirmation(vc, unit, cmdline);
3204 if (r != CONFIRM_EXECUTE) {
3205 if (r == CONFIRM_PRETEND_SUCCESS) {
3206 *exit_status = EXIT_SUCCESS;
3207 return 0;
3208 }
e735f4d4 3209 *exit_status = EXIT_CONFIRM;
f5e65279 3210 log_unit_error(unit, "Execution cancelled by the user");
5eef597e 3211 return -ECANCELED;
5eef597e 3212 }
663996b3
MS
3213 }
3214
6e866b33
MB
3215 /* We are about to invoke NSS and PAM modules. Let's tell them what we are doing here, maybe they care. This is
3216 * used by nss-resolve to disable itself when we are about to start systemd-resolved, to avoid deadlocks. Note
3217 * that these env vars do not survive the execve(), which means they really only apply to the PAM and NSS
3218 * invocations themselves. Also note that while we'll only invoke NSS modules involved in user management they
3219 * might internally call into other NSS modules that are involved in hostname resolution, we never know. */
3220 if (setenv("SYSTEMD_ACTIVATION_UNIT", unit->id, true) != 0 ||
3221 setenv("SYSTEMD_ACTIVATION_SCOPE", MANAGER_IS_SYSTEM(unit->manager) ? "system" : "user", true) != 0) {
3222 *exit_status = EXIT_MEMORY;
3223 return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
3224 }
3225
8a584da2 3226 if (context->dynamic_user && dcreds) {
f5e65279 3227 _cleanup_strv_free_ char **suggested_paths = NULL;
8a584da2 3228
6e866b33
MB
3229 /* On top of that, make sure we bypass our own NSS module nss-systemd comprehensively for any NSS
3230 * checks, if DynamicUser=1 is used, as we shouldn't create a feedback loop with ourselves here.*/
8a584da2
MP
3231 if (putenv((char*) "SYSTEMD_NSS_DYNAMIC_BYPASS=1") != 0) {
3232 *exit_status = EXIT_USER;
f5e65279
MB
3233 return log_unit_error_errno(unit, errno, "Failed to update environment: %m");
3234 }
3235
3236 r = compile_suggested_paths(context, params, &suggested_paths);
3237 if (r < 0) {
3238 *exit_status = EXIT_MEMORY;
3239 return log_oom();
8a584da2
MP
3240 }
3241
f5e65279 3242 r = dynamic_creds_realize(dcreds, suggested_paths, &uid, &gid);
e735f4d4
MP
3243 if (r < 0) {
3244 *exit_status = EXIT_USER;
52ad194e
MB
3245 if (r == -EILSEQ) {
3246 log_unit_error(unit, "Failed to update dynamic user credentials: User or group with specified name already exists.");
3247 return -EOPNOTSUPP;
3248 }
f5e65279 3249 return log_unit_error_errno(unit, r, "Failed to update dynamic user credentials: %m");
e735f4d4 3250 }
e735f4d4 3251
2897b343 3252 if (!uid_is_valid(uid)) {
8a584da2 3253 *exit_status = EXIT_USER;
f5e65279 3254 log_unit_error(unit, "UID validation failed for \""UID_FMT"\"", uid);
2897b343
MP
3255 return -ESRCH;
3256 }
3257
3258 if (!gid_is_valid(gid)) {
3259 *exit_status = EXIT_USER;
f5e65279 3260 log_unit_error(unit, "GID validation failed for \""GID_FMT"\"", gid);
8a584da2
MP
3261 return -ESRCH;
3262 }
3263
3264 if (dcreds->user)
3265 username = dcreds->user->name;
6300502b 3266
8a584da2
MP
3267 } else {
3268 r = get_fixed_user(context, &username, &uid, &gid, &home, &shell);
3269 if (r < 0) {
3270 *exit_status = EXIT_USER;
f5e65279 3271 return log_unit_error_errno(unit, r, "Failed to determine user credentials: %m");
8a584da2
MP
3272 }
3273
3274 r = get_fixed_group(context, &groupname, &gid);
6300502b
MP
3275 if (r < 0) {
3276 *exit_status = EXIT_GROUP;
f5e65279 3277 return log_unit_error_errno(unit, r, "Failed to determine group credentials: %m");
6300502b
MP
3278 }
3279 }
3280
8a584da2
MP
3281 /* Initialize user supplementary groups and get SupplementaryGroups= ones */
3282 r = get_supplementary_groups(context, username, groupname, gid,
3283 &supplementary_gids, &ngids);
3284 if (r < 0) {
3285 *exit_status = EXIT_GROUP;
f5e65279 3286 return log_unit_error_errno(unit, r, "Failed to determine supplementary groups: %m");
8a584da2
MP
3287 }
3288
3289 r = send_user_lookup(unit, user_lookup_fd, uid, gid);
3290 if (r < 0) {
3291 *exit_status = EXIT_USER;
f5e65279 3292 return log_unit_error_errno(unit, r, "Failed to send user credentials to PID1: %m");
8a584da2
MP
3293 }
3294
3295 user_lookup_fd = safe_close(user_lookup_fd);
6300502b 3296
2897b343
MP
3297 r = acquire_home(context, uid, &home, &home_buffer);
3298 if (r < 0) {
3299 *exit_status = EXIT_CHDIR;
f5e65279 3300 return log_unit_error_errno(unit, r, "Failed to determine $HOME for user: %m");
2897b343
MP
3301 }
3302
5eef597e
MP
3303 /* If a socket is connected to STDIN/STDOUT/STDERR, we
3304 * must make sure to drop O_NONBLOCK */
3305 if (socket_fd >= 0)
db2df898 3306 (void) fd_nonblock(socket_fd, false);
663996b3 3307
6e866b33
MB
3308 /* Journald will try to look-up our cgroup in order to populate _SYSTEMD_CGROUP and _SYSTEMD_UNIT fields.
3309 * Hence we need to migrate to the target cgroup from init.scope before connecting to journald */
3310 if (params->cgroup_path) {
3311 _cleanup_free_ char *p = NULL;
3312
3313 r = exec_parameters_get_cgroup_path(params, &p);
3314 if (r < 0) {
3315 *exit_status = EXIT_CGROUP;
3316 return log_unit_error_errno(unit, r, "Failed to acquire cgroup path: %m");
3317 }
3318
3319 r = cg_attach_everywhere(params->cgroup_supported, p, 0, NULL, NULL);
3320 if (r < 0) {
3321 *exit_status = EXIT_CGROUP;
3322 return log_unit_error_errno(unit, r, "Failed to attach to cgroup %s: %m", p);
3323 }
3324 }
3325
bb4f798a
MB
3326 if (context->network_namespace_path && runtime && runtime->netns_storage_socket[0] >= 0) {
3327 r = open_netns_path(runtime->netns_storage_socket, context->network_namespace_path);
3328 if (r < 0) {
3329 *exit_status = EXIT_NETWORK;
3330 return log_unit_error_errno(unit, r, "Failed to open network namespace path %s: %m", context->network_namespace_path);
3331 }
3332 }
3333
8a584da2 3334 r = setup_input(context, params, socket_fd, named_iofds);
e735f4d4
MP
3335 if (r < 0) {
3336 *exit_status = EXIT_STDIN;
f5e65279 3337 return log_unit_error_errno(unit, r, "Failed to set up standard input: %m");
5eef597e 3338 }
663996b3 3339
8a584da2 3340 r = setup_output(unit, context, params, STDOUT_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
e735f4d4
MP
3341 if (r < 0) {
3342 *exit_status = EXIT_STDOUT;
f5e65279 3343 return log_unit_error_errno(unit, r, "Failed to set up standard output: %m");
5eef597e 3344 }
663996b3 3345
8a584da2 3346 r = setup_output(unit, context, params, STDERR_FILENO, socket_fd, named_iofds, basename(command->path), uid, gid, &journal_stream_dev, &journal_stream_ino);
e735f4d4
MP
3347 if (r < 0) {
3348 *exit_status = EXIT_STDERR;
f5e65279 3349 return log_unit_error_errno(unit, r, "Failed to set up standard error output: %m");
5eef597e 3350 }
663996b3 3351
5eef597e 3352 if (context->oom_score_adjust_set) {
b012e921
MB
3353 /* When we can't make this change due to EPERM, then let's silently skip over it. User namespaces
3354 * prohibit write access to this file, and we shouldn't trip up over that. */
3355 r = set_oom_score_adjust(context->oom_score_adjust);
f5e65279 3356 if (IN_SET(r, -EPERM, -EACCES))
e3bff60a 3357 log_unit_debug_errno(unit, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
f5e65279 3358 else if (r < 0) {
e735f4d4 3359 *exit_status = EXIT_OOM_ADJUST;
f5e65279 3360 return log_unit_error_errno(unit, r, "Failed to adjust OOM setting: %m");
60f067b4 3361 }
5eef597e 3362 }
60f067b4 3363
46cdbd49
BR
3364 if (context->nice_set) {
3365 r = setpriority_closest(context->nice);
3366 if (r < 0)
3367 return log_unit_error_errno(unit, r, "Failed to set up process scheduling priority (nice level): %m");
3368 }
663996b3 3369
5eef597e
MP
3370 if (context->cpu_sched_set) {
3371 struct sched_param param = {
3372 .sched_priority = context->cpu_sched_priority,
3373 };
663996b3 3374
e735f4d4
MP
3375 r = sched_setscheduler(0,
3376 context->cpu_sched_policy |
3377 (context->cpu_sched_reset_on_fork ?
3378 SCHED_RESET_ON_FORK : 0),
3379 &param);
3380 if (r < 0) {
3381 *exit_status = EXIT_SETSCHEDULER;
f5e65279 3382 return log_unit_error_errno(unit, errno, "Failed to set up CPU scheduling: %m");
5eef597e
MP
3383 }
3384 }
663996b3 3385
7d4b9ad6
MB
3386 if (context->cpu_affinity_from_numa || context->cpu_set.set) {
3387 _cleanup_(cpu_set_reset) CPUSet converted_cpu_set = {};
3388 const CPUSet *cpu_set;
3389
3390 if (context->cpu_affinity_from_numa) {
3391 r = exec_context_cpu_affinity_from_numa(context, &converted_cpu_set);
3392 if (r < 0) {
3393 *exit_status = EXIT_CPUAFFINITY;
3394 return log_unit_error_errno(unit, r, "Failed to derive CPU affinity mask from NUMA mask: %m");
3395 }
3396
3397 cpu_set = &converted_cpu_set;
3398 } else
3399 cpu_set = &context->cpu_set;
3400
3401 if (sched_setaffinity(0, cpu_set->allocated, cpu_set->set) < 0) {
e735f4d4 3402 *exit_status = EXIT_CPUAFFINITY;
f5e65279 3403 return log_unit_error_errno(unit, errno, "Failed to set up CPU affinity: %m");
663996b3 3404 }
7d4b9ad6 3405 }
663996b3 3406
f2dec872
BR
3407 if (mpol_is_valid(numa_policy_get_type(&context->numa_policy))) {
3408 r = apply_numa_policy(&context->numa_policy);
3409 if (r == -EOPNOTSUPP)
3410 log_unit_debug_errno(unit, r, "NUMA support not available, ignoring.");
3411 else if (r < 0) {
3412 *exit_status = EXIT_NUMA_POLICY;
3413 return log_unit_error_errno(unit, r, "Failed to set NUMA memory policy: %m");
3414 }
3415 }
3416
5eef597e
MP
3417 if (context->ioprio_set)
3418 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
e735f4d4 3419 *exit_status = EXIT_IOPRIO;
f5e65279 3420 return log_unit_error_errno(unit, errno, "Failed to set up IO scheduling priority: %m");
5eef597e 3421 }
663996b3 3422
5eef597e
MP
3423 if (context->timer_slack_nsec != NSEC_INFINITY)
3424 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
e735f4d4 3425 *exit_status = EXIT_TIMERSLACK;
f5e65279 3426 return log_unit_error_errno(unit, errno, "Failed to set up timer slack: %m");
663996b3
MS
3427 }
3428
f5e65279
MB
3429 if (context->personality != PERSONALITY_INVALID) {
3430 r = safe_personality(context->personality);
3431 if (r < 0) {
e735f4d4 3432 *exit_status = EXIT_PERSONALITY;
f5e65279 3433 return log_unit_error_errno(unit, r, "Failed to set up execution domain (personality): %m");
663996b3 3434 }
f5e65279 3435 }
663996b3 3436
5eef597e 3437 if (context->utmp_id)
f5e65279 3438 utmp_put_init_process(context->utmp_id, getpid_cached(), getsid(0),
2897b343 3439 context->tty_path,
13d276d0
MP
3440 context->utmp_mode == EXEC_UTMP_INIT ? INIT_PROCESS :
3441 context->utmp_mode == EXEC_UTMP_LOGIN ? LOGIN_PROCESS :
3442 USER_PROCESS,
2897b343 3443 username);
5eef597e 3444
bb4f798a 3445 if (uid_is_valid(uid)) {
e735f4d4
MP
3446 r = chown_terminal(STDIN_FILENO, uid);
3447 if (r < 0) {
3448 *exit_status = EXIT_STDIN;
f5e65279 3449 return log_unit_error_errno(unit, r, "Failed to change ownership of terminal: %m");
663996b3 3450 }
5eef597e 3451 }
663996b3 3452
7c20daf6 3453 /* If delegation is enabled we'll pass ownership of the cgroup to the user of the new process. On cgroup v1
52ad194e 3454 * this is only about systemd's own hierarchy, i.e. not the controller hierarchies, simply because that's not
7c20daf6 3455 * safe. On cgroup v2 there's only one hierarchy anyway, and delegation is safe there, hence in that case only
52ad194e 3456 * touch a single hierarchy too. */
f5e65279 3457 if (params->cgroup_path && context->user && (params->flags & EXEC_CGROUP_DELEGATE)) {
52ad194e 3458 r = cg_set_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, uid, gid);
e735f4d4
MP
3459 if (r < 0) {
3460 *exit_status = EXIT_CGROUP;
f5e65279 3461 return log_unit_error_errno(unit, r, "Failed to adjust control group access: %m");
663996b3 3462 }
5eef597e 3463 }
663996b3 3464
f5e65279
MB
3465 for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
3466 r = setup_exec_directory(context, params, uid, gid, dt, exit_status);
3467 if (r < 0)
3468 return log_unit_error_errno(unit, r, "Failed to set up special execution directory in %s: %m", params->prefix[dt]);
5eef597e 3469 }
663996b3 3470
5a920b42 3471 r = build_environment(
8a584da2 3472 unit,
5a920b42
MP
3473 context,
3474 params,
3475 n_fds,
3476 home,
3477 username,
3478 shell,
3479 journal_stream_dev,
3480 journal_stream_ino,
3481 &our_env);
3482 if (r < 0) {
3483 *exit_status = EXIT_MEMORY;
f5e65279 3484 return log_oom();
5a920b42
MP
3485 }
3486
3487 r = build_pass_environment(context, &pass_env);
3488 if (r < 0) {
3489 *exit_status = EXIT_MEMORY;
f5e65279 3490 return log_oom();
5a920b42
MP
3491 }
3492
3493 accum_env = strv_env_merge(5,
3494 params->environment,
3495 our_env,
3496 pass_env,
3497 context->environment,
46cdbd49 3498 files_env);
5a920b42
MP
3499 if (!accum_env) {
3500 *exit_status = EXIT_MEMORY;
f5e65279 3501 return log_oom();
5a920b42
MP
3502 }
3503 accum_env = strv_env_clean(accum_env);
3504
8a584da2 3505 (void) umask(context->umask);
14228c0d 3506
f5e65279 3507 r = setup_keyring(unit, context, params, uid, gid);
2897b343
MP
3508 if (r < 0) {
3509 *exit_status = EXIT_KEYRING;
f5e65279 3510 return log_unit_error_errno(unit, r, "Failed to set up kernel keyring: %m");
2897b343
MP
3511 }
3512
f5e65279
MB
3513 /* We need sandboxing if the caller asked us to apply it and the command isn't explicitly excepted from it */
3514 needs_sandboxing = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & EXEC_COMMAND_FULLY_PRIVILEGED);
3515
3516 /* We need the ambient capability hack, if the caller asked us to apply it and the command is marked for it, and the kernel doesn't actually support ambient caps */
3517 needs_ambient_hack = (params->flags & EXEC_APPLY_SANDBOXING) && (command->flags & EXEC_COMMAND_AMBIENT_MAGIC) && !ambient_capabilities_supported();
3518
3519 /* We need setresuid() if the caller asked us to apply sandboxing and the command isn't explicitly excepted from either whole sandboxing or just setresuid() itself, and the ambient hack is not desired */
3520 if (needs_ambient_hack)
3521 needs_setuid = false;
3522 else
3523 needs_setuid = (params->flags & EXEC_APPLY_SANDBOXING) && !(command->flags & (EXEC_COMMAND_FULLY_PRIVILEGED|EXEC_COMMAND_NO_SETUID));
3524
3525 if (needs_sandboxing) {
3526 /* MAC enablement checks need to be done before a new mount ns is created, as they rely on /sys being
3527 * present. The actual MAC context application will happen later, as late as possible, to avoid
3528 * impacting our own code paths. */
3529
3530#if HAVE_SELINUX
3531 use_selinux = mac_selinux_use();
3532#endif
3533#if ENABLE_SMACK
3534 use_smack = mac_smack_use();
3535#endif
3536#if HAVE_APPARMOR
3537 use_apparmor = mac_apparmor_use();
3538#endif
3539 }
3540
7c20daf6
FS
3541 if (needs_sandboxing) {
3542 int which_failed;
3543
3544 /* Let's set the resource limits before we call into PAM, so that pam_limits wins over what
3545 * is set here. (See below.) */
3546
3547 r = setrlimit_closest_all((const struct rlimit* const *) context->rlimit, &which_failed);
3548 if (r < 0) {
3549 *exit_status = EXIT_LIMITS;
3550 return log_unit_error_errno(unit, r, "Failed to adjust resource limit RLIMIT_%s: %m", rlimit_to_string(which_failed));
3551 }
3552 }
3553
f5e65279 3554 if (needs_setuid) {
7c20daf6
FS
3555
3556 /* Let's call into PAM after we set up our own idea of resource limits so that pam_limits
3557 * wins here. (See above.) */
3558
6300502b 3559 if (context->pam_name && username) {
8a584da2 3560 r = setup_pam(context->pam_name, username, uid, gid, context->tty_path, &accum_env, fds, n_fds);
6300502b
MP
3561 if (r < 0) {
3562 *exit_status = EXIT_PAM;
f5e65279 3563 return log_unit_error_errno(unit, r, "Failed to set up PAM session: %m");
6300502b 3564 }
46cdbd49
BR
3565
3566 ngids_after_pam = getgroups_alloc(&gids_after_pam);
3567 if (ngids_after_pam < 0) {
3568 *exit_status = EXIT_MEMORY;
3569 return log_unit_error_errno(unit, ngids_after_pam, "Failed to obtain groups after setting up PAM: %m");
3570 }
3571 }
3572 }
3573
3574 if (needs_sandboxing) {
3575#if HAVE_SELINUX
3576 if (use_selinux && params->selinux_context_net && socket_fd >= 0) {
3577 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
3578 if (r < 0) {
3579 *exit_status = EXIT_SELINUX_CONTEXT;
3580 return log_unit_error_errno(unit, r, "Failed to determine SELinux context: %m");
3581 }
3582 }
3583#endif
3584
3585 /* If we're unprivileged, set up the user namespace first to enable use of the other namespaces.
3586 * Users with CAP_SYS_ADMIN can set up user namespaces last because they will be able to
3587 * set up all of the other namespaces (i.e. network, mount, UTS) without a user namespace. */
3588 if (context->private_users && !have_effective_cap(CAP_SYS_ADMIN)) {
3589 userns_set_up = true;
3590 r = setup_private_users(saved_uid, saved_gid, uid, gid);
3591 if (r < 0) {
3592 *exit_status = EXIT_USER;
3593 return log_unit_error_errno(unit, r, "Failed to set up user namespacing for unprivileged user: %m");
3594 }
663996b3 3595 }
6300502b 3596 }
663996b3 3597
bb4f798a
MB
3598 if ((context->private_network || context->network_namespace_path) && runtime && runtime->netns_storage_socket[0] >= 0) {
3599
52ad194e
MB
3600 if (ns_type_supported(NAMESPACE_NET)) {
3601 r = setup_netns(runtime->netns_storage_socket);
46cdbd49
BR
3602 if (r == -EPERM)
3603 log_unit_warning_errno(unit, r,
3604 "PrivateNetwork=yes is configured, but network namespace setup failed, ignoring: %m");
3605 else if (r < 0) {
52ad194e
MB
3606 *exit_status = EXIT_NETWORK;
3607 return log_unit_error_errno(unit, r, "Failed to set up network namespacing: %m");
3608 }
bb4f798a
MB
3609 } else if (context->network_namespace_path) {
3610 *exit_status = EXIT_NETWORK;
46cdbd49
BR
3611 return log_unit_error_errno(unit, SYNTHETIC_ERRNO(EOPNOTSUPP),
3612 "NetworkNamespacePath= is not supported, refusing.");
52ad194e
MB
3613 } else
3614 log_unit_warning(unit, "PrivateNetwork=yes is configured, but the kernel does not support network namespaces, ignoring.");
5eef597e 3615 }
60f067b4 3616
e3bff60a 3617 needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
e3bff60a 3618 if (needs_mount_namespace) {
f2dec872
BR
3619 _cleanup_free_ char *error_path = NULL;
3620
3621 r = apply_mount_namespace(unit, command, context, params, runtime, &error_path);
8a584da2 3622 if (r < 0) {
e735f4d4 3623 *exit_status = EXIT_NAMESPACE;
f2dec872
BR
3624 return log_unit_error_errno(unit, r, "Failed to set up mount namespacing%s%s: %m",
3625 error_path ? ": " : "", strempty(error_path));
5eef597e
MP
3626 }
3627 }
60f067b4 3628
d0648cfe
MB
3629 if (needs_sandboxing) {
3630 r = apply_protect_hostname(unit, context, exit_status);
3631 if (r < 0)
3632 return r;
bb4f798a
MB
3633 }
3634
46cdbd49
BR
3635 /* Drop groups as early as possible.
3636 * This needs to be done after PrivateDevices=y setup as device nodes should be owned by the host's root.
3637 * For non-root in a userns, devices will be owned by the user/group before the group change, and nobody. */
f5e65279 3638 if (needs_setuid) {
46cdbd49
BR
3639 _cleanup_free_ gid_t *gids_to_enforce = NULL;
3640 int ngids_to_enforce = 0;
3641
3642 ngids_to_enforce = merge_gid_lists(supplementary_gids,
3643 ngids,
3644 gids_after_pam,
3645 ngids_after_pam,
3646 &gids_to_enforce);
3647 if (ngids_to_enforce < 0) {
3648 *exit_status = EXIT_MEMORY;
3649 return log_unit_error_errno(unit,
3650 ngids_to_enforce,
3651 "Failed to merge group lists. Group membership might be incorrect: %m");
3652 }
3653
3654 r = enforce_groups(gid, gids_to_enforce, ngids_to_enforce);
8a584da2
MP
3655 if (r < 0) {
3656 *exit_status = EXIT_GROUP;
f5e65279 3657 return log_unit_error_errno(unit, r, "Changing group credentials failed: %m");
5eef597e
MP
3658 }
3659 }
663996b3 3660
46cdbd49
BR
3661 /* If the user namespace was not set up above, try to do it now.
3662 * It's preferred to set up the user namespace later (after all other namespaces) so as not to be
3663 * restricted by rules pertaining to combining user namespaces with other namespaces (e.g. in the
3664 * case of mount namespaces being less privileged when the mount point list is copied from a
3665 * different user namespace). */
f47781d8 3666
46cdbd49
BR
3667 if (needs_sandboxing && context->private_users && !userns_set_up) {
3668 r = setup_private_users(saved_uid, saved_gid, uid, gid);
3669 if (r < 0) {
3670 *exit_status = EXIT_USER;
3671 return log_unit_error_errno(unit, r, "Failed to set up user namespacing: %m");
8a584da2
MP
3672 }
3673 }
3674
f5e65279 3675 /* We repeat the fd closing here, to make sure that nothing is leaked from the PAM modules. Note that we are
6e866b33
MB
3676 * more aggressive this time since socket_fd and the netns fds we don't need anymore. We do keep the exec_fd
3677 * however if we have it as we want to keep it open until the final execve(). */
3678
3679 if (params->exec_fd >= 0) {
3680 exec_fd = params->exec_fd;
3681
3682 if (exec_fd < 3 + (int) n_fds) {
3683 int moved_fd;
3684
3685 /* Let's move the exec fd far up, so that it's outside of the fd range we want to pass to the
3686 * process we are about to execute. */
3687
3688 moved_fd = fcntl(exec_fd, F_DUPFD_CLOEXEC, 3 + (int) n_fds);
3689 if (moved_fd < 0) {
3690 *exit_status = EXIT_FDS;
3691 return log_unit_error_errno(unit, errno, "Couldn't move exec fd up: %m");
3692 }
3693
3694 safe_close(exec_fd);
3695 exec_fd = moved_fd;
3696 } else {
3697 /* This fd should be FD_CLOEXEC already, but let's make sure. */
3698 r = fd_cloexec(exec_fd, true);
3699 if (r < 0) {
3700 *exit_status = EXIT_FDS;
3701 return log_unit_error_errno(unit, r, "Failed to make exec fd FD_CLOEXEC: %m");
3702 }
3703 }
3704
3705 fds_with_exec_fd = newa(int, n_fds + 1);
3706 memcpy_safe(fds_with_exec_fd, fds, n_fds * sizeof(int));
3707 fds_with_exec_fd[n_fds] = exec_fd;
3708 n_fds_with_exec_fd = n_fds + 1;
3709 } else {
3710 fds_with_exec_fd = fds;
3711 n_fds_with_exec_fd = n_fds;
3712 }
3713
3714 r = close_all_fds(fds_with_exec_fd, n_fds_with_exec_fd);
e735f4d4
MP
3715 if (r >= 0)
3716 r = shift_fds(fds, n_fds);
3717 if (r >= 0)
6e866b33 3718 r = flags_fds(fds, n_socket_fds, n_storage_fds, context->non_blocking);
e735f4d4
MP
3719 if (r < 0) {
3720 *exit_status = EXIT_FDS;
f5e65279 3721 return log_unit_error_errno(unit, r, "Failed to adjust passed file descriptors: %m");
5eef597e
MP
3722 }
3723
6e866b33
MB
3724 /* At this point, the fds we want to pass to the program are all ready and set up, with O_CLOEXEC turned off
3725 * and at the right fd numbers. There are no other fds open, with one exception: the exec_fd if it is defined,
3726 * and it has O_CLOEXEC set, after all we want it to be closed by the execve(), so that our parent knows we
3727 * came this far. */
3728
f5e65279 3729 secure_bits = context->secure_bits;
5eef597e 3730
f5e65279
MB
3731 if (needs_sandboxing) {
3732 uint64_t bset;
5a920b42 3733
7c20daf6
FS
3734 /* Set the RTPRIO resource limit to 0, but only if nothing else was explicitly
3735 * requested. (Note this is placed after the general resource limit initialization, see
3736 * above, in order to take precedence.) */
5a920b42
MP
3737 if (context->restrict_realtime && !context->rlimit[RLIMIT_RTPRIO]) {
3738 if (setrlimit(RLIMIT_RTPRIO, &RLIMIT_MAKE_CONST(0)) < 0) {
e735f4d4 3739 *exit_status = EXIT_LIMITS;
f5e65279 3740 return log_unit_error_errno(unit, errno, "Failed to adjust RLIMIT_RTPRIO resource limit: %m");
663996b3
MS
3741 }
3742 }
5eef597e 3743
52ad194e
MB
3744#if ENABLE_SMACK
3745 /* LSM Smack needs the capability CAP_MAC_ADMIN to change the current execution security context of the
3746 * process. This is the latest place before dropping capabilities. Other MAC context are set later. */
3747 if (use_smack) {
3748 r = setup_smack(context, command);
3749 if (r < 0) {
3750 *exit_status = EXIT_SMACK_PROCESS_LABEL;
3751 return log_unit_error_errno(unit, r, "Failed to set SMACK process label: %m");
3752 }
3753 }
3754#endif
3755
f5e65279
MB
3756 bset = context->capability_bounding_set;
3757 /* If the ambient caps hack is enabled (which means the kernel can't do them, and the user asked for
3758 * our magic fallback), then let's add some extra caps, so that the service can drop privs of its own,
3759 * instead of us doing that */
3760 if (needs_ambient_hack)
3761 bset |= (UINT64_C(1) << CAP_SETPCAP) |
3762 (UINT64_C(1) << CAP_SETUID) |
3763 (UINT64_C(1) << CAP_SETGID);
3764
3765 if (!cap_test_all(bset)) {
3766 r = capability_bounding_set_drop(bset, false);
e735f4d4
MP
3767 if (r < 0) {
3768 *exit_status = EXIT_CAPABILITIES;
f5e65279 3769 return log_unit_error_errno(unit, r, "Failed to drop capabilities: %m");
663996b3 3770 }
663996b3
MS
3771 }
3772
4c89c718
MP
3773 /* This is done before enforce_user, but ambient set
3774 * does not survive over setresuid() if keep_caps is not set. */
46cdbd49 3775 if (!needs_ambient_hack) {
4c89c718
MP
3776 r = capability_ambient_set_apply(context->capability_ambient_set, true);
3777 if (r < 0) {
3778 *exit_status = EXIT_CAPABILITIES;
f5e65279 3779 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (before UID change): %m");
4c89c718 3780 }
4c89c718 3781 }
f5e65279 3782 }
4c89c718 3783
f2dec872
BR
3784 /* chroot to root directory first, before we lose the ability to chroot */
3785 r = apply_root_directory(context, params, needs_mount_namespace, exit_status);
3786 if (r < 0)
3787 return log_unit_error_errno(unit, r, "Chrooting to the requested root directory failed: %m");
3788
f5e65279 3789 if (needs_setuid) {
bb4f798a 3790 if (uid_is_valid(uid)) {
e735f4d4
MP
3791 r = enforce_user(context, uid);
3792 if (r < 0) {
3793 *exit_status = EXIT_USER;
f5e65279 3794 return log_unit_error_errno(unit, r, "Failed to change UID to " UID_FMT ": %m", uid);
663996b3 3795 }
f5e65279
MB
3796
3797 if (!needs_ambient_hack &&
3798 context->capability_ambient_set != 0) {
4c89c718
MP
3799
3800 /* Fix the ambient capabilities after user change. */
3801 r = capability_ambient_set_apply(context->capability_ambient_set, false);
3802 if (r < 0) {
3803 *exit_status = EXIT_CAPABILITIES;
f5e65279 3804 return log_unit_error_errno(unit, r, "Failed to apply ambient capabilities (after UID change): %m");
4c89c718
MP
3805 }
3806
3807 /* If we were asked to change user and ambient capabilities
3808 * were requested, we had to add keep-caps to the securebits
3809 * so that we would maintain the inherited capability set
3810 * through the setresuid(). Make sure that the bit is added
3811 * also to the context secure_bits so that we don't try to
3812 * drop the bit away next. */
3813
aa27b158 3814 secure_bits |= 1<<SECURE_KEEP_CAPS;
4c89c718 3815 }
663996b3 3816 }
f5e65279 3817 }
663996b3 3818
6e866b33
MB
3819 /* Apply working directory here, because the working directory might be on NFS and only the user running
3820 * this service might have the correct privilege to change to the working directory */
f2dec872 3821 r = apply_working_directory(context, params, home, exit_status);
6e866b33
MB
3822 if (r < 0)
3823 return log_unit_error_errno(unit, r, "Changing to the requested working directory failed: %m");
3824
f5e65279 3825 if (needs_sandboxing) {
52ad194e 3826 /* Apply other MAC contexts late, but before seccomp syscall filtering, as those should really be last to
8a584da2
MP
3827 * influence our own codepaths as little as possible. Moreover, applying MAC contexts usually requires
3828 * syscalls that are subject to seccomp filtering, hence should probably be applied before the syscalls
3829 * are restricted. */
3830
f5e65279
MB
3831#if HAVE_SELINUX
3832 if (use_selinux) {
8a584da2
MP
3833 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
3834
3835 if (exec_context) {
3836 r = setexeccon(exec_context);
3837 if (r < 0) {
3838 *exit_status = EXIT_SELINUX_CONTEXT;
f5e65279 3839 return log_unit_error_errno(unit, r, "Failed to change SELinux context to %s: %m", exec_context);
8a584da2
MP
3840 }
3841 }
3842 }
3843#endif
3844
f5e65279
MB
3845#if HAVE_APPARMOR
3846 if (use_apparmor && context->apparmor_profile) {
8a584da2
MP
3847 r = aa_change_onexec(context->apparmor_profile);
3848 if (r < 0 && !context->apparmor_profile_ignore) {
3849 *exit_status = EXIT_APPARMOR_PROFILE;
f5e65279 3850 return log_unit_error_errno(unit, errno, "Failed to prepare AppArmor profile change to %s: %m", context->apparmor_profile);
8a584da2
MP
3851 }
3852 }
3853#endif
3854
f5e65279
MB
3855 /* PR_GET_SECUREBITS is not privileged, while PR_SET_SECUREBITS is. So to suppress potential EPERMs
3856 * we'll try not to call PR_SET_SECUREBITS unless necessary. */
4c89c718
MP
3857 if (prctl(PR_GET_SECUREBITS) != secure_bits)
3858 if (prctl(PR_SET_SECUREBITS, secure_bits) < 0) {
e735f4d4 3859 *exit_status = EXIT_SECUREBITS;
f5e65279 3860 return log_unit_error_errno(unit, errno, "Failed to set process secure bits: %m");
5eef597e 3861 }
663996b3 3862
8a584da2 3863 if (context_has_no_new_privileges(context))
5eef597e 3864 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
e735f4d4 3865 *exit_status = EXIT_NO_NEW_PRIVILEGES;
f5e65279 3866 return log_unit_error_errno(unit, errno, "Failed to disable new privileges: %m");
663996b3
MS
3867 }
3868
f5e65279 3869#if HAVE_SECCOMP
2897b343
MP
3870 r = apply_address_families(unit, context);
3871 if (r < 0) {
3872 *exit_status = EXIT_ADDRESS_FAMILIES;
f5e65279 3873 return log_unit_error_errno(unit, r, "Failed to restrict address families: %m");
663996b3
MS
3874 }
3875
2897b343
MP
3876 r = apply_memory_deny_write_execute(unit, context);
3877 if (r < 0) {
3878 *exit_status = EXIT_SECCOMP;
f5e65279 3879 return log_unit_error_errno(unit, r, "Failed to disable writing to executable memory: %m");
5a920b42
MP
3880 }
3881
2897b343
MP
3882 r = apply_restrict_realtime(unit, context);
3883 if (r < 0) {
3884 *exit_status = EXIT_SECCOMP;
f5e65279 3885 return log_unit_error_errno(unit, r, "Failed to apply realtime restrictions: %m");
5a920b42
MP
3886 }
3887
bb4f798a
MB
3888 r = apply_restrict_suid_sgid(unit, context);
3889 if (r < 0) {
3890 *exit_status = EXIT_SECCOMP;
3891 return log_unit_error_errno(unit, r, "Failed to apply SUID/SGID restrictions: %m");
3892 }
3893
2897b343
MP
3894 r = apply_restrict_namespaces(unit, context);
3895 if (r < 0) {
3896 *exit_status = EXIT_SECCOMP;
f5e65279 3897 return log_unit_error_errno(unit, r, "Failed to apply namespace restrictions: %m");
663996b3
MS
3898 }
3899
2897b343
MP
3900 r = apply_protect_sysctl(unit, context);
3901 if (r < 0) {
3902 *exit_status = EXIT_SECCOMP;
f5e65279 3903 return log_unit_error_errno(unit, r, "Failed to apply sysctl restrictions: %m");
8a584da2 3904 }
663996b3 3905
2897b343
MP
3906 r = apply_protect_kernel_modules(unit, context);
3907 if (r < 0) {
3908 *exit_status = EXIT_SECCOMP;
f5e65279 3909 return log_unit_error_errno(unit, r, "Failed to apply module loading restrictions: %m");
2897b343
MP
3910 }
3911
e1f67bc7
MB
3912 r = apply_protect_kernel_logs(unit, context);
3913 if (r < 0) {
3914 *exit_status = EXIT_SECCOMP;
3915 return log_unit_error_errno(unit, r, "Failed to apply kernel log restrictions: %m");
3916 }
3917
46cdbd49
BR
3918 r = apply_protect_clock(unit, context);
3919 if (r < 0) {
3920 *exit_status = EXIT_SECCOMP;
3921 return log_unit_error_errno(unit, r, "Failed to apply clock restrictions: %m");
3922 }
3923
2897b343
MP
3924 r = apply_private_devices(unit, context);
3925 if (r < 0) {
3926 *exit_status = EXIT_SECCOMP;
f5e65279 3927 return log_unit_error_errno(unit, r, "Failed to set up private devices: %m");
2897b343
MP
3928 }
3929
3930 r = apply_syscall_archs(unit, context);
3931 if (r < 0) {
3932 *exit_status = EXIT_SECCOMP;
f5e65279
MB
3933 return log_unit_error_errno(unit, r, "Failed to apply syscall architecture restrictions: %m");
3934 }
3935
3936 r = apply_lock_personality(unit, context);
3937 if (r < 0) {
3938 *exit_status = EXIT_SECCOMP;
3939 return log_unit_error_errno(unit, r, "Failed to lock personalities: %m");
5eef597e 3940 }
663996b3 3941
8a584da2
MP
3942 /* This really should remain the last step before the execve(), to make sure our own code is affected
3943 * by the filter as little as possible. */
f5e65279 3944 r = apply_syscall_filter(unit, context, needs_ambient_hack);
2897b343
MP
3945 if (r < 0) {
3946 *exit_status = EXIT_SECCOMP;
f5e65279 3947 return log_unit_error_errno(unit, r, "Failed to apply system call filters: %m");
5eef597e
MP
3948 }
3949#endif
3950 }
663996b3 3951
f5e65279
MB
3952 if (!strv_isempty(context->unset_environment)) {
3953 char **ee = NULL;
3954
3955 ee = strv_env_delete(accum_env, 1, context->unset_environment);
3956 if (!ee) {
3957 *exit_status = EXIT_MEMORY;
3958 return log_oom();
3959 }
3960
b012e921 3961 strv_free_and_replace(accum_env, ee);
f5e65279
MB
3962 }
3963
bb4f798a
MB
3964 if (!FLAGS_SET(command->flags, EXEC_COMMAND_NO_ENV_EXPAND)) {
3965 replaced_argv = replace_env_argv(command->argv, accum_env);
3966 if (!replaced_argv) {
3967 *exit_status = EXIT_MEMORY;
3968 return log_oom();
3969 }
3970 final_argv = replaced_argv;
3971 } else
3972 final_argv = command->argv;
663996b3 3973
1d42b86d 3974 if (DEBUG_LOGGING) {
5eef597e 3975 _cleanup_free_ char *line;
663996b3 3976
5eef597e 3977 line = exec_command_line(final_argv);
b012e921 3978 if (line)
e3bff60a 3979 log_struct(LOG_DEBUG,
e3bff60a
MP
3980 "EXECUTABLE=%s", command->path,
3981 LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
81c58355 3982 LOG_UNIT_ID(unit),
b012e921 3983 LOG_UNIT_INVOCATION_ID(unit));
5eef597e 3984 }
e3bff60a 3985
6e866b33
MB
3986 if (exec_fd >= 0) {
3987 uint8_t hot = 1;
3988
3989 /* We have finished with all our initializations. Let's now let the manager know that. From this point
3990 * on, if the manager sees POLLHUP on the exec_fd, then execve() was successful. */
3991
3992 if (write(exec_fd, &hot, sizeof(hot)) < 0) {
3993 *exit_status = EXIT_EXEC;
3994 return log_unit_error_errno(unit, errno, "Failed to enable exec_fd: %m");
3995 }
3996 }
3997
5a920b42 3998 execve(command->path, final_argv, accum_env);
6e866b33
MB
3999 r = -errno;
4000
4001 if (exec_fd >= 0) {
4002 uint8_t hot = 0;
f5e65279 4003
6e866b33
MB
4004 /* The execve() failed. This means the exec_fd is still open. Which means we need to tell the manager
4005 * that POLLHUP on it no longer means execve() succeeded. */
4006
4007 if (write(exec_fd, &hot, sizeof(hot)) < 0) {
4008 *exit_status = EXIT_EXEC;
4009 return log_unit_error_errno(unit, errno, "Failed to disable exec_fd: %m");
4010 }
4011 }
4012
4013 if (r == -ENOENT && (command->flags & EXEC_COMMAND_IGNORE_FAILURE)) {
4014 log_struct_errno(LOG_INFO, r,
f5e65279
MB
4015 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
4016 LOG_UNIT_ID(unit),
4017 LOG_UNIT_INVOCATION_ID(unit),
4018 LOG_UNIT_MESSAGE(unit, "Executable %s missing, skipping: %m",
4019 command->path),
b012e921 4020 "EXECUTABLE=%s", command->path);
f5e65279
MB
4021 return 0;
4022 }
4023
e735f4d4 4024 *exit_status = EXIT_EXEC;
6e866b33 4025 return log_unit_error_errno(unit, r, "Failed to execute command: %m");
5eef597e 4026}
663996b3 4027
98393f85 4028static int exec_context_load_environment(const Unit *unit, const ExecContext *c, char ***l);
f2dec872 4029static int exec_context_named_iofds(const ExecContext *c, const ExecParameters *p, int named_iofds[static 3]);
98393f85 4030
e3bff60a
MP
4031int exec_spawn(Unit *unit,
4032 ExecCommand *command,
5eef597e
MP
4033 const ExecContext *context,
4034 const ExecParameters *params,
4035 ExecRuntime *runtime,
8a584da2 4036 DynamicCreds *dcreds,
5eef597e 4037 pid_t *ret) {
663996b3 4038
6e866b33
MB
4039 int socket_fd, r, named_iofds[3] = { -1, -1, -1 }, *fds = NULL;
4040 _cleanup_free_ char *subcgroup_path = NULL;
5eef597e 4041 _cleanup_strv_free_ char **files_env = NULL;
b012e921 4042 size_t n_storage_fds = 0, n_socket_fds = 0;
e735f4d4 4043 _cleanup_free_ char *line = NULL;
5eef597e 4044 pid_t pid;
663996b3 4045
e3bff60a 4046 assert(unit);
5eef597e
MP
4047 assert(command);
4048 assert(context);
4049 assert(ret);
4050 assert(params);
6e866b33 4051 assert(params->fds || (params->n_socket_fds + params->n_storage_fds <= 0));
663996b3 4052
5eef597e
MP
4053 if (context->std_input == EXEC_INPUT_SOCKET ||
4054 context->std_output == EXEC_OUTPUT_SOCKET ||
4055 context->std_error == EXEC_OUTPUT_SOCKET) {
4056
81c58355 4057 if (params->n_socket_fds > 1) {
e3bff60a 4058 log_unit_error(unit, "Got more than one socket.");
5eef597e 4059 return -EINVAL;
e735f4d4 4060 }
5eef597e 4061
81c58355
MB
4062 if (params->n_socket_fds == 0) {
4063 log_unit_error(unit, "Got no socket.");
4064 return -EINVAL;
4065 }
4066
5eef597e
MP
4067 socket_fd = params->fds[0];
4068 } else {
4069 socket_fd = -1;
4070 fds = params->fds;
81c58355 4071 n_socket_fds = params->n_socket_fds;
6e866b33 4072 n_storage_fds = params->n_storage_fds;
5eef597e
MP
4073 }
4074
98393f85 4075 r = exec_context_named_iofds(context, params, named_iofds);
8a584da2
MP
4076 if (r < 0)
4077 return log_unit_error_errno(unit, r, "Failed to load a named file descriptor: %m");
4078
e3bff60a 4079 r = exec_context_load_environment(unit, context, &files_env);
e735f4d4 4080 if (r < 0)
e3bff60a 4081 return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
5eef597e 4082
6e866b33 4083 line = exec_command_line(command->argv);
5eef597e
MP
4084 if (!line)
4085 return log_oom();
4086
e3bff60a 4087 log_struct(LOG_DEBUG,
e3bff60a
MP
4088 LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
4089 "EXECUTABLE=%s", command->path,
81c58355 4090 LOG_UNIT_ID(unit),
b012e921 4091 LOG_UNIT_INVOCATION_ID(unit));
f5e65279 4092
6e866b33
MB
4093 if (params->cgroup_path) {
4094 r = exec_parameters_get_cgroup_path(params, &subcgroup_path);
4095 if (r < 0)
4096 return log_unit_error_errno(unit, r, "Failed to acquire subcgroup path: %m");
4097 if (r > 0) { /* We are using a child cgroup */
4098 r = cg_create(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path);
4099 if (r < 0)
4100 return log_unit_error_errno(unit, r, "Failed to create control group '%s': %m", subcgroup_path);
4101 }
4102 }
4103
5eef597e
MP
4104 pid = fork();
4105 if (pid < 0)
4c89c718 4106 return log_unit_error_errno(unit, errno, "Failed to fork: %m");
663996b3 4107
5eef597e 4108 if (pid == 0) {
f5e65279 4109 int exit_status = EXIT_SUCCESS;
e735f4d4 4110
e3bff60a
MP
4111 r = exec_child(unit,
4112 command,
e735f4d4
MP
4113 context,
4114 params,
4115 runtime,
8a584da2 4116 dcreds,
e735f4d4 4117 socket_fd,
8a584da2 4118 named_iofds,
81c58355 4119 fds,
81c58355 4120 n_socket_fds,
6e866b33 4121 n_storage_fds,
e735f4d4 4122 files_env,
8a584da2 4123 unit->manager->user_lookup_fds[1],
f5e65279
MB
4124 &exit_status);
4125
f2dec872
BR
4126 if (r < 0) {
4127 const char *status =
4128 exit_status_to_string(exit_status,
4129 EXIT_STATUS_LIBC | EXIT_STATUS_SYSTEMD);
4130
f5e65279
MB
4131 log_struct_errno(LOG_ERR, r,
4132 "MESSAGE_ID=" SD_MESSAGE_SPAWN_FAILED_STR,
4133 LOG_UNIT_ID(unit),
4134 LOG_UNIT_INVOCATION_ID(unit),
4135 LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
f2dec872 4136 status, command->path),
b012e921 4137 "EXECUTABLE=%s", command->path);
f2dec872 4138 }
663996b3 4139
e735f4d4 4140 _exit(exit_status);
663996b3
MS
4141 }
4142
e3bff60a 4143 log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
663996b3 4144
6e866b33
MB
4145 /* We add the new process to the cgroup both in the child (so that we can be sure that no user code is ever
4146 * executed outside of the cgroup) and in the parent (so that we can be sure that when we kill the cgroup the
4147 * process will be killed too). */
4148 if (subcgroup_path)
4149 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, subcgroup_path, pid);
663996b3
MS
4150
4151 exec_status_start(&command->exec_status, pid);
4152
4153 *ret = pid;
4154 return 0;
4155}
4156
4157void exec_context_init(ExecContext *c) {
f5e65279
MB
4158 ExecDirectoryType i;
4159
663996b3
MS
4160 assert(c);
4161
4162 c->umask = 0022;
4163 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
4164 c->cpu_sched_policy = SCHED_OTHER;
4165 c->syslog_priority = LOG_DAEMON|LOG_INFO;
4166 c->syslog_level_prefix = true;
663996b3 4167 c->ignore_sigpipe = true;
5eef597e 4168 c->timer_slack_nsec = NSEC_INFINITY;
e3bff60a 4169 c->personality = PERSONALITY_INVALID;
f5e65279
MB
4170 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
4171 c->directories[i].mode = 0755;
812752cc 4172 c->timeout_clean_usec = USEC_INFINITY;
4c89c718 4173 c->capability_bounding_set = CAP_ALL;
b012e921
MB
4174 assert_cc(NAMESPACE_FLAGS_INITIAL != NAMESPACE_FLAGS_ALL);
4175 c->restrict_namespaces = NAMESPACE_FLAGS_INITIAL;
52ad194e 4176 c->log_level_max = -1;
f2dec872 4177 numa_policy_reset(&c->numa_policy);
663996b3
MS
4178}
4179
60f067b4 4180void exec_context_done(ExecContext *c) {
f5e65279 4181 ExecDirectoryType i;
52ad194e 4182 size_t l;
663996b3
MS
4183
4184 assert(c);
4185
6300502b
MP
4186 c->environment = strv_free(c->environment);
4187 c->environment_files = strv_free(c->environment_files);
db2df898 4188 c->pass_environment = strv_free(c->pass_environment);
f5e65279 4189 c->unset_environment = strv_free(c->unset_environment);
663996b3 4190
b012e921 4191 rlimit_free_all(c->rlimit);
663996b3 4192
52ad194e 4193 for (l = 0; l < 3; l++) {
8a584da2 4194 c->stdio_fdname[l] = mfree(c->stdio_fdname[l]);
52ad194e
MB
4195 c->stdio_file[l] = mfree(c->stdio_file[l]);
4196 }
8a584da2 4197
6300502b
MP
4198 c->working_directory = mfree(c->working_directory);
4199 c->root_directory = mfree(c->root_directory);
2897b343 4200 c->root_image = mfree(c->root_image);
6300502b
MP
4201 c->tty_path = mfree(c->tty_path);
4202 c->syslog_identifier = mfree(c->syslog_identifier);
4203 c->user = mfree(c->user);
4204 c->group = mfree(c->group);
663996b3 4205
6300502b 4206 c->supplementary_groups = strv_free(c->supplementary_groups);
663996b3 4207
6300502b 4208 c->pam_name = mfree(c->pam_name);
663996b3 4209
5a920b42
MP
4210 c->read_only_paths = strv_free(c->read_only_paths);
4211 c->read_write_paths = strv_free(c->read_write_paths);
4212 c->inaccessible_paths = strv_free(c->inaccessible_paths);
663996b3 4213
2897b343 4214 bind_mount_free_many(c->bind_mounts, c->n_bind_mounts);
98393f85
MB
4215 c->bind_mounts = NULL;
4216 c->n_bind_mounts = 0;
4217 temporary_filesystem_free_many(c->temporary_filesystems, c->n_temporary_filesystems);
4218 c->temporary_filesystems = NULL;
4219 c->n_temporary_filesystems = 0;
2897b343 4220
f2dec872
BR
4221 cpu_set_reset(&c->cpu_set);
4222 numa_policy_reset(&c->numa_policy);
663996b3 4223
6300502b
MP
4224 c->utmp_id = mfree(c->utmp_id);
4225 c->selinux_context = mfree(c->selinux_context);
4226 c->apparmor_profile = mfree(c->apparmor_profile);
f5e65279 4227 c->smack_process_label = mfree(c->smack_process_label);
60f067b4 4228
52ad194e 4229 c->syscall_filter = hashmap_free(c->syscall_filter);
6300502b
MP
4230 c->syscall_archs = set_free(c->syscall_archs);
4231 c->address_families = set_free(c->address_families);
60f067b4 4232
f5e65279
MB
4233 for (i = 0; i < _EXEC_DIRECTORY_TYPE_MAX; i++)
4234 c->directories[i].paths = strv_free(c->directories[i].paths);
52ad194e
MB
4235
4236 c->log_level_max = -1;
4237
4238 exec_context_free_log_extra_fields(c);
4239
e1f67bc7
MB
4240 c->log_ratelimit_interval_usec = 0;
4241 c->log_ratelimit_burst = 0;
6e866b33 4242
52ad194e
MB
4243 c->stdin_data = mfree(c->stdin_data);
4244 c->stdin_data_size = 0;
bb4f798a
MB
4245
4246 c->network_namespace_path = mfree(c->network_namespace_path);
46cdbd49
BR
4247
4248 c->log_namespace = mfree(c->log_namespace);
60f067b4
JS
4249}
4250
98393f85 4251int exec_context_destroy_runtime_directory(const ExecContext *c, const char *runtime_prefix) {
60f067b4
JS
4252 char **i;
4253
4254 assert(c);
4255
4256 if (!runtime_prefix)
4257 return 0;
4258
f5e65279 4259 STRV_FOREACH(i, c->directories[EXEC_DIRECTORY_RUNTIME].paths) {
60f067b4
JS
4260 _cleanup_free_ char *p;
4261
812752cc
MB
4262 if (exec_directory_is_private(c, EXEC_DIRECTORY_RUNTIME))
4263 p = path_join(runtime_prefix, "private", *i);
4264 else
4265 p = path_join(runtime_prefix, *i);
60f067b4
JS
4266 if (!p)
4267 return -ENOMEM;
4268
bb4f798a
MB
4269 /* We execute this synchronously, since we need to be sure this is gone when we start the
4270 * service next. */
e3bff60a 4271 (void) rm_rf(p, REMOVE_ROOT);
60f067b4
JS
4272 }
4273
4274 return 0;
663996b3
MS
4275}
4276
98393f85 4277static void exec_command_done(ExecCommand *c) {
663996b3
MS
4278 assert(c);
4279
6300502b 4280 c->path = mfree(c->path);
6300502b 4281 c->argv = strv_free(c->argv);
663996b3
MS
4282}
4283
b012e921
MB
4284void exec_command_done_array(ExecCommand *c, size_t n) {
4285 size_t i;
663996b3
MS
4286
4287 for (i = 0; i < n; i++)
4288 exec_command_done(c+i);
4289}
4290
e735f4d4 4291ExecCommand* exec_command_free_list(ExecCommand *c) {
663996b3
MS
4292 ExecCommand *i;
4293
4294 while ((i = c)) {
60f067b4 4295 LIST_REMOVE(command, c, i);
663996b3
MS
4296 exec_command_done(i);
4297 free(i);
4298 }
e735f4d4
MP
4299
4300 return NULL;
663996b3
MS
4301}
4302
b012e921
MB
4303void exec_command_free_array(ExecCommand **c, size_t n) {
4304 size_t i;
663996b3 4305
e735f4d4
MP
4306 for (i = 0; i < n; i++)
4307 c[i] = exec_command_free_list(c[i]);
4308}
4309
6e866b33
MB
4310void exec_command_reset_status_array(ExecCommand *c, size_t n) {
4311 size_t i;
4312
4313 for (i = 0; i < n; i++)
4314 exec_status_reset(&c[i].exec_status);
4315}
4316
4317void exec_command_reset_status_list_array(ExecCommand **c, size_t n) {
4318 size_t i;
4319
4320 for (i = 0; i < n; i++) {
4321 ExecCommand *z;
4322
4323 LIST_FOREACH(command, z, c[i])
4324 exec_status_reset(&z->exec_status);
4325 }
4326}
4327
e735f4d4 4328typedef struct InvalidEnvInfo {
98393f85 4329 const Unit *unit;
e735f4d4
MP
4330 const char *path;
4331} InvalidEnvInfo;
4332
4333static void invalid_env(const char *p, void *userdata) {
4334 InvalidEnvInfo *info = userdata;
4335
e3bff60a 4336 log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
663996b3
MS
4337}
4338
8a584da2
MP
4339const char* exec_context_fdname(const ExecContext *c, int fd_index) {
4340 assert(c);
4341
4342 switch (fd_index) {
52ad194e 4343
8a584da2
MP
4344 case STDIN_FILENO:
4345 if (c->std_input != EXEC_INPUT_NAMED_FD)
4346 return NULL;
52ad194e 4347
8a584da2 4348 return c->stdio_fdname[STDIN_FILENO] ?: "stdin";
52ad194e 4349
8a584da2
MP
4350 case STDOUT_FILENO:
4351 if (c->std_output != EXEC_OUTPUT_NAMED_FD)
4352 return NULL;
52ad194e 4353
8a584da2 4354 return c->stdio_fdname[STDOUT_FILENO] ?: "stdout";
52ad194e 4355
8a584da2
MP
4356 case STDERR_FILENO:
4357 if (c->std_error != EXEC_OUTPUT_NAMED_FD)
4358 return NULL;
52ad194e 4359
8a584da2 4360 return c->stdio_fdname[STDERR_FILENO] ?: "stderr";
52ad194e 4361
8a584da2
MP
4362 default:
4363 return NULL;
4364 }
4365}
4366
f2dec872
BR
4367static int exec_context_named_iofds(
4368 const ExecContext *c,
4369 const ExecParameters *p,
4370 int named_iofds[static 3]) {
4371
b012e921 4372 size_t i, targets;
2897b343 4373 const char* stdio_fdname[3];
b012e921 4374 size_t n_fds;
8a584da2
MP
4375
4376 assert(c);
4377 assert(p);
f2dec872 4378 assert(named_iofds);
8a584da2
MP
4379
4380 targets = (c->std_input == EXEC_INPUT_NAMED_FD) +
4381 (c->std_output == EXEC_OUTPUT_NAMED_FD) +
4382 (c->std_error == EXEC_OUTPUT_NAMED_FD);
4383
4384 for (i = 0; i < 3; i++)
4385 stdio_fdname[i] = exec_context_fdname(c, i);
4386
81c58355
MB
4387 n_fds = p->n_storage_fds + p->n_socket_fds;
4388
4389 for (i = 0; i < n_fds && targets > 0; i++)
2897b343
MP
4390 if (named_iofds[STDIN_FILENO] < 0 &&
4391 c->std_input == EXEC_INPUT_NAMED_FD &&
4392 stdio_fdname[STDIN_FILENO] &&
4393 streq(p->fd_names[i], stdio_fdname[STDIN_FILENO])) {
4394
8a584da2
MP
4395 named_iofds[STDIN_FILENO] = p->fds[i];
4396 targets--;
2897b343
MP
4397
4398 } else if (named_iofds[STDOUT_FILENO] < 0 &&
4399 c->std_output == EXEC_OUTPUT_NAMED_FD &&
4400 stdio_fdname[STDOUT_FILENO] &&
4401 streq(p->fd_names[i], stdio_fdname[STDOUT_FILENO])) {
4402
8a584da2
MP
4403 named_iofds[STDOUT_FILENO] = p->fds[i];
4404 targets--;
2897b343
MP
4405
4406 } else if (named_iofds[STDERR_FILENO] < 0 &&
4407 c->std_error == EXEC_OUTPUT_NAMED_FD &&
4408 stdio_fdname[STDERR_FILENO] &&
4409 streq(p->fd_names[i], stdio_fdname[STDERR_FILENO])) {
4410
8a584da2
MP
4411 named_iofds[STDERR_FILENO] = p->fds[i];
4412 targets--;
4413 }
4414
2897b343 4415 return targets == 0 ? 0 : -ENOENT;
8a584da2
MP
4416}
4417
98393f85 4418static int exec_context_load_environment(const Unit *unit, const ExecContext *c, char ***l) {
663996b3
MS
4419 char **i, **r = NULL;
4420
4421 assert(c);
4422 assert(l);
4423
4424 STRV_FOREACH(i, c->environment_files) {
4425 char *fn;
4426 int k;
81c58355 4427 unsigned n;
663996b3
MS
4428 bool ignore = false;
4429 char **p;
4430 _cleanup_globfree_ glob_t pglob = {};
663996b3
MS
4431
4432 fn = *i;
4433
4434 if (fn[0] == '-') {
4435 ignore = true;
aa27b158 4436 fn++;
663996b3
MS
4437 }
4438
4439 if (!path_is_absolute(fn)) {
4440 if (ignore)
4441 continue;
4442
4443 strv_free(r);
4444 return -EINVAL;
4445 }
4446
4447 /* Filename supports globbing, take all matching files */
81c58355
MB
4448 k = safe_glob(fn, 0, &pglob);
4449 if (k < 0) {
663996b3
MS
4450 if (ignore)
4451 continue;
4452
4453 strv_free(r);
81c58355 4454 return k;
663996b3 4455 }
663996b3 4456
81c58355
MB
4457 /* When we don't match anything, -ENOENT should be returned */
4458 assert(pglob.gl_pathc > 0);
4459
4460 for (n = 0; n < pglob.gl_pathc; n++) {
6e866b33 4461 k = load_env_file(NULL, pglob.gl_pathv[n], &p);
663996b3
MS
4462 if (k < 0) {
4463 if (ignore)
4464 continue;
4465
4466 strv_free(r);
4467 return k;
14228c0d 4468 }
663996b3 4469 /* Log invalid environment variables with filename */
e735f4d4
MP
4470 if (p) {
4471 InvalidEnvInfo info = {
e3bff60a 4472 .unit = unit,
e735f4d4
MP
4473 .path = pglob.gl_pathv[n]
4474 };
4475
4476 p = strv_env_clean_with_callback(p, invalid_env, &info);
4477 }
663996b3 4478
52ad194e 4479 if (!r)
663996b3
MS
4480 r = p;
4481 else {
4482 char **m;
4483
4484 m = strv_env_merge(2, r, p);
4485 strv_free(r);
4486 strv_free(p);
4487 if (!m)
4488 return -ENOMEM;
4489
4490 r = m;
4491 }
4492 }
4493 }
4494
4495 *l = r;
4496
4497 return 0;
4498}
4499
4500static bool tty_may_match_dev_console(const char *tty) {
98393f85 4501 _cleanup_free_ char *resolved = NULL;
663996b3 4502
4c89c718
MP
4503 if (!tty)
4504 return true;
4505
f5e65279 4506 tty = skip_dev_prefix(tty);
663996b3
MS
4507
4508 /* trivial identity? */
4509 if (streq(tty, "console"))
4510 return true;
4511
98393f85
MB
4512 if (resolve_dev_console(&resolved) < 0)
4513 return true; /* if we could not resolve, assume it may */
663996b3
MS
4514
4515 /* "tty0" means the active VC, so it may be the same sometimes */
bb4f798a 4516 return path_equal(resolved, tty) || (streq(resolved, "tty0") && tty_is_vc(tty));
663996b3
MS
4517}
4518
bb4f798a
MB
4519static bool exec_context_may_touch_tty(const ExecContext *ec) {
4520 assert(ec);
4c89c718 4521
bb4f798a 4522 return ec->tty_reset ||
4c89c718
MP
4523 ec->tty_vhangup ||
4524 ec->tty_vt_disallocate ||
663996b3
MS
4525 is_terminal_input(ec->std_input) ||
4526 is_terminal_output(ec->std_output) ||
bb4f798a
MB
4527 is_terminal_output(ec->std_error);
4528}
4529
4530bool exec_context_may_touch_console(const ExecContext *ec) {
4531
4532 return exec_context_may_touch_tty(ec) &&
4c89c718 4533 tty_may_match_dev_console(exec_context_tty_path(ec));
663996b3
MS
4534}
4535
/* Writes every string of the NULL-terminated vector l to f, each one preceded by a single space.
 * A NULL vector is treated like an empty one. */
static void strv_fprintf(FILE *f, char **l) {
        char **entry;

        assert(f);

        for (entry = l; entry && *entry; entry++)
                fprintf(f, " %s", *entry);
}
4544
98393f85 4545void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) {
812752cc 4546 char **e, **d, buf_clean[FORMAT_TIMESPAN_MAX];
52ad194e 4547 ExecDirectoryType dt;
663996b3 4548 unsigned i;
2897b343 4549 int r;
663996b3
MS
4550
4551 assert(c);
4552 assert(f);
4553
14228c0d 4554 prefix = strempty(prefix);
663996b3
MS
4555
4556 fprintf(f,
4557 "%sUMask: %04o\n"
4558 "%sWorkingDirectory: %s\n"
4559 "%sRootDirectory: %s\n"
4560 "%sNonBlocking: %s\n"
4561 "%sPrivateTmp: %s\n"
60f067b4 4562 "%sPrivateDevices: %s\n"
8a584da2
MP
4563 "%sProtectKernelTunables: %s\n"
4564 "%sProtectKernelModules: %s\n"
e1f67bc7 4565 "%sProtectKernelLogs: %s\n"
46cdbd49 4566 "%sProtectClock: %s\n"
8a584da2
MP
4567 "%sProtectControlGroups: %s\n"
4568 "%sPrivateNetwork: %s\n"
4569 "%sPrivateUsers: %s\n"
60f067b4
JS
4570 "%sProtectHome: %s\n"
4571 "%sProtectSystem: %s\n"
2897b343 4572 "%sMountAPIVFS: %s\n"
5a920b42
MP
4573 "%sIgnoreSIGPIPE: %s\n"
4574 "%sMemoryDenyWriteExecute: %s\n"
f5e65279 4575 "%sRestrictRealtime: %s\n"
bb4f798a
MB
4576 "%sRestrictSUIDSGID: %s\n"
4577 "%sKeyringMode: %s\n"
4578 "%sProtectHostname: %s\n",
663996b3
MS
4579 prefix, c->umask,
4580 prefix, c->working_directory ? c->working_directory : "/",
4581 prefix, c->root_directory ? c->root_directory : "/",
4582 prefix, yes_no(c->non_blocking),
4583 prefix, yes_no(c->private_tmp),
60f067b4 4584 prefix, yes_no(c->private_devices),
8a584da2
MP
4585 prefix, yes_no(c->protect_kernel_tunables),
4586 prefix, yes_no(c->protect_kernel_modules),
e1f67bc7 4587 prefix, yes_no(c->protect_kernel_logs),
46cdbd49 4588 prefix, yes_no(c->protect_clock),
8a584da2
MP
4589 prefix, yes_no(c->protect_control_groups),
4590 prefix, yes_no(c->private_network),
4591 prefix, yes_no(c->private_users),
60f067b4
JS
4592 prefix, protect_home_to_string(c->protect_home),
4593 prefix, protect_system_to_string(c->protect_system),
2897b343 4594 prefix, yes_no(c->mount_apivfs),
5a920b42
MP
4595 prefix, yes_no(c->ignore_sigpipe),
4596 prefix, yes_no(c->memory_deny_write_execute),
f5e65279 4597 prefix, yes_no(c->restrict_realtime),
bb4f798a
MB
4598 prefix, yes_no(c->restrict_suid_sgid),
4599 prefix, exec_keyring_mode_to_string(c->keyring_mode),
4600 prefix, yes_no(c->protect_hostname));
663996b3 4601
2897b343
MP
4602 if (c->root_image)
4603 fprintf(f, "%sRootImage: %s\n", prefix, c->root_image);
4604
663996b3
MS
4605 STRV_FOREACH(e, c->environment)
4606 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
4607
4608 STRV_FOREACH(e, c->environment_files)
4609 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
4610
db2df898
MP
4611 STRV_FOREACH(e, c->pass_environment)
4612 fprintf(f, "%sPassEnvironment: %s\n", prefix, *e);
4613
f5e65279
MB
4614 STRV_FOREACH(e, c->unset_environment)
4615 fprintf(f, "%sUnsetEnvironment: %s\n", prefix, *e);
4616
4617 fprintf(f, "%sRuntimeDirectoryPreserve: %s\n", prefix, exec_preserve_mode_to_string(c->runtime_directory_preserve_mode));
4618
4619 for (dt = 0; dt < _EXEC_DIRECTORY_TYPE_MAX; dt++) {
4620 fprintf(f, "%s%sMode: %04o\n", prefix, exec_directory_type_to_string(dt), c->directories[dt].mode);
db2df898 4621
f5e65279
MB
4622 STRV_FOREACH(d, c->directories[dt].paths)
4623 fprintf(f, "%s%s: %s\n", prefix, exec_directory_type_to_string(dt), *d);
4624 }
db2df898 4625
812752cc
MB
4626 fprintf(f,
4627 "%sTimeoutCleanSec: %s\n",
4628 prefix, format_timespan(buf_clean, sizeof(buf_clean), c->timeout_clean_usec, USEC_PER_SEC));
4629
663996b3
MS
4630 if (c->nice_set)
4631 fprintf(f,
4632 "%sNice: %i\n",
4633 prefix, c->nice);
4634
4635 if (c->oom_score_adjust_set)
4636 fprintf(f,
4637 "%sOOMScoreAdjust: %i\n",
4638 prefix, c->oom_score_adjust);
4639
4640 for (i = 0; i < RLIM_NLIMITS; i++)
4c89c718 4641 if (c->rlimit[i]) {
6e866b33 4642 fprintf(f, "%sLimit%s: " RLIM_FMT "\n",
60f067b4 4643 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
6e866b33 4644 fprintf(f, "%sLimit%sSoft: " RLIM_FMT "\n",
4c89c718
MP
4645 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_cur);
4646 }
663996b3
MS
4647
4648 if (c->ioprio_set) {
60f067b4 4649 _cleanup_free_ char *class_str = NULL;
663996b3 4650
f5e65279
MB
4651 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
4652 if (r >= 0)
4653 fprintf(f, "%sIOSchedulingClass: %s\n", prefix, class_str);
4654
4655 fprintf(f, "%sIOPriority: %lu\n", prefix, IOPRIO_PRIO_DATA(c->ioprio));
663996b3
MS
4656 }
4657
4658 if (c->cpu_sched_set) {
60f067b4 4659 _cleanup_free_ char *policy_str = NULL;
663996b3 4660
f5e65279
MB
4661 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
4662 if (r >= 0)
4663 fprintf(f, "%sCPUSchedulingPolicy: %s\n", prefix, policy_str);
4664
663996b3 4665 fprintf(f,
663996b3
MS
4666 "%sCPUSchedulingPriority: %i\n"
4667 "%sCPUSchedulingResetOnFork: %s\n",
663996b3
MS
4668 prefix, c->cpu_sched_priority,
4669 prefix, yes_no(c->cpu_sched_reset_on_fork));
663996b3
MS
4670 }
4671
f2dec872
BR
4672 if (c->cpu_set.set) {
4673 _cleanup_free_ char *affinity = NULL;
4674
4675 affinity = cpu_set_to_range_string(&c->cpu_set);
4676 fprintf(f, "%sCPUAffinity: %s\n", prefix, affinity);
4677 }
4678
4679 if (mpol_is_valid(numa_policy_get_type(&c->numa_policy))) {
4680 _cleanup_free_ char *nodes = NULL;
4681
4682 nodes = cpu_set_to_range_string(&c->numa_policy.nodes);
4683 fprintf(f, "%sNUMAPolicy: %s\n", prefix, mpol_to_string(numa_policy_get_type(&c->numa_policy)));
4684 fprintf(f, "%sNUMAMask: %s\n", prefix, strnull(nodes));
663996b3
MS
4685 }
4686
5eef597e 4687 if (c->timer_slack_nsec != NSEC_INFINITY)
60f067b4 4688 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
663996b3
MS
4689
4690 fprintf(f,
4691 "%sStandardInput: %s\n"
4692 "%sStandardOutput: %s\n"
4693 "%sStandardError: %s\n",
4694 prefix, exec_input_to_string(c->std_input),
4695 prefix, exec_output_to_string(c->std_output),
4696 prefix, exec_output_to_string(c->std_error));
4697
52ad194e
MB
4698 if (c->std_input == EXEC_INPUT_NAMED_FD)
4699 fprintf(f, "%sStandardInputFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDIN_FILENO]);
4700 if (c->std_output == EXEC_OUTPUT_NAMED_FD)
4701 fprintf(f, "%sStandardOutputFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDOUT_FILENO]);
4702 if (c->std_error == EXEC_OUTPUT_NAMED_FD)
4703 fprintf(f, "%sStandardErrorFileDescriptorName: %s\n", prefix, c->stdio_fdname[STDERR_FILENO]);
4704
4705 if (c->std_input == EXEC_INPUT_FILE)
4706 fprintf(f, "%sStandardInputFile: %s\n", prefix, c->stdio_file[STDIN_FILENO]);
4707 if (c->std_output == EXEC_OUTPUT_FILE)
4708 fprintf(f, "%sStandardOutputFile: %s\n", prefix, c->stdio_file[STDOUT_FILENO]);
6e866b33
MB
4709 if (c->std_output == EXEC_OUTPUT_FILE_APPEND)
4710 fprintf(f, "%sStandardOutputFileToAppend: %s\n", prefix, c->stdio_file[STDOUT_FILENO]);
52ad194e
MB
4711 if (c->std_error == EXEC_OUTPUT_FILE)
4712 fprintf(f, "%sStandardErrorFile: %s\n", prefix, c->stdio_file[STDERR_FILENO]);
6e866b33
MB
4713 if (c->std_error == EXEC_OUTPUT_FILE_APPEND)
4714 fprintf(f, "%sStandardErrorFileToAppend: %s\n", prefix, c->stdio_file[STDERR_FILENO]);
52ad194e 4715
663996b3
MS
4716 if (c->tty_path)
4717 fprintf(f,
4718 "%sTTYPath: %s\n"
4719 "%sTTYReset: %s\n"
4720 "%sTTYVHangup: %s\n"
4721 "%sTTYVTDisallocate: %s\n",
4722 prefix, c->tty_path,
4723 prefix, yes_no(c->tty_reset),
4724 prefix, yes_no(c->tty_vhangup),
4725 prefix, yes_no(c->tty_vt_disallocate));
4726
f5e65279
MB
4727 if (IN_SET(c->std_output,
4728 EXEC_OUTPUT_SYSLOG,
4729 EXEC_OUTPUT_KMSG,
4730 EXEC_OUTPUT_JOURNAL,
4731 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
4732 EXEC_OUTPUT_KMSG_AND_CONSOLE,
4733 EXEC_OUTPUT_JOURNAL_AND_CONSOLE) ||
4734 IN_SET(c->std_error,
4735 EXEC_OUTPUT_SYSLOG,
4736 EXEC_OUTPUT_KMSG,
4737 EXEC_OUTPUT_JOURNAL,
4738 EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
4739 EXEC_OUTPUT_KMSG_AND_CONSOLE,
4740 EXEC_OUTPUT_JOURNAL_AND_CONSOLE)) {
663996b3 4741
60f067b4 4742 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
663996b3 4743
f5e65279
MB
4744 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
4745 if (r >= 0)
4746 fprintf(f, "%sSyslogFacility: %s\n", prefix, fac_str);
663996b3 4747
f5e65279
MB
4748 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
4749 if (r >= 0)
4750 fprintf(f, "%sSyslogLevel: %s\n", prefix, lvl_str);
4751 }
663996b3 4752
52ad194e
MB
4753 if (c->log_level_max >= 0) {
4754 _cleanup_free_ char *t = NULL;
4755
4756 (void) log_level_to_string_alloc(c->log_level_max, &t);
4757
4758 fprintf(f, "%sLogLevelMax: %s\n", prefix, strna(t));
4759 }
4760
e1f67bc7 4761 if (c->log_ratelimit_interval_usec > 0) {
6e866b33
MB
4762 char buf_timespan[FORMAT_TIMESPAN_MAX];
4763
4764 fprintf(f,
4765 "%sLogRateLimitIntervalSec: %s\n",
e1f67bc7 4766 prefix, format_timespan(buf_timespan, sizeof(buf_timespan), c->log_ratelimit_interval_usec, USEC_PER_SEC));
6e866b33
MB
4767 }
4768
e1f67bc7
MB
4769 if (c->log_ratelimit_burst > 0)
4770 fprintf(f, "%sLogRateLimitBurst: %u\n", prefix, c->log_ratelimit_burst);
6e866b33 4771
52ad194e
MB
4772 if (c->n_log_extra_fields > 0) {
4773 size_t j;
4774
4775 for (j = 0; j < c->n_log_extra_fields; j++) {
4776 fprintf(f, "%sLogExtraFields: ", prefix);
4777 fwrite(c->log_extra_fields[j].iov_base,
4778 1, c->log_extra_fields[j].iov_len,
4779 f);
4780 fputc('\n', f);
4781 }
4782 }
4783
46cdbd49
BR
4784 if (c->log_namespace)
4785 fprintf(f, "%sLogNamespace: %s\n", prefix, c->log_namespace);
4786
f5e65279
MB
4787 if (c->secure_bits) {
4788 _cleanup_free_ char *str = NULL;
663996b3 4789
f5e65279
MB
4790 r = secure_bits_to_string_alloc(c->secure_bits, &str);
4791 if (r >= 0)
4792 fprintf(f, "%sSecure Bits: %s\n", prefix, str);
4793 }
4c89c718 4794
f5e65279
MB
4795 if (c->capability_bounding_set != CAP_ALL) {
4796 _cleanup_free_ char *str = NULL;
4797
4798 r = capability_set_to_string_alloc(c->capability_bounding_set, &str);
4799 if (r >= 0)
4800 fprintf(f, "%sCapabilityBoundingSet: %s\n", prefix, str);
4c89c718
MP
4801 }
4802
4803 if (c->capability_ambient_set != 0) {
f5e65279 4804 _cleanup_free_ char *str = NULL;
663996b3 4805
f5e65279
MB
4806 r = capability_set_to_string_alloc(c->capability_ambient_set, &str);
4807 if (r >= 0)
4808 fprintf(f, "%sAmbientCapabilities: %s\n", prefix, str);
663996b3
MS
4809 }
4810
4811 if (c->user)
4812 fprintf(f, "%sUser: %s\n", prefix, c->user);
4813 if (c->group)
4814 fprintf(f, "%sGroup: %s\n", prefix, c->group);
4815
8a584da2
MP
4816 fprintf(f, "%sDynamicUser: %s\n", prefix, yes_no(c->dynamic_user));
4817
f5e65279 4818 if (!strv_isempty(c->supplementary_groups)) {
663996b3
MS
4819 fprintf(f, "%sSupplementaryGroups:", prefix);
4820 strv_fprintf(f, c->supplementary_groups);
4821 fputs("\n", f);
4822 }
4823
4824 if (c->pam_name)
4825 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
4826
1d42b86d 4827 if (!strv_isempty(c->read_write_paths)) {
5a920b42
MP
4828 fprintf(f, "%sReadWritePaths:", prefix);
4829 strv_fprintf(f, c->read_write_paths);
663996b3
MS
4830 fputs("\n", f);
4831 }
4832
1d42b86d 4833 if (!strv_isempty(c->read_only_paths)) {
5a920b42
MP
4834 fprintf(f, "%sReadOnlyPaths:", prefix);
4835 strv_fprintf(f, c->read_only_paths);
663996b3
MS
4836 fputs("\n", f);
4837 }
4838
1d42b86d 4839 if (!strv_isempty(c->inaccessible_paths)) {
5a920b42
MP
4840 fprintf(f, "%sInaccessiblePaths:", prefix);
4841 strv_fprintf(f, c->inaccessible_paths);
663996b3
MS
4842 fputs("\n", f);
4843 }
4844
2897b343 4845 if (c->n_bind_mounts > 0)
98393f85
MB
4846 for (i = 0; i < c->n_bind_mounts; i++)
4847 fprintf(f, "%s%s: %s%s:%s:%s\n", prefix,
2897b343 4848 c->bind_mounts[i].read_only ? "BindReadOnlyPaths" : "BindPaths",
98393f85 4849 c->bind_mounts[i].ignore_enoent ? "-": "",
2897b343
MP
4850 c->bind_mounts[i].source,
4851 c->bind_mounts[i].destination,
4852 c->bind_mounts[i].recursive ? "rbind" : "norbind");
98393f85
MB
4853
4854 if (c->n_temporary_filesystems > 0)
4855 for (i = 0; i < c->n_temporary_filesystems; i++) {
4856 TemporaryFileSystem *t = c->temporary_filesystems + i;
4857
4858 fprintf(f, "%sTemporaryFileSystem: %s%s%s\n", prefix,
4859 t->path,
4860 isempty(t->options) ? "" : ":",
4861 strempty(t->options));
2897b343
MP
4862 }
4863
663996b3
MS
4864 if (c->utmp_id)
4865 fprintf(f,
4866 "%sUtmpIdentifier: %s\n",
4867 prefix, c->utmp_id);
60f067b4
JS
4868
4869 if (c->selinux_context)
4870 fprintf(f,
4871 "%sSELinuxContext: %s%s\n",
4872 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
4873
f5e65279
MB
4874 if (c->apparmor_profile)
4875 fprintf(f,
4876 "%sAppArmorProfile: %s%s\n",
4877 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
4878
4879 if (c->smack_process_label)
4880 fprintf(f,
4881 "%sSmackProcessLabel: %s%s\n",
4882 prefix, c->smack_process_label_ignore ? "-" : "", c->smack_process_label);
4883
e3bff60a 4884 if (c->personality != PERSONALITY_INVALID)
60f067b4
JS
4885 fprintf(f,
4886 "%sPersonality: %s\n",
4887 prefix, strna(personality_to_string(c->personality)));
4888
f5e65279
MB
4889 fprintf(f,
4890 "%sLockPersonality: %s\n",
4891 prefix, yes_no(c->lock_personality));
4892
60f067b4 4893 if (c->syscall_filter) {
f5e65279 4894#if HAVE_SECCOMP
60f067b4 4895 Iterator j;
52ad194e 4896 void *id, *val;
60f067b4
JS
4897 bool first = true;
4898#endif
4899
4900 fprintf(f,
4901 "%sSystemCallFilter: ",
4902 prefix);
4903
4904 if (!c->syscall_whitelist)
4905 fputc('~', f);
4906
f5e65279 4907#if HAVE_SECCOMP
52ad194e 4908 HASHMAP_FOREACH_KEY(val, id, c->syscall_filter, j) {
60f067b4 4909 _cleanup_free_ char *name = NULL;
52ad194e
MB
4910 const char *errno_name = NULL;
4911 int num = PTR_TO_INT(val);
60f067b4
JS
4912
4913 if (first)
4914 first = false;
4915 else
4916 fputc(' ', f);
4917
4918 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
4919 fputs(strna(name), f);
52ad194e
MB
4920
4921 if (num >= 0) {
4922 errno_name = errno_to_name(num);
4923 if (errno_name)
4924 fprintf(f, ":%s", errno_name);
4925 else
4926 fprintf(f, ":%d", num);
4927 }
60f067b4
JS
4928 }
4929#endif
4930
4931 fputc('\n', f);
4932 }
4933
4934 if (c->syscall_archs) {
f5e65279 4935#if HAVE_SECCOMP
60f067b4
JS
4936 Iterator j;
4937 void *id;
4938#endif
4939
4940 fprintf(f,
4941 "%sSystemCallArchitectures:",
4942 prefix);
4943
f5e65279 4944#if HAVE_SECCOMP
60f067b4
JS
4945 SET_FOREACH(id, c->syscall_archs, j)
4946 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
4947#endif
4948 fputc('\n', f);
4949 }
4950
2897b343
MP
4951 if (exec_context_restrict_namespaces_set(c)) {
4952 _cleanup_free_ char *s = NULL;
4953
b012e921 4954 r = namespace_flags_to_string(c->restrict_namespaces, &s);
2897b343
MP
4955 if (r >= 0)
4956 fprintf(f, "%sRestrictNamespaces: %s\n",
46cdbd49 4957 prefix, strna(s));
2897b343
MP
4958 }
4959
bb4f798a
MB
4960 if (c->network_namespace_path)
4961 fprintf(f,
4962 "%sNetworkNamespacePath: %s\n",
4963 prefix, c->network_namespace_path);
4964
52ad194e
MB
4965 if (c->syscall_errno > 0) {
4966 const char *errno_name;
4967
4968 fprintf(f, "%sSystemCallErrorNumber: ", prefix);
4969
4970 errno_name = errno_to_name(c->syscall_errno);
4971 if (errno_name)
4972 fprintf(f, "%s\n", errno_name);
4973 else
4974 fprintf(f, "%d\n", c->syscall_errno);
4975 }
663996b3
MS
4976}
4977
98393f85 4978bool exec_context_maintains_privileges(const ExecContext *c) {
f47781d8
MP
4979 assert(c);
4980
5a920b42 4981 /* Returns true if the process forked off would run under
f47781d8
MP
4982 * an unchanged UID or as root. */
4983
4984 if (!c->user)
4985 return true;
4986
4987 if (streq(c->user, "root") || streq(c->user, "0"))
4988 return true;
4989
4990 return false;
4991}
4992
98393f85 4993int exec_context_get_effective_ioprio(const ExecContext *c) {
81c58355
MB
4994 int p;
4995
4996 assert(c);
4997
4998 if (c->ioprio_set)
4999 return c->ioprio;
5000
5001 p = ioprio_get(IOPRIO_WHO_PROCESS, 0);
5002 if (p < 0)
5003 return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 4);
5004
5005 return p;
5006}
5007
52ad194e
MB
5008void exec_context_free_log_extra_fields(ExecContext *c) {
5009 size_t l;
5010
5011 assert(c);
5012
5013 for (l = 0; l < c->n_log_extra_fields; l++)
5014 free(c->log_extra_fields[l].iov_base);
5015 c->log_extra_fields = mfree(c->log_extra_fields);
5016 c->n_log_extra_fields = 0;
5017}
5018
bb4f798a
MB
5019void exec_context_revert_tty(ExecContext *c) {
5020 int r;
5021
5022 assert(c);
5023
5024 /* First, reset the TTY (possibly kicking everybody else from the TTY) */
5025 exec_context_tty_reset(c, NULL);
5026
5027 /* And then undo what chown_terminal() did earlier. Note that we only do this if we have a path
5028 * configured. If the TTY was passed to us as file descriptor we assume the TTY is opened and managed
5029 * by whoever passed it to us and thus knows better when and how to chmod()/chown() it back. */
5030
5031 if (exec_context_may_touch_tty(c)) {
5032 const char *path;
5033
5034 path = exec_context_tty_path(c);
5035 if (path) {
5036 r = chmod_and_chown(path, TTY_MODE, 0, TTY_GID);
5037 if (r < 0 && r != -ENOENT)
5038 log_warning_errno(r, "Failed to reset TTY ownership/access mode of %s, ignoring: %m", path);
5039 }
5040 }
5041}
5042
f2dec872
BR
5043int exec_context_get_clean_directories(
5044 ExecContext *c,
5045 char **prefix,
5046 ExecCleanMask mask,
5047 char ***ret) {
5048
5049 _cleanup_strv_free_ char **l = NULL;
5050 ExecDirectoryType t;
5051 int r;
5052
5053 assert(c);
5054 assert(prefix);
5055 assert(ret);
5056
5057 for (t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++) {
5058 char **i;
5059
5060 if (!FLAGS_SET(mask, 1U << t))
5061 continue;
5062
5063 if (!prefix[t])
5064 continue;
5065
5066 STRV_FOREACH(i, c->directories[t].paths) {
5067 char *j;
5068
5069 j = path_join(prefix[t], *i);
5070 if (!j)
5071 return -ENOMEM;
5072
5073 r = strv_consume(&l, j);
5074 if (r < 0)
5075 return r;
812752cc
MB
5076
5077 /* Also remove private directories unconditionally. */
5078 if (t != EXEC_DIRECTORY_CONFIGURATION) {
5079 j = path_join(prefix[t], "private", *i);
5080 if (!j)
5081 return -ENOMEM;
5082
5083 r = strv_consume(&l, j);
5084 if (r < 0)
5085 return r;
5086 }
f2dec872
BR
5087 }
5088 }
5089
5090 *ret = TAKE_PTR(l);
5091 return 0;
5092}
5093
5094int exec_context_get_clean_mask(ExecContext *c, ExecCleanMask *ret) {
5095 ExecCleanMask mask = 0;
5096
5097 assert(c);
5098 assert(ret);
5099
5100 for (ExecDirectoryType t = 0; t < _EXEC_DIRECTORY_TYPE_MAX; t++)
5101 if (!strv_isempty(c->directories[t].paths))
5102 mask |= 1U << t;
5103
5104 *ret = mask;
5105 return 0;
5106}
5107
663996b3
MS
5108void exec_status_start(ExecStatus *s, pid_t pid) {
5109 assert(s);
5110
6e866b33
MB
5111 *s = (ExecStatus) {
5112 .pid = pid,
5113 };
5114
663996b3
MS
5115 dual_timestamp_get(&s->start_timestamp);
5116}
5117
98393f85 5118void exec_status_exit(ExecStatus *s, const ExecContext *context, pid_t pid, int code, int status) {
663996b3
MS
5119 assert(s);
5120
6e866b33
MB
5121 if (s->pid != pid) {
5122 *s = (ExecStatus) {
5123 .pid = pid,
5124 };
5125 }
663996b3 5126
663996b3
MS
5127 dual_timestamp_get(&s->exit_timestamp);
5128
5129 s->code = code;
5130 s->status = status;
5131
bb4f798a
MB
5132 if (context && context->utmp_id)
5133 (void) utmp_put_dead_process(context->utmp_id, pid, code, status);
663996b3
MS
5134}
5135
6e866b33
MB
5136void exec_status_reset(ExecStatus *s) {
5137 assert(s);
5138
5139 *s = (ExecStatus) {};
5140}
5141
98393f85 5142void exec_status_dump(const ExecStatus *s, FILE *f, const char *prefix) {
663996b3
MS
5143 char buf[FORMAT_TIMESTAMP_MAX];
5144
5145 assert(s);
5146 assert(f);
5147
663996b3
MS
5148 if (s->pid <= 0)
5149 return;
5150
5eef597e
MP
5151 prefix = strempty(prefix);
5152
663996b3 5153 fprintf(f,
60f067b4
JS
5154 "%sPID: "PID_FMT"\n",
5155 prefix, s->pid);
663996b3 5156
8a584da2 5157 if (dual_timestamp_is_set(&s->start_timestamp))
663996b3
MS
5158 fprintf(f,
5159 "%sStart Timestamp: %s\n",
5160 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
5161
8a584da2 5162 if (dual_timestamp_is_set(&s->exit_timestamp))
663996b3
MS
5163 fprintf(f,
5164 "%sExit Timestamp: %s\n"
5165 "%sExit Code: %s\n"
5166 "%sExit Status: %i\n",
5167 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
5168 prefix, sigchld_code_to_string(s->code),
5169 prefix, s->status);
5170}
5171
98393f85 5172static char *exec_command_line(char **argv) {
663996b3
MS
5173 size_t k;
5174 char *n, *p, **a;
5175 bool first = true;
5176
5177 assert(argv);
5178
5179 k = 1;
5180 STRV_FOREACH(a, argv)
5181 k += strlen(*a)+3;
5182
8a584da2
MP
5183 n = new(char, k);
5184 if (!n)
663996b3
MS
5185 return NULL;
5186
5187 p = n;
5188 STRV_FOREACH(a, argv) {
5189
5190 if (!first)
5191 *(p++) = ' ';
5192 else
5193 first = false;
5194
5195 if (strpbrk(*a, WHITESPACE)) {
5196 *(p++) = '\'';
5197 p = stpcpy(p, *a);
5198 *(p++) = '\'';
5199 } else
5200 p = stpcpy(p, *a);
5201
5202 }
5203
5204 *p = 0;
5205
5206 /* FIXME: this doesn't really handle arguments that have
5207 * spaces and ticks in them */
5208
5209 return n;
5210}
5211
98393f85 5212static void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
e842803a 5213 _cleanup_free_ char *cmd = NULL;
5eef597e 5214 const char *prefix2;
663996b3
MS
5215
5216 assert(c);
5217 assert(f);
5218
5eef597e 5219 prefix = strempty(prefix);
e735f4d4 5220 prefix2 = strjoina(prefix, "\t");
663996b3
MS
5221
5222 cmd = exec_command_line(c->argv);
663996b3
MS
5223 fprintf(f,
5224 "%sCommand Line: %s\n",
f2dec872 5225 prefix, cmd ? cmd : strerror_safe(ENOMEM));
663996b3 5226
663996b3 5227 exec_status_dump(&c->exec_status, f, prefix2);
663996b3
MS
5228}
5229
5230void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
5231 assert(f);
5232
5eef597e 5233 prefix = strempty(prefix);
663996b3
MS
5234
5235 LIST_FOREACH(command, c, c)
5236 exec_command_dump(c, f, prefix);
5237}
5238
5239void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
5240 ExecCommand *end;
5241
5242 assert(l);
5243 assert(e);
5244
5245 if (*l) {
5246 /* It's kind of important, that we keep the order here */
60f067b4
JS
5247 LIST_FIND_TAIL(command, *l, end);
5248 LIST_INSERT_AFTER(command, *l, end, e);
663996b3
MS
5249 } else
5250 *l = e;
5251}
5252
5253int exec_command_set(ExecCommand *c, const char *path, ...) {
5254 va_list ap;
5255 char **l, *p;
5256
5257 assert(c);
5258 assert(path);
5259
5260 va_start(ap, path);
5261 l = strv_new_ap(path, ap);
5262 va_end(ap);
5263
5264 if (!l)
5265 return -ENOMEM;
5266
60f067b4
JS
5267 p = strdup(path);
5268 if (!p) {
663996b3
MS
5269 strv_free(l);
5270 return -ENOMEM;
5271 }
5272
6e866b33 5273 free_and_replace(c->path, p);
663996b3 5274
b012e921 5275 return strv_free_and_replace(c->argv, l);
663996b3
MS
5276}
5277
5eef597e
MP
5278int exec_command_append(ExecCommand *c, const char *path, ...) {
5279 _cleanup_strv_free_ char **l = NULL;
5280 va_list ap;
5281 int r;
5282
5283 assert(c);
5284 assert(path);
5285
5286 va_start(ap, path);
5287 l = strv_new_ap(path, ap);
5288 va_end(ap);
5289
5290 if (!l)
5291 return -ENOMEM;
5292
6300502b 5293 r = strv_extend_strv(&c->argv, l, false);
5eef597e
MP
5294 if (r < 0)
5295 return r;
5296
5297 return 0;
5298}
5299
98393f85
MB
5300static void *remove_tmpdir_thread(void *p) {
5301 _cleanup_free_ char *path = p;
5eef597e 5302
98393f85
MB
5303 (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
5304 return NULL;
5305}
5306
5307static ExecRuntime* exec_runtime_free(ExecRuntime *rt, bool destroy) {
5308 int r;
5309
5310 if (!rt)
5311 return NULL;
5312
5313 if (rt->manager)
5314 (void) hashmap_remove(rt->manager->exec_runtime_by_id, rt->id);
5315
5316 /* When destroy is true, then rm_rf tmp_dir and var_tmp_dir. */
5317 if (destroy && rt->tmp_dir) {
5318 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
5319
5320 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
5321 if (r < 0) {
5322 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
5323 free(rt->tmp_dir);
5324 }
60f067b4 5325
98393f85
MB
5326 rt->tmp_dir = NULL;
5327 }
5328
5329 if (destroy && rt->var_tmp_dir) {
5330 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
5331
5332 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
5333 if (r < 0) {
5334 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
5335 free(rt->var_tmp_dir);
5336 }
5337
5338 rt->var_tmp_dir = NULL;
5339 }
5340
5341 rt->id = mfree(rt->id);
5342 rt->tmp_dir = mfree(rt->tmp_dir);
5343 rt->var_tmp_dir = mfree(rt->var_tmp_dir);
5344 safe_close_pair(rt->netns_storage_socket);
5345 return mfree(rt);
5346}
5347
5348static void exec_runtime_freep(ExecRuntime **rt) {
bb4f798a 5349 (void) exec_runtime_free(*rt, false);
98393f85
MB
5350}
5351
bb4f798a
MB
5352static int exec_runtime_allocate(ExecRuntime **ret) {
5353 ExecRuntime *n;
60f067b4 5354
bb4f798a
MB
5355 assert(ret);
5356
5357 n = new(ExecRuntime, 1);
5358 if (!n)
60f067b4
JS
5359 return -ENOMEM;
5360
bb4f798a
MB
5361 *n = (ExecRuntime) {
5362 .netns_storage_socket = { -1, -1 },
5363 };
5364
5365 *ret = n;
60f067b4
JS
5366 return 0;
5367}
5368
98393f85
MB
5369static int exec_runtime_add(
5370 Manager *m,
5371 const char *id,
5372 const char *tmp_dir,
5373 const char *var_tmp_dir,
5374 const int netns_storage_socket[2],
5375 ExecRuntime **ret) {
5376
5377 _cleanup_(exec_runtime_freep) ExecRuntime *rt = NULL;
60f067b4
JS
5378 int r;
5379
98393f85 5380 assert(m);
60f067b4
JS
5381 assert(id);
5382
98393f85
MB
5383 r = hashmap_ensure_allocated(&m->exec_runtime_by_id, &string_hash_ops);
5384 if (r < 0)
5385 return r;
60f067b4 5386
98393f85 5387 r = exec_runtime_allocate(&rt);
60f067b4
JS
5388 if (r < 0)
5389 return r;
5390
98393f85
MB
5391 rt->id = strdup(id);
5392 if (!rt->id)
5393 return -ENOMEM;
5394
5395 if (tmp_dir) {
5396 rt->tmp_dir = strdup(tmp_dir);
5397 if (!rt->tmp_dir)
5398 return -ENOMEM;
5399
5400 /* When tmp_dir is set, then we require var_tmp_dir is also set. */
5401 assert(var_tmp_dir);
5402 rt->var_tmp_dir = strdup(var_tmp_dir);
5403 if (!rt->var_tmp_dir)
5404 return -ENOMEM;
5405 }
5406
5407 if (netns_storage_socket) {
5408 rt->netns_storage_socket[0] = netns_storage_socket[0];
5409 rt->netns_storage_socket[1] = netns_storage_socket[1];
60f067b4
JS
5410 }
5411
98393f85
MB
5412 r = hashmap_put(m->exec_runtime_by_id, rt->id, rt);
5413 if (r < 0)
5414 return r;
5415
5416 rt->manager = m;
5417
5418 if (ret)
5419 *ret = rt;
5420
5421 /* do not remove created ExecRuntime object when the operation succeeds. */
5422 rt = NULL;
5423 return 0;
5424}
5425
5426static int exec_runtime_make(Manager *m, const ExecContext *c, const char *id, ExecRuntime **ret) {
5427 _cleanup_free_ char *tmp_dir = NULL, *var_tmp_dir = NULL;
bb4f798a 5428 _cleanup_close_pair_ int netns_storage_socket[2] = { -1, -1 };
98393f85
MB
5429 int r;
5430
5431 assert(m);
5432 assert(c);
5433 assert(id);
5434
5435 /* It is not necessary to create ExecRuntime object. */
bb4f798a 5436 if (!c->private_network && !c->private_tmp && !c->network_namespace_path)
98393f85
MB
5437 return 0;
5438
5439 if (c->private_tmp) {
5440 r = setup_tmp_dirs(id, &tmp_dir, &var_tmp_dir);
60f067b4
JS
5441 if (r < 0)
5442 return r;
5443 }
5444
bb4f798a 5445 if (c->private_network || c->network_namespace_path) {
98393f85
MB
5446 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, netns_storage_socket) < 0)
5447 return -errno;
5448 }
5449
5450 r = exec_runtime_add(m, id, tmp_dir, var_tmp_dir, netns_storage_socket, ret);
5451 if (r < 0)
5452 return r;
5453
5454 /* Avoid cleanup */
bb4f798a 5455 netns_storage_socket[0] = netns_storage_socket[1] = -1;
60f067b4
JS
5456 return 1;
5457}
5458
98393f85
MB
5459int exec_runtime_acquire(Manager *m, const ExecContext *c, const char *id, bool create, ExecRuntime **ret) {
5460 ExecRuntime *rt;
5461 int r;
60f067b4 5462
98393f85
MB
5463 assert(m);
5464 assert(id);
5465 assert(ret);
5466
5467 rt = hashmap_get(m->exec_runtime_by_id, id);
5468 if (rt)
5469 /* We already have a ExecRuntime object, let's increase the ref count and reuse it */
5470 goto ref;
60f067b4 5471
98393f85
MB
5472 if (!create)
5473 return 0;
60f067b4 5474
98393f85
MB
5475 /* If not found, then create a new object. */
5476 r = exec_runtime_make(m, c, id, &rt);
5477 if (r <= 0)
5478 /* When r == 0, it is not necessary to create ExecRuntime object. */
5479 return r;
5480
5481ref:
5482 /* increment reference counter. */
5483 rt->n_ref++;
5484 *ret = rt;
5485 return 1;
5486}
5487
5488ExecRuntime *exec_runtime_unref(ExecRuntime *rt, bool destroy) {
5489 if (!rt)
60f067b4
JS
5490 return NULL;
5491
98393f85 5492 assert(rt->n_ref > 0);
60f067b4 5493
98393f85
MB
5494 rt->n_ref--;
5495 if (rt->n_ref > 0)
e3bff60a
MP
5496 return NULL;
5497
98393f85 5498 return exec_runtime_free(rt, destroy);
60f067b4
JS
5499}
5500
98393f85
MB
5501int exec_runtime_serialize(const Manager *m, FILE *f, FDSet *fds) {
5502 ExecRuntime *rt;
5503 Iterator i;
5504
5505 assert(m);
60f067b4
JS
5506 assert(f);
5507 assert(fds);
5508
98393f85
MB
5509 HASHMAP_FOREACH(rt, m->exec_runtime_by_id, i) {
5510 fprintf(f, "exec-runtime=%s", rt->id);
60f067b4 5511
98393f85
MB
5512 if (rt->tmp_dir)
5513 fprintf(f, " tmp-dir=%s", rt->tmp_dir);
60f067b4 5514
98393f85
MB
5515 if (rt->var_tmp_dir)
5516 fprintf(f, " var-tmp-dir=%s", rt->var_tmp_dir);
60f067b4 5517
98393f85
MB
5518 if (rt->netns_storage_socket[0] >= 0) {
5519 int copy;
60f067b4 5520
98393f85
MB
5521 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
5522 if (copy < 0)
5523 return copy;
60f067b4 5524
98393f85
MB
5525 fprintf(f, " netns-socket-0=%i", copy);
5526 }
60f067b4 5527
98393f85
MB
5528 if (rt->netns_storage_socket[1] >= 0) {
5529 int copy;
60f067b4 5530
98393f85
MB
5531 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
5532 if (copy < 0)
5533 return copy;
5534
5535 fprintf(f, " netns-socket-1=%i", copy);
5536 }
60f067b4 5537
98393f85 5538 fputc('\n', f);
60f067b4
JS
5539 }
5540
5541 return 0;
5542}
5543
98393f85
MB
5544int exec_runtime_deserialize_compat(Unit *u, const char *key, const char *value, FDSet *fds) {
5545 _cleanup_(exec_runtime_freep) ExecRuntime *rt_create = NULL;
5546 ExecRuntime *rt;
60f067b4
JS
5547 int r;
5548
98393f85
MB
5549 /* This is for the migration from old (v237 or earlier) deserialization text.
5550 * Due to the bug #7790, this may not work with the units that use JoinsNamespaceOf=.
5551 * Even if the ExecRuntime object originally created by the other unit, we cannot judge
5552 * so or not from the serialized text, then we always creates a new object owned by this. */
5553
5554 assert(u);
60f067b4
JS
5555 assert(key);
5556 assert(value);
5557
98393f85
MB
5558 /* Manager manages ExecRuntime objects by the unit id.
5559 * So, we omit the serialized text when the unit does not have id (yet?)... */
5560 if (isempty(u->id)) {
5561 log_unit_debug(u, "Invocation ID not found. Dropping runtime parameter.");
5562 return 0;
5563 }
60f067b4 5564
98393f85
MB
5565 r = hashmap_ensure_allocated(&u->manager->exec_runtime_by_id, &string_hash_ops);
5566 if (r < 0) {
5567 log_unit_debug_errno(u, r, "Failed to allocate storage for runtime parameter: %m");
5568 return 0;
5569 }
5570
5571 rt = hashmap_get(u->manager->exec_runtime_by_id, u->id);
5572 if (!rt) {
5573 r = exec_runtime_allocate(&rt_create);
60f067b4 5574 if (r < 0)
e3bff60a 5575 return log_oom();
60f067b4 5576
98393f85
MB
5577 rt_create->id = strdup(u->id);
5578 if (!rt_create->id)
5579 return log_oom();
5580
5581 rt = rt_create;
5582 }
5583
5584 if (streq(key, "tmp-dir")) {
5585 char *copy;
5586
60f067b4
JS
5587 copy = strdup(value);
5588 if (!copy)
5589 return log_oom();
5590
98393f85 5591 free_and_replace(rt->tmp_dir, copy);
60f067b4
JS
5592
5593 } else if (streq(key, "var-tmp-dir")) {
5594 char *copy;
5595
60f067b4
JS
5596 copy = strdup(value);
5597 if (!copy)
5598 return log_oom();
5599
98393f85 5600 free_and_replace(rt->var_tmp_dir, copy);
60f067b4
JS
5601
5602 } else if (streq(key, "netns-socket-0")) {
5603 int fd;
5604
98393f85 5605 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd)) {
e3bff60a 5606 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
98393f85 5607 return 0;
60f067b4 5608 }
98393f85
MB
5609
5610 safe_close(rt->netns_storage_socket[0]);
5611 rt->netns_storage_socket[0] = fdset_remove(fds, fd);
5612
60f067b4
JS
5613 } else if (streq(key, "netns-socket-1")) {
5614 int fd;
5615
98393f85 5616 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd)) {
e3bff60a 5617 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
98393f85 5618 return 0;
60f067b4 5619 }
98393f85
MB
5620
5621 safe_close(rt->netns_storage_socket[1]);
5622 rt->netns_storage_socket[1] = fdset_remove(fds, fd);
60f067b4
JS
5623 } else
5624 return 0;
5625
98393f85
MB
5626 /* If the object is newly created, then put it to the hashmap which manages ExecRuntime objects. */
5627 if (rt_create) {
5628 r = hashmap_put(u->manager->exec_runtime_by_id, rt_create->id, rt_create);
5629 if (r < 0) {
b012e921 5630 log_unit_debug_errno(u, r, "Failed to put runtime parameter to manager's storage: %m");
98393f85
MB
5631 return 0;
5632 }
60f067b4 5633
98393f85 5634 rt_create->manager = u->manager;
60f067b4 5635
98393f85
MB
5636 /* Avoid cleanup */
5637 rt_create = NULL;
5638 }
60f067b4 5639
98393f85
MB
5640 return 1;
5641}
60f067b4 5642
98393f85
MB
5643void exec_runtime_deserialize_one(Manager *m, const char *value, FDSet *fds) {
5644 char *id = NULL, *tmp_dir = NULL, *var_tmp_dir = NULL;
5645 int r, fd0 = -1, fd1 = -1;
5646 const char *p, *v = value;
5647 size_t n;
60f067b4 5648
98393f85
MB
5649 assert(m);
5650 assert(value);
5651 assert(fds);
60f067b4 5652
98393f85
MB
5653 n = strcspn(v, " ");
5654 id = strndupa(v, n);
5655 if (v[n] != ' ')
5656 goto finalize;
5657 p = v + n + 1;
5658
5659 v = startswith(p, "tmp-dir=");
5660 if (v) {
5661 n = strcspn(v, " ");
5662 tmp_dir = strndupa(v, n);
5663 if (v[n] != ' ')
5664 goto finalize;
5665 p = v + n + 1;
5666 }
5667
5668 v = startswith(p, "var-tmp-dir=");
5669 if (v) {
5670 n = strcspn(v, " ");
5671 var_tmp_dir = strndupa(v, n);
5672 if (v[n] != ' ')
5673 goto finalize;
5674 p = v + n + 1;
5675 }
5676
5677 v = startswith(p, "netns-socket-0=");
5678 if (v) {
5679 char *buf;
5680
5681 n = strcspn(v, " ");
5682 buf = strndupa(v, n);
5683 if (safe_atoi(buf, &fd0) < 0 || !fdset_contains(fds, fd0)) {
5684 log_debug("Unable to process exec-runtime netns fd specification.");
5685 return;
60f067b4 5686 }
98393f85
MB
5687 fd0 = fdset_remove(fds, fd0);
5688 if (v[n] != ' ')
5689 goto finalize;
5690 p = v + n + 1;
60f067b4
JS
5691 }
5692
98393f85
MB
5693 v = startswith(p, "netns-socket-1=");
5694 if (v) {
5695 char *buf;
60f067b4 5696
98393f85
MB
5697 n = strcspn(v, " ");
5698 buf = strndupa(v, n);
5699 if (safe_atoi(buf, &fd1) < 0 || !fdset_contains(fds, fd1)) {
5700 log_debug("Unable to process exec-runtime netns fd specification.");
5701 return;
60f067b4 5702 }
98393f85
MB
5703 fd1 = fdset_remove(fds, fd1);
5704 }
60f067b4 5705
98393f85
MB
5706finalize:
5707
5708 r = exec_runtime_add(m, id, tmp_dir, var_tmp_dir, (int[]) { fd0, fd1 }, NULL);
6e866b33 5709 if (r < 0)
98393f85 5710 log_debug_errno(r, "Failed to add exec-runtime: %m");
98393f85 5711}
60f067b4 5712
98393f85
MB
5713void exec_runtime_vacuum(Manager *m) {
5714 ExecRuntime *rt;
5715 Iterator i;
5716
5717 assert(m);
5718
5719 /* Free unreferenced ExecRuntime objects. This is used after manager deserialization process. */
5720
5721 HASHMAP_FOREACH(rt, m->exec_runtime_by_id, i) {
5722 if (rt->n_ref > 0)
5723 continue;
5724
5725 (void) exec_runtime_free(rt, false);
5726 }
60f067b4
JS
5727}
5728
6e866b33
MB
5729void exec_params_clear(ExecParameters *p) {
5730 if (!p)
5731 return;
5732
5733 strv_free(p->environment);
5734}
5735
663996b3
MS
/* Textual names of the ExecInput values, indexed by enum value. The lookup helpers for this table
 * (presumably including exec_input_to_string(), which is used elsewhere in this file) come from
 * the DEFINE_STRING_TABLE_LOOKUP() line below. */
static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
        [EXEC_INPUT_NULL] = "null",
        [EXEC_INPUT_TTY] = "tty",
        [EXEC_INPUT_TTY_FORCE] = "tty-force",
        [EXEC_INPUT_TTY_FAIL] = "tty-fail",
        [EXEC_INPUT_SOCKET] = "socket",
        [EXEC_INPUT_NAMED_FD] = "fd",
        [EXEC_INPUT_DATA] = "data",
        [EXEC_INPUT_FILE] = "file",
};

DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
5748
/* Textual names of the ExecOutput values, indexed by enum value. The lookup helpers for this table
 * (presumably including exec_output_to_string(), which is used elsewhere in this file) come from
 * the DEFINE_STRING_TABLE_LOOKUP() line below. */
static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
        [EXEC_OUTPUT_INHERIT] = "inherit",
        [EXEC_OUTPUT_NULL] = "null",
        [EXEC_OUTPUT_TTY] = "tty",
        [EXEC_OUTPUT_SYSLOG] = "syslog",
        [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
        [EXEC_OUTPUT_KMSG] = "kmsg",
        [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
        [EXEC_OUTPUT_JOURNAL] = "journal",
        [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
        [EXEC_OUTPUT_SOCKET] = "socket",
        [EXEC_OUTPUT_NAMED_FD] = "fd",
        [EXEC_OUTPUT_FILE] = "file",
        [EXEC_OUTPUT_FILE_APPEND] = "append",
};

DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
13d276d0
MP
5766
/* Textual names of the ExecUtmpMode values, indexed by enum value. */
static const char* const exec_utmp_mode_table[_EXEC_UTMP_MODE_MAX] = {
        [EXEC_UTMP_INIT] = "init",
        [EXEC_UTMP_LOGIN] = "login",
        [EXEC_UTMP_USER] = "user",
};

DEFINE_STRING_TABLE_LOOKUP(exec_utmp_mode, ExecUtmpMode);
f5e65279
MB
5774
/* Textual names of the ExecPreserveMode values, indexed by enum value. NOTE(review): the
 * _WITH_BOOLEAN variant of the lookup macro presumably also accepts boolean spellings, with true
 * mapping to EXEC_PRESERVE_YES (confirm against the macro definition). */
static const char* const exec_preserve_mode_table[_EXEC_PRESERVE_MODE_MAX] = {
        [EXEC_PRESERVE_NO] = "no",
        [EXEC_PRESERVE_YES] = "yes",
        [EXEC_PRESERVE_RESTART] = "restart",
};

DEFINE_STRING_TABLE_LOOKUP_WITH_BOOLEAN(exec_preserve_mode, ExecPreserveMode, EXEC_PRESERVE_YES);
5782
/* This table maps ExecDirectoryType to the setting it is configured with in the unit. It is indexed
 * by enum value. */
static const char* const exec_directory_type_table[_EXEC_DIRECTORY_TYPE_MAX] = {
        [EXEC_DIRECTORY_RUNTIME] = "RuntimeDirectory",
        [EXEC_DIRECTORY_STATE] = "StateDirectory",
        [EXEC_DIRECTORY_CACHE] = "CacheDirectory",
        [EXEC_DIRECTORY_LOGS] = "LogsDirectory",
        [EXEC_DIRECTORY_CONFIGURATION] = "ConfigurationDirectory",
};

DEFINE_STRING_TABLE_LOOKUP(exec_directory_type, ExecDirectoryType);
5793
f2dec872
BR
/* And this table maps ExecDirectoryType too, but to a generic term identifying the type of resource. This
 * one is supposed to be generic enough to be used for unit types that don't use ExecContext and per-unit
 * directories, specifically .timer units with their timestamp touch file. It is indexed by the same
 * enum values as the other ExecDirectoryType tables in this file. */
static const char* const exec_resource_type_table[_EXEC_DIRECTORY_TYPE_MAX] = {
        [EXEC_DIRECTORY_RUNTIME] = "runtime",
        [EXEC_DIRECTORY_STATE] = "state",
        [EXEC_DIRECTORY_CACHE] = "cache",
        [EXEC_DIRECTORY_LOGS] = "logs",
        [EXEC_DIRECTORY_CONFIGURATION] = "configuration",
};

DEFINE_STRING_TABLE_LOOKUP(exec_resource_type, ExecDirectoryType);
5806
/* And this table also maps ExecDirectoryType, to the environment variable we pass the selected directory to
 * the service payload in. NOTE(review): the PRIVATE ..._TO_STRING variant of the macro presumably
 * generates only a file-local to-string helper (confirm against the macro definition). */
static const char* const exec_directory_env_name_table[_EXEC_DIRECTORY_TYPE_MAX] = {
        [EXEC_DIRECTORY_RUNTIME] = "RUNTIME_DIRECTORY",
        [EXEC_DIRECTORY_STATE] = "STATE_DIRECTORY",
        [EXEC_DIRECTORY_CACHE] = "CACHE_DIRECTORY",
        [EXEC_DIRECTORY_LOGS] = "LOGS_DIRECTORY",
        [EXEC_DIRECTORY_CONFIGURATION] = "CONFIGURATION_DIRECTORY",
};

DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(exec_directory_env_name, ExecDirectoryType);
5818
f5e65279
MB
/* Textual names of the ExecKeyringMode values, indexed by enum value. */
static const char* const exec_keyring_mode_table[_EXEC_KEYRING_MODE_MAX] = {
        [EXEC_KEYRING_INHERIT] = "inherit",
        [EXEC_KEYRING_PRIVATE] = "private",
        [EXEC_KEYRING_SHARED] = "shared",
};

DEFINE_STRING_TABLE_LOOKUP(exec_keyring_mode, ExecKeyringMode);