]> git.proxmox.com Git - systemd.git/blob - src/core/execute.c
94cc10173825df7a56101030b162cd9248be438e
[systemd.git] / src / core / execute.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <unistd.h>
25 #include <string.h>
26 #include <signal.h>
27 #include <sys/socket.h>
28 #include <sys/un.h>
29 #include <sys/prctl.h>
30 #include <sys/stat.h>
31 #include <grp.h>
32 #include <poll.h>
33 #include <glob.h>
34 #include <sys/personality.h>
35
36 #ifdef HAVE_PAM
37 #include <security/pam_appl.h>
38 #endif
39
40 #ifdef HAVE_SELINUX
41 #include <selinux/selinux.h>
42 #endif
43
44 #ifdef HAVE_SECCOMP
45 #include <seccomp.h>
46 #endif
47
48 #ifdef HAVE_APPARMOR
49 #include <sys/apparmor.h>
50 #endif
51
52 #include "sd-messages.h"
53 #include "rm-rf.h"
54 #include "strv.h"
55 #include "macro.h"
56 #include "capability.h"
57 #include "util.h"
58 #include "log.h"
59 #include "ioprio.h"
60 #include "securebits.h"
61 #include "namespace.h"
62 #include "exit-status.h"
63 #include "missing.h"
64 #include "utmp-wtmp.h"
65 #include "def.h"
66 #include "path-util.h"
67 #include "env-util.h"
68 #include "fileio.h"
69 #include "unit.h"
70 #include "async.h"
71 #include "selinux-util.h"
72 #include "errno-list.h"
73 #include "af-list.h"
74 #include "mkdir.h"
75 #include "smack-util.h"
76 #include "bus-endpoint.h"
77 #include "cap-list.h"
78 #include "formats-util.h"
79 #include "process-util.h"
80 #include "terminal-util.h"
81 #include "signal-util.h"
82
83 #ifdef HAVE_APPARMOR
84 #include "apparmor-util.h"
85 #endif
86
87 #ifdef HAVE_SECCOMP
88 #include "seccomp-util.h"
89 #endif
90
91 #include "execute.h"
92
/* First wait in do_idle_pipe_dance(): how long we wait for a hang-up on
 * the idle pipe before telling the other side that we want to go on. */
#define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
/* Second wait in do_idle_pipe_dance(): how long we wait for a hang-up
 * after having written that notification. */
#define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)

/* Access mode chown_terminal() applies to and verifies on the terminal.
 * This assumes there is a 'tty' group */
#define TTY_MODE 0620

/* Send buffer size requested for the logging socket in connect_logger_as() */
#define SNDBUF_SIZE (8*1024*1024)
100
/* Moves the n_fds descriptors in fds[] so that entry i ends up as
 * descriptor i+3. The array is updated in place with the new descriptor
 * numbers. Returns 0 on success, a negative errno if duplicating a
 * descriptor fails. */
static int shift_fds(int fds[], unsigned n_fds) {
        int first = 0, retry_at;

        if (n_fds == 0)
                return 0;

        /* Modifies the fds array! */

        assert(fds);

        do {
                int idx;

                retry_at = -1;

                for (idx = first; idx < (int) n_fds; idx++) {
                        int wanted = idx + 3, dup_fd;

                        /* Nothing to do if it already sits in its slot */
                        if (fds[idx] == wanted)
                                continue;

                        dup_fd = fcntl(fds[idx], F_DUPFD, wanted);
                        if (dup_fd < 0)
                                return -errno;

                        safe_close(fds[idx]);
                        fds[idx] = dup_fd;

                        /* The slot we asked for was still occupied, so we
                         * got a higher one. Remember the first index where
                         * that happened and do another pass from there. */
                        if (dup_fd != wanted && retry_at < 0)
                                retry_at = idx;
                }

                first = retry_at;
        } while (retry_at >= 0);

        return 0;
}
144
/* For each of the n_fds descriptors: sets or clears O_NONBLOCK according
 * to 'nonblock' and clears FD_CLOEXEC. Stops at, and returns, the first
 * error reported by the helpers; returns 0 otherwise. */
static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
        unsigned k;

        if (n_fds == 0)
                return 0;

        assert(fds);

        for (k = 0; k < n_fds; k++) {
                int r;

                r = fd_nonblock(fds[k], nonblock);
                if (r < 0)
                        return r;

                /* FD_CLOEXEC is always dropped: the whole point of these
                 * fds is to hand them over to our children */
                r = fd_cloexec(fds[k], false);
                if (r < 0)
                        return r;
        }

        return 0;
}
171
172 _pure_ static const char *tty_path(const ExecContext *context) {
173 assert(context);
174
175 if (context->tty_path)
176 return context->tty_path;
177
178 return "/dev/console";
179 }
180
181 static void exec_context_tty_reset(const ExecContext *context) {
182 assert(context);
183
184 if (context->tty_vhangup)
185 terminal_vhangup(tty_path(context));
186
187 if (context->tty_reset)
188 reset_terminal(tty_path(context));
189
190 if (context->tty_vt_disallocate && context->tty_path)
191 vt_disallocate(context->tty_path);
192 }
193
194 static bool is_terminal_output(ExecOutput o) {
195 return
196 o == EXEC_OUTPUT_TTY ||
197 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
198 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
199 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
200 }
201
/* Opens /dev/null with the given flags (plus O_NOCTTY) and makes it
 * available as descriptor nfd. Returns nfd on success, a negative errno
 * on failure. */
static int open_null_as(int flags, int nfd) {
        int fd, r;

        assert(nfd >= 0);

        fd = open("/dev/null", flags|O_NOCTTY);
        if (fd < 0)
                return -errno;

        /* Lucky us, open() already handed out the number we wanted */
        if (fd == nfd)
                return nfd;

        r = dup2(fd, nfd) < 0 ? -errno : nfd;
        safe_close(fd);

        return r;
}
219
220 static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
221 union sockaddr_union sa = {
222 .un.sun_family = AF_UNIX,
223 .un.sun_path = "/run/systemd/journal/stdout",
224 };
225 uid_t olduid = UID_INVALID;
226 gid_t oldgid = GID_INVALID;
227 int r;
228
229 if (gid != GID_INVALID) {
230 oldgid = getgid();
231
232 r = setegid(gid);
233 if (r < 0)
234 return -errno;
235 }
236
237 if (uid != UID_INVALID) {
238 olduid = getuid();
239
240 r = seteuid(uid);
241 if (r < 0) {
242 r = -errno;
243 goto restore_gid;
244 }
245 }
246
247 r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
248 if (r < 0)
249 r = -errno;
250
251 /* If we fail to restore the uid or gid, things will likely
252 fail later on. This should only happen if an LSM interferes. */
253
254 if (uid != UID_INVALID)
255 (void) seteuid(olduid);
256
257 restore_gid:
258 if (gid != GID_INVALID)
259 (void) setegid(oldgid);
260
261 return r;
262 }
263
264 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd, uid_t uid, gid_t gid) {
265 int fd, r;
266
267 assert(context);
268 assert(output < _EXEC_OUTPUT_MAX);
269 assert(ident);
270 assert(nfd >= 0);
271
272 fd = socket(AF_UNIX, SOCK_STREAM, 0);
273 if (fd < 0)
274 return -errno;
275
276 r = connect_journal_socket(fd, uid, gid);
277 if (r < 0)
278 return r;
279
280 if (shutdown(fd, SHUT_RD) < 0) {
281 safe_close(fd);
282 return -errno;
283 }
284
285 fd_inc_sndbuf(fd, SNDBUF_SIZE);
286
287 dprintf(fd,
288 "%s\n"
289 "%s\n"
290 "%i\n"
291 "%i\n"
292 "%i\n"
293 "%i\n"
294 "%i\n",
295 context->syslog_identifier ? context->syslog_identifier : ident,
296 unit_id,
297 context->syslog_priority,
298 !!context->syslog_level_prefix,
299 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
300 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
301 is_terminal_output(output));
302
303 if (fd != nfd) {
304 r = dup2(fd, nfd) < 0 ? -errno : nfd;
305 safe_close(fd);
306 } else
307 r = nfd;
308
309 return r;
310 }
/* Opens the terminal at path with the given open mode (plus O_NOCTTY) and
 * makes it available as descriptor nfd. Returns nfd on success, a negative
 * error code on failure. */
static int open_terminal_as(const char *path, mode_t mode, int nfd) {
        int fd, r;

        assert(path);
        assert(nfd >= 0);

        fd = open_terminal(path, mode | O_NOCTTY);
        if (fd < 0)
                return fd;

        /* Already at the requested number? Then we are done. */
        if (fd == nfd)
                return nfd;

        r = dup2(fd, nfd) < 0 ? -errno : nfd;
        safe_close(fd);

        return r;
}
328
329 static bool is_terminal_input(ExecInput i) {
330 return
331 i == EXEC_INPUT_TTY ||
332 i == EXEC_INPUT_TTY_FORCE ||
333 i == EXEC_INPUT_TTY_FAIL;
334 }
335
336 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
337
338 if (is_terminal_input(std_input) && !apply_tty_stdin)
339 return EXEC_INPUT_NULL;
340
341 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
342 return EXEC_INPUT_NULL;
343
344 return std_input;
345 }
346
347 static int fixup_output(ExecOutput std_output, int socket_fd) {
348
349 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
350 return EXEC_OUTPUT_INHERIT;
351
352 return std_output;
353 }
354
355 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
356 ExecInput i;
357
358 assert(context);
359
360 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
361
362 switch (i) {
363
364 case EXEC_INPUT_NULL:
365 return open_null_as(O_RDONLY, STDIN_FILENO);
366
367 case EXEC_INPUT_TTY:
368 case EXEC_INPUT_TTY_FORCE:
369 case EXEC_INPUT_TTY_FAIL: {
370 int fd, r;
371
372 fd = acquire_terminal(tty_path(context),
373 i == EXEC_INPUT_TTY_FAIL,
374 i == EXEC_INPUT_TTY_FORCE,
375 false,
376 USEC_INFINITY);
377 if (fd < 0)
378 return fd;
379
380 if (fd != STDIN_FILENO) {
381 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
382 safe_close(fd);
383 } else
384 r = STDIN_FILENO;
385
386 return r;
387 }
388
389 case EXEC_INPUT_SOCKET:
390 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
391
392 default:
393 assert_not_reached("Unknown input type");
394 }
395 }
396
397 static int setup_output(Unit *unit, const ExecContext *context, int fileno, int socket_fd, const char *ident, bool apply_tty_stdin, uid_t uid, gid_t gid) {
398 ExecOutput o;
399 ExecInput i;
400 int r;
401
402 assert(unit);
403 assert(context);
404 assert(ident);
405
406 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
407 o = fixup_output(context->std_output, socket_fd);
408
409 if (fileno == STDERR_FILENO) {
410 ExecOutput e;
411 e = fixup_output(context->std_error, socket_fd);
412
413 /* This expects the input and output are already set up */
414
415 /* Don't change the stderr file descriptor if we inherit all
416 * the way and are not on a tty */
417 if (e == EXEC_OUTPUT_INHERIT &&
418 o == EXEC_OUTPUT_INHERIT &&
419 i == EXEC_INPUT_NULL &&
420 !is_terminal_input(context->std_input) &&
421 getppid () != 1)
422 return fileno;
423
424 /* Duplicate from stdout if possible */
425 if (e == o || e == EXEC_OUTPUT_INHERIT)
426 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
427
428 o = e;
429
430 } else if (o == EXEC_OUTPUT_INHERIT) {
431 /* If input got downgraded, inherit the original value */
432 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
433 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
434
435 /* If the input is connected to anything that's not a /dev/null, inherit that... */
436 if (i != EXEC_INPUT_NULL)
437 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
438
439 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
440 if (getppid() != 1)
441 return fileno;
442
443 /* We need to open /dev/null here anew, to get the right access mode. */
444 return open_null_as(O_WRONLY, fileno);
445 }
446
447 switch (o) {
448
449 case EXEC_OUTPUT_NULL:
450 return open_null_as(O_WRONLY, fileno);
451
452 case EXEC_OUTPUT_TTY:
453 if (is_terminal_input(i))
454 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
455
456 /* We don't reset the terminal if this is just about output */
457 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
458
459 case EXEC_OUTPUT_SYSLOG:
460 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
461 case EXEC_OUTPUT_KMSG:
462 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
463 case EXEC_OUTPUT_JOURNAL:
464 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
465 r = connect_logger_as(context, o, ident, unit->id, fileno, uid, gid);
466 if (r < 0) {
467 log_unit_error_errno(unit, r, "Failed to connect %s to the journal socket, ignoring: %m", fileno == STDOUT_FILENO ? "stdout" : "stderr");
468 r = open_null_as(O_WRONLY, fileno);
469 }
470 return r;
471
472 case EXEC_OUTPUT_SOCKET:
473 assert(socket_fd >= 0);
474 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
475
476 default:
477 assert_not_reached("Unknown error type");
478 }
479 }
480
481 static int chown_terminal(int fd, uid_t uid) {
482 struct stat st;
483
484 assert(fd >= 0);
485
486 /* This might fail. What matters are the results. */
487 (void) fchown(fd, uid, -1);
488 (void) fchmod(fd, TTY_MODE);
489
490 if (fstat(fd, &st) < 0)
491 return -errno;
492
493 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
494 return -EPERM;
495
496 return 0;
497 }
498
499 static int setup_confirm_stdio(int *_saved_stdin,
500 int *_saved_stdout) {
501 int fd = -1, saved_stdin, saved_stdout = -1, r;
502
503 assert(_saved_stdin);
504 assert(_saved_stdout);
505
506 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
507 if (saved_stdin < 0)
508 return -errno;
509
510 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
511 if (saved_stdout < 0) {
512 r = errno;
513 goto fail;
514 }
515
516 fd = acquire_terminal(
517 "/dev/console",
518 false,
519 false,
520 false,
521 DEFAULT_CONFIRM_USEC);
522 if (fd < 0) {
523 r = fd;
524 goto fail;
525 }
526
527 r = chown_terminal(fd, getuid());
528 if (r < 0)
529 goto fail;
530
531 if (dup2(fd, STDIN_FILENO) < 0) {
532 r = -errno;
533 goto fail;
534 }
535
536 if (dup2(fd, STDOUT_FILENO) < 0) {
537 r = -errno;
538 goto fail;
539 }
540
541 if (fd >= 2)
542 safe_close(fd);
543
544 *_saved_stdin = saved_stdin;
545 *_saved_stdout = saved_stdout;
546
547 return 0;
548
549 fail:
550 safe_close(saved_stdout);
551 safe_close(saved_stdin);
552 safe_close(fd);
553
554 return r;
555 }
556
557 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
558 _cleanup_close_ int fd = -1;
559 va_list ap;
560
561 assert(format);
562
563 fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
564 if (fd < 0)
565 return fd;
566
567 va_start(ap, format);
568 vdprintf(fd, format, ap);
569 va_end(ap);
570
571 return 0;
572 }
573
/* Counterpart of setup_confirm_stdio(): releases the terminal, puts the
 * saved descriptors (where valid) back in place as stdin/stdout and closes
 * the saved copies. Returns 0 on success, or the negative errno of the
 * last dup2() that failed. */
static int restore_confirm_stdio(int *saved_stdin,
                                 int *saved_stdout) {

        int r = 0;

        assert(saved_stdin);
        assert(saved_stdout);

        release_terminal();

        if (*saved_stdin >= 0 && dup2(*saved_stdin, STDIN_FILENO) < 0)
                r = -errno;

        if (*saved_stdout >= 0 && dup2(*saved_stdout, STDOUT_FILENO) < 0)
                r = -errno;

        safe_close(*saved_stdin);
        safe_close(*saved_stdout);

        return r;
}
597
598 static int ask_for_confirmation(char *response, char **argv) {
599 int saved_stdout = -1, saved_stdin = -1, r;
600 _cleanup_free_ char *line = NULL;
601
602 r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
603 if (r < 0)
604 return r;
605
606 line = exec_command_line(argv);
607 if (!line)
608 return -ENOMEM;
609
610 r = ask_char(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
611
612 restore_confirm_stdio(&saved_stdin, &saved_stdout);
613
614 return r;
615 }
616
617 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
618 bool keep_groups = false;
619 int r;
620
621 assert(context);
622
623 /* Lookup and set GID and supplementary group list. Here too
624 * we avoid NSS lookups for gid=0. */
625
626 if (context->group || username) {
627
628 if (context->group) {
629 const char *g = context->group;
630
631 if ((r = get_group_creds(&g, &gid)) < 0)
632 return r;
633 }
634
635 /* First step, initialize groups from /etc/groups */
636 if (username && gid != 0) {
637 if (initgroups(username, gid) < 0)
638 return -errno;
639
640 keep_groups = true;
641 }
642
643 /* Second step, set our gids */
644 if (setresgid(gid, gid, gid) < 0)
645 return -errno;
646 }
647
648 if (context->supplementary_groups) {
649 int ngroups_max, k;
650 gid_t *gids;
651 char **i;
652
653 /* Final step, initialize any manually set supplementary groups */
654 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
655
656 if (!(gids = new(gid_t, ngroups_max)))
657 return -ENOMEM;
658
659 if (keep_groups) {
660 if ((k = getgroups(ngroups_max, gids)) < 0) {
661 free(gids);
662 return -errno;
663 }
664 } else
665 k = 0;
666
667 STRV_FOREACH(i, context->supplementary_groups) {
668 const char *g;
669
670 if (k >= ngroups_max) {
671 free(gids);
672 return -E2BIG;
673 }
674
675 g = *i;
676 r = get_group_creds(&g, gids+k);
677 if (r < 0) {
678 free(gids);
679 return r;
680 }
681
682 k++;
683 }
684
685 if (setgroups(k, gids) < 0) {
686 free(gids);
687 return -errno;
688 }
689
690 free(gids);
691 }
692
693 return 0;
694 }
695
696 static int enforce_user(const ExecContext *context, uid_t uid) {
697 assert(context);
698
699 /* Sets (but doesn't lookup) the uid and make sure we keep the
700 * capabilities while doing so. */
701
702 if (context->capabilities) {
703 _cleanup_cap_free_ cap_t d = NULL;
704 static const cap_value_t bits[] = {
705 CAP_SETUID, /* Necessary so that we can run setresuid() below */
706 CAP_SETPCAP /* Necessary so that we can set PR_SET_SECUREBITS later on */
707 };
708
709 /* First step: If we need to keep capabilities but
710 * drop privileges we need to make sure we keep our
711 * caps, while we drop privileges. */
712 if (uid != 0) {
713 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
714
715 if (prctl(PR_GET_SECUREBITS) != sb)
716 if (prctl(PR_SET_SECUREBITS, sb) < 0)
717 return -errno;
718 }
719
720 /* Second step: set the capabilities. This will reduce
721 * the capabilities to the minimum we need. */
722
723 d = cap_dup(context->capabilities);
724 if (!d)
725 return -errno;
726
727 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
728 cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
729 return -errno;
730
731 if (cap_set_proc(d) < 0)
732 return -errno;
733 }
734
735 /* Third step: actually set the uids */
736 if (setresuid(uid, uid, uid) < 0)
737 return -errno;
738
739 /* At this point we should have all necessary capabilities but
740 are otherwise a normal user. However, the caps might got
741 corrupted due to the setresuid() so we need clean them up
742 later. This is done outside of this call. */
743
744 return 0;
745 }
746
747 #ifdef HAVE_PAM
748
749 static int null_conv(
750 int num_msg,
751 const struct pam_message **msg,
752 struct pam_response **resp,
753 void *appdata_ptr) {
754
755 /* We don't support conversations */
756
757 return PAM_CONV_ERR;
758 }
759
760 static int setup_pam(
761 const char *name,
762 const char *user,
763 uid_t uid,
764 const char *tty,
765 char ***pam_env,
766 int fds[], unsigned n_fds) {
767
768 static const struct pam_conv conv = {
769 .conv = null_conv,
770 .appdata_ptr = NULL
771 };
772
773 pam_handle_t *handle = NULL;
774 sigset_t old_ss;
775 int pam_code = PAM_SUCCESS;
776 int err;
777 char **e = NULL;
778 bool close_session = false;
779 pid_t pam_pid = 0, parent_pid;
780 int flags = 0;
781
782 assert(name);
783 assert(user);
784 assert(pam_env);
785
786 /* We set up PAM in the parent process, then fork. The child
787 * will then stay around until killed via PR_GET_PDEATHSIG or
788 * systemd via the cgroup logic. It will then remove the PAM
789 * session again. The parent process will exec() the actual
790 * daemon. We do things this way to ensure that the main PID
791 * of the daemon is the one we initially fork()ed. */
792
793 if (log_get_max_level() < LOG_DEBUG)
794 flags |= PAM_SILENT;
795
796 pam_code = pam_start(name, user, &conv, &handle);
797 if (pam_code != PAM_SUCCESS) {
798 handle = NULL;
799 goto fail;
800 }
801
802 if (tty) {
803 pam_code = pam_set_item(handle, PAM_TTY, tty);
804 if (pam_code != PAM_SUCCESS)
805 goto fail;
806 }
807
808 pam_code = pam_acct_mgmt(handle, flags);
809 if (pam_code != PAM_SUCCESS)
810 goto fail;
811
812 pam_code = pam_open_session(handle, flags);
813 if (pam_code != PAM_SUCCESS)
814 goto fail;
815
816 close_session = true;
817
818 e = pam_getenvlist(handle);
819 if (!e) {
820 pam_code = PAM_BUF_ERR;
821 goto fail;
822 }
823
824 /* Block SIGTERM, so that we know that it won't get lost in
825 * the child */
826
827 assert_se(sigprocmask_many(SIG_BLOCK, &old_ss, SIGTERM, -1) >= 0);
828
829 parent_pid = getpid();
830
831 pam_pid = fork();
832 if (pam_pid < 0)
833 goto fail;
834
835 if (pam_pid == 0) {
836 int sig;
837 int r = EXIT_PAM;
838
839 /* The child's job is to reset the PAM session on
840 * termination */
841
842 /* This string must fit in 10 chars (i.e. the length
843 * of "/sbin/init"), to look pretty in /bin/ps */
844 rename_process("(sd-pam)");
845
846 /* Make sure we don't keep open the passed fds in this
847 child. We assume that otherwise only those fds are
848 open here that have been opened by PAM. */
849 close_many(fds, n_fds);
850
851 /* Drop privileges - we don't need any to pam_close_session
852 * and this will make PR_SET_PDEATHSIG work in most cases.
853 * If this fails, ignore the error - but expect sd-pam threads
854 * to fail to exit normally */
855 if (setresuid(uid, uid, uid) < 0)
856 log_error_errno(r, "Error: Failed to setresuid() in sd-pam: %m");
857
858 (void) ignore_signals(SIGPIPE, -1);
859
860 /* Wait until our parent died. This will only work if
861 * the above setresuid() succeeds, otherwise the kernel
862 * will not allow unprivileged parents kill their privileged
863 * children this way. We rely on the control groups kill logic
864 * to do the rest for us. */
865 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
866 goto child_finish;
867
868 /* Check if our parent process might already have
869 * died? */
870 if (getppid() == parent_pid) {
871 sigset_t ss;
872
873 assert_se(sigemptyset(&ss) >= 0);
874 assert_se(sigaddset(&ss, SIGTERM) >= 0);
875
876 for (;;) {
877 if (sigwait(&ss, &sig) < 0) {
878 if (errno == EINTR)
879 continue;
880
881 goto child_finish;
882 }
883
884 assert(sig == SIGTERM);
885 break;
886 }
887 }
888
889 /* If our parent died we'll end the session */
890 if (getppid() != parent_pid) {
891 pam_code = pam_close_session(handle, flags);
892 if (pam_code != PAM_SUCCESS)
893 goto child_finish;
894 }
895
896 r = 0;
897
898 child_finish:
899 pam_end(handle, pam_code | flags);
900 _exit(r);
901 }
902
903 /* If the child was forked off successfully it will do all the
904 * cleanups, so forget about the handle here. */
905 handle = NULL;
906
907 /* Unblock SIGTERM again in the parent */
908 assert_se(sigprocmask(SIG_SETMASK, &old_ss, NULL) >= 0);
909
910 /* We close the log explicitly here, since the PAM modules
911 * might have opened it, but we don't want this fd around. */
912 closelog();
913
914 *pam_env = e;
915 e = NULL;
916
917 return 0;
918
919 fail:
920 if (pam_code != PAM_SUCCESS) {
921 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
922 err = -EPERM; /* PAM errors do not map to errno */
923 } else {
924 log_error_errno(errno, "PAM failed: %m");
925 err = -errno;
926 }
927
928 if (handle) {
929 if (close_session)
930 pam_code = pam_close_session(handle, flags);
931
932 pam_end(handle, pam_code | flags);
933 }
934
935 strv_free(e);
936
937 closelog();
938
939 if (pam_pid > 1) {
940 kill(pam_pid, SIGTERM);
941 kill(pam_pid, SIGCONT);
942 }
943
944 return err;
945 }
946 #endif
947
/* Renames the process to "(<name>)", where <name> is the basename of
 * path, cut down to its last 8 characters if longer. If the basename is
 * empty, "(...)" is used. */
static void rename_process_from_path(const char *path) {
        char label[11];
        const char *base, *tail;
        char *w;
        size_t len;

        /* The label must fit in 10 chars (i.e. the length of
         * "/sbin/init") to look pretty in /bin/ps */

        base = basename(path);
        if (isempty(base)) {
                rename_process("(...)");
                return;
        }

        len = strlen(base);
        tail = base;

        if (len > 8) {
                /* Keep the end of the name: it is usually the more
                 * interesting part, since the beginning might just be
                 * "systemd-" */
                tail = base + len - 8;
                len = 8;
        }

        w = label;
        *w++ = '(';
        memcpy(w, tail, len);
        w += len;
        *w++ = ')';
        *w = 0;

        rename_process(label);
}
978
979 #ifdef HAVE_SECCOMP
980
981 static int apply_seccomp(const ExecContext *c) {
982 uint32_t negative_action, action;
983 scmp_filter_ctx *seccomp;
984 Iterator i;
985 void *id;
986 int r;
987
988 assert(c);
989
990 negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
991
992 seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
993 if (!seccomp)
994 return -ENOMEM;
995
996 if (c->syscall_archs) {
997
998 SET_FOREACH(id, c->syscall_archs, i) {
999 r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
1000 if (r == -EEXIST)
1001 continue;
1002 if (r < 0)
1003 goto finish;
1004 }
1005
1006 } else {
1007 r = seccomp_add_secondary_archs(seccomp);
1008 if (r < 0)
1009 goto finish;
1010 }
1011
1012 action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
1013 SET_FOREACH(id, c->syscall_filter, i) {
1014 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
1015 if (r < 0)
1016 goto finish;
1017 }
1018
1019 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1020 if (r < 0)
1021 goto finish;
1022
1023 r = seccomp_load(seccomp);
1024
1025 finish:
1026 seccomp_release(seccomp);
1027 return r;
1028 }
1029
1030 static int apply_address_families(const ExecContext *c) {
1031 scmp_filter_ctx *seccomp;
1032 Iterator i;
1033 int r;
1034
1035 assert(c);
1036
1037 seccomp = seccomp_init(SCMP_ACT_ALLOW);
1038 if (!seccomp)
1039 return -ENOMEM;
1040
1041 r = seccomp_add_secondary_archs(seccomp);
1042 if (r < 0)
1043 goto finish;
1044
1045 if (c->address_families_whitelist) {
1046 int af, first = 0, last = 0;
1047 void *afp;
1048
1049 /* If this is a whitelist, we first block the address
1050 * families that are out of range and then everything
1051 * that is not in the set. First, we find the lowest
1052 * and highest address family in the set. */
1053
1054 SET_FOREACH(afp, c->address_families, i) {
1055 af = PTR_TO_INT(afp);
1056
1057 if (af <= 0 || af >= af_max())
1058 continue;
1059
1060 if (first == 0 || af < first)
1061 first = af;
1062
1063 if (last == 0 || af > last)
1064 last = af;
1065 }
1066
1067 assert((first == 0) == (last == 0));
1068
1069 if (first == 0) {
1070
1071 /* No entries in the valid range, block everything */
1072 r = seccomp_rule_add(
1073 seccomp,
1074 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1075 SCMP_SYS(socket),
1076 0);
1077 if (r < 0)
1078 goto finish;
1079
1080 } else {
1081
1082 /* Block everything below the first entry */
1083 r = seccomp_rule_add(
1084 seccomp,
1085 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1086 SCMP_SYS(socket),
1087 1,
1088 SCMP_A0(SCMP_CMP_LT, first));
1089 if (r < 0)
1090 goto finish;
1091
1092 /* Block everything above the last entry */
1093 r = seccomp_rule_add(
1094 seccomp,
1095 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1096 SCMP_SYS(socket),
1097 1,
1098 SCMP_A0(SCMP_CMP_GT, last));
1099 if (r < 0)
1100 goto finish;
1101
1102 /* Block everything between the first and last
1103 * entry */
1104 for (af = 1; af < af_max(); af++) {
1105
1106 if (set_contains(c->address_families, INT_TO_PTR(af)))
1107 continue;
1108
1109 r = seccomp_rule_add(
1110 seccomp,
1111 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1112 SCMP_SYS(socket),
1113 1,
1114 SCMP_A0(SCMP_CMP_EQ, af));
1115 if (r < 0)
1116 goto finish;
1117 }
1118 }
1119
1120 } else {
1121 void *af;
1122
1123 /* If this is a blacklist, then generate one rule for
1124 * each address family that are then combined in OR
1125 * checks. */
1126
1127 SET_FOREACH(af, c->address_families, i) {
1128
1129 r = seccomp_rule_add(
1130 seccomp,
1131 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1132 SCMP_SYS(socket),
1133 1,
1134 SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1135 if (r < 0)
1136 goto finish;
1137 }
1138 }
1139
1140 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1141 if (r < 0)
1142 goto finish;
1143
1144 r = seccomp_load(seccomp);
1145
1146 finish:
1147 seccomp_release(seccomp);
1148 return r;
1149 }
1150
1151 #endif
1152
1153 static void do_idle_pipe_dance(int idle_pipe[4]) {
1154 assert(idle_pipe);
1155
1156
1157 safe_close(idle_pipe[1]);
1158 safe_close(idle_pipe[2]);
1159
1160 if (idle_pipe[0] >= 0) {
1161 int r;
1162
1163 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1164
1165 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1166 /* Signal systemd that we are bored and want to continue. */
1167 r = write(idle_pipe[3], "x", 1);
1168 if (r > 0)
1169 /* Wait for systemd to react to the signal above. */
1170 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1171 }
1172
1173 safe_close(idle_pipe[0]);
1174
1175 }
1176
1177 safe_close(idle_pipe[3]);
1178 }
1179
1180 static int build_environment(
1181 const ExecContext *c,
1182 unsigned n_fds,
1183 usec_t watchdog_usec,
1184 const char *home,
1185 const char *username,
1186 const char *shell,
1187 char ***ret) {
1188
1189 _cleanup_strv_free_ char **our_env = NULL;
1190 unsigned n_env = 0;
1191 char *x;
1192
1193 assert(c);
1194 assert(ret);
1195
1196 our_env = new0(char*, 10);
1197 if (!our_env)
1198 return -ENOMEM;
1199
1200 if (n_fds > 0) {
1201 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1202 return -ENOMEM;
1203 our_env[n_env++] = x;
1204
1205 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1206 return -ENOMEM;
1207 our_env[n_env++] = x;
1208 }
1209
1210 if (watchdog_usec > 0) {
1211 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1212 return -ENOMEM;
1213 our_env[n_env++] = x;
1214
1215 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, watchdog_usec) < 0)
1216 return -ENOMEM;
1217 our_env[n_env++] = x;
1218 }
1219
1220 if (home) {
1221 x = strappend("HOME=", home);
1222 if (!x)
1223 return -ENOMEM;
1224 our_env[n_env++] = x;
1225 }
1226
1227 if (username) {
1228 x = strappend("LOGNAME=", username);
1229 if (!x)
1230 return -ENOMEM;
1231 our_env[n_env++] = x;
1232
1233 x = strappend("USER=", username);
1234 if (!x)
1235 return -ENOMEM;
1236 our_env[n_env++] = x;
1237 }
1238
1239 if (shell) {
1240 x = strappend("SHELL=", shell);
1241 if (!x)
1242 return -ENOMEM;
1243 our_env[n_env++] = x;
1244 }
1245
1246 if (is_terminal_input(c->std_input) ||
1247 c->std_output == EXEC_OUTPUT_TTY ||
1248 c->std_error == EXEC_OUTPUT_TTY ||
1249 c->tty_path) {
1250
1251 x = strdup(default_term_for_tty(tty_path(c)));
1252 if (!x)
1253 return -ENOMEM;
1254 our_env[n_env++] = x;
1255 }
1256
1257 our_env[n_env++] = NULL;
1258 assert(n_env <= 10);
1259
1260 *ret = our_env;
1261 our_env = NULL;
1262
1263 return 0;
1264 }
1265
1266 static bool exec_needs_mount_namespace(
1267 const ExecContext *context,
1268 const ExecParameters *params,
1269 ExecRuntime *runtime) {
1270
1271 assert(context);
1272 assert(params);
1273
1274 if (!strv_isempty(context->read_write_dirs) ||
1275 !strv_isempty(context->read_only_dirs) ||
1276 !strv_isempty(context->inaccessible_dirs))
1277 return true;
1278
1279 if (context->mount_flags != 0)
1280 return true;
1281
1282 if (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))
1283 return true;
1284
1285 if (params->bus_endpoint_path)
1286 return true;
1287
1288 if (context->private_devices ||
1289 context->protect_system != PROTECT_SYSTEM_NO ||
1290 context->protect_home != PROTECT_HOME_NO)
1291 return true;
1292
1293 return false;
1294 }
1295
1296 static int exec_child(
1297 Unit *unit,
1298 ExecCommand *command,
1299 const ExecContext *context,
1300 const ExecParameters *params,
1301 ExecRuntime *runtime,
1302 char **argv,
1303 int socket_fd,
1304 int *fds, unsigned n_fds,
1305 char **files_env,
1306 int *exit_status) {
1307
1308 _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1309 _cleanup_free_ char *mac_selinux_context_net = NULL;
1310 const char *username = NULL, *home = NULL, *shell = NULL;
1311 unsigned n_dont_close = 0;
1312 int dont_close[n_fds + 4];
1313 uid_t uid = UID_INVALID;
1314 gid_t gid = GID_INVALID;
1315 int i, r;
1316 bool needs_mount_namespace;
1317
1318 assert(unit);
1319 assert(command);
1320 assert(context);
1321 assert(params);
1322 assert(exit_status);
1323
1324 rename_process_from_path(command->path);
1325
1326 /* We reset exactly these signals, since they are the
1327 * only ones we set to SIG_IGN in the main daemon. All
1328 * others we leave untouched because we set them to
1329 * SIG_DFL or a valid handler initially, both of which
1330 * will be demoted to SIG_DFL. */
1331 (void) default_signals(SIGNALS_CRASH_HANDLER,
1332 SIGNALS_IGNORE, -1);
1333
1334 if (context->ignore_sigpipe)
1335 (void) ignore_signals(SIGPIPE, -1);
1336
1337 r = reset_signal_mask();
1338 if (r < 0) {
1339 *exit_status = EXIT_SIGNAL_MASK;
1340 return r;
1341 }
1342
1343 if (params->idle_pipe)
1344 do_idle_pipe_dance(params->idle_pipe);
1345
1346 /* Close sockets very early to make sure we don't
1347 * block init reexecution because it cannot bind its
1348 * sockets */
1349
1350 log_forget_fds();
1351
1352 if (socket_fd >= 0)
1353 dont_close[n_dont_close++] = socket_fd;
1354 if (n_fds > 0) {
1355 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1356 n_dont_close += n_fds;
1357 }
1358 if (params->bus_endpoint_fd >= 0)
1359 dont_close[n_dont_close++] = params->bus_endpoint_fd;
1360 if (runtime) {
1361 if (runtime->netns_storage_socket[0] >= 0)
1362 dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1363 if (runtime->netns_storage_socket[1] >= 0)
1364 dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1365 }
1366
1367 r = close_all_fds(dont_close, n_dont_close);
1368 if (r < 0) {
1369 *exit_status = EXIT_FDS;
1370 return r;
1371 }
1372
1373 if (!context->same_pgrp)
1374 if (setsid() < 0) {
1375 *exit_status = EXIT_SETSID;
1376 return -errno;
1377 }
1378
1379 exec_context_tty_reset(context);
1380
1381 if (params->confirm_spawn) {
1382 char response;
1383
1384 r = ask_for_confirmation(&response, argv);
1385 if (r == -ETIMEDOUT)
1386 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1387 else if (r < 0)
1388 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-r));
1389 else if (response == 's') {
1390 write_confirm_message("Skipping execution.\n");
1391 *exit_status = EXIT_CONFIRM;
1392 return -ECANCELED;
1393 } else if (response == 'n') {
1394 write_confirm_message("Failing execution.\n");
1395 *exit_status = 0;
1396 return 0;
1397 }
1398 }
1399
1400 if (context->user) {
1401 username = context->user;
1402 r = get_user_creds(&username, &uid, &gid, &home, &shell);
1403 if (r < 0) {
1404 *exit_status = EXIT_USER;
1405 return r;
1406 }
1407 }
1408
1409 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1410 * must sure to drop O_NONBLOCK */
1411 if (socket_fd >= 0)
1412 fd_nonblock(socket_fd, false);
1413
1414 r = setup_input(context, socket_fd, params->apply_tty_stdin);
1415 if (r < 0) {
1416 *exit_status = EXIT_STDIN;
1417 return r;
1418 }
1419
1420 r = setup_output(unit, context, STDOUT_FILENO, socket_fd, basename(command->path), params->apply_tty_stdin, uid, gid);
1421 if (r < 0) {
1422 *exit_status = EXIT_STDOUT;
1423 return r;
1424 }
1425
1426 r = setup_output(unit, context, STDERR_FILENO, socket_fd, basename(command->path), params->apply_tty_stdin, uid, gid);
1427 if (r < 0) {
1428 *exit_status = EXIT_STDERR;
1429 return r;
1430 }
1431
1432 if (params->cgroup_path) {
1433 r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
1434 if (r < 0) {
1435 *exit_status = EXIT_CGROUP;
1436 return r;
1437 }
1438 }
1439
1440 if (context->oom_score_adjust_set) {
1441 char t[DECIMAL_STR_MAX(context->oom_score_adjust)];
1442
1443 /* When we can't make this change due to EPERM, then
1444 * let's silently skip over it. User namespaces
1445 * prohibit write access to this file, and we
1446 * shouldn't trip up over that. */
1447
1448 sprintf(t, "%i", context->oom_score_adjust);
1449 r = write_string_file("/proc/self/oom_score_adj", t);
1450 if (r == -EPERM || r == -EACCES) {
1451 log_open();
1452 log_unit_debug_errno(unit, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
1453 log_close();
1454 } else if (r < 0) {
1455 *exit_status = EXIT_OOM_ADJUST;
1456 return -errno;
1457 }
1458 }
1459
1460 if (context->nice_set)
1461 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1462 *exit_status = EXIT_NICE;
1463 return -errno;
1464 }
1465
1466 if (context->cpu_sched_set) {
1467 struct sched_param param = {
1468 .sched_priority = context->cpu_sched_priority,
1469 };
1470
1471 r = sched_setscheduler(0,
1472 context->cpu_sched_policy |
1473 (context->cpu_sched_reset_on_fork ?
1474 SCHED_RESET_ON_FORK : 0),
1475 &param);
1476 if (r < 0) {
1477 *exit_status = EXIT_SETSCHEDULER;
1478 return -errno;
1479 }
1480 }
1481
1482 if (context->cpuset)
1483 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1484 *exit_status = EXIT_CPUAFFINITY;
1485 return -errno;
1486 }
1487
1488 if (context->ioprio_set)
1489 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1490 *exit_status = EXIT_IOPRIO;
1491 return -errno;
1492 }
1493
1494 if (context->timer_slack_nsec != NSEC_INFINITY)
1495 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1496 *exit_status = EXIT_TIMERSLACK;
1497 return -errno;
1498 }
1499
1500 if (context->personality != PERSONALITY_INVALID)
1501 if (personality(context->personality) < 0) {
1502 *exit_status = EXIT_PERSONALITY;
1503 return -errno;
1504 }
1505
1506 if (context->utmp_id)
1507 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1508
1509 if (context->user && is_terminal_input(context->std_input)) {
1510 r = chown_terminal(STDIN_FILENO, uid);
1511 if (r < 0) {
1512 *exit_status = EXIT_STDIN;
1513 return r;
1514 }
1515 }
1516
1517 if (params->bus_endpoint_fd >= 0 && context->bus_endpoint) {
1518 uid_t ep_uid = (uid == UID_INVALID) ? 0 : uid;
1519
1520 r = bus_kernel_set_endpoint_policy(params->bus_endpoint_fd, ep_uid, context->bus_endpoint);
1521 if (r < 0) {
1522 *exit_status = EXIT_BUS_ENDPOINT;
1523 return r;
1524 }
1525 }
1526
1527 /* If delegation is enabled we'll pass ownership of the cgroup
1528 * (but only in systemd's own controller hierarchy!) to the
1529 * user of the new process. */
1530 if (params->cgroup_path && context->user && params->cgroup_delegate) {
1531 r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
1532 if (r < 0) {
1533 *exit_status = EXIT_CGROUP;
1534 return r;
1535 }
1536
1537
1538 r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
1539 if (r < 0) {
1540 *exit_status = EXIT_CGROUP;
1541 return r;
1542 }
1543 }
1544
1545 if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) {
1546 char **rt;
1547
1548 STRV_FOREACH(rt, context->runtime_directory) {
1549 _cleanup_free_ char *p;
1550
1551 p = strjoin(params->runtime_prefix, "/", *rt, NULL);
1552 if (!p) {
1553 *exit_status = EXIT_RUNTIME_DIRECTORY;
1554 return -ENOMEM;
1555 }
1556
1557 r = mkdir_safe_label(p, context->runtime_directory_mode, uid, gid);
1558 if (r < 0) {
1559 *exit_status = EXIT_RUNTIME_DIRECTORY;
1560 return r;
1561 }
1562 }
1563 }
1564
1565 if (params->apply_permissions) {
1566 r = enforce_groups(context, username, gid);
1567 if (r < 0) {
1568 *exit_status = EXIT_GROUP;
1569 return r;
1570 }
1571 }
1572
1573 umask(context->umask);
1574
1575 #ifdef HAVE_PAM
1576 if (params->apply_permissions && context->pam_name && username) {
1577 r = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1578 if (r < 0) {
1579 *exit_status = EXIT_PAM;
1580 return r;
1581 }
1582 }
1583 #endif
1584
1585 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1586 r = setup_netns(runtime->netns_storage_socket);
1587 if (r < 0) {
1588 *exit_status = EXIT_NETWORK;
1589 return r;
1590 }
1591 }
1592
1593 needs_mount_namespace = exec_needs_mount_namespace(context, params, runtime);
1594
1595 if (needs_mount_namespace) {
1596 char *tmp = NULL, *var = NULL;
1597
1598 /* The runtime struct only contains the parent
1599 * of the private /tmp, which is
1600 * non-accessible to world users. Inside of it
1601 * there's a /tmp that is sticky, and that's
1602 * the one we want to use here. */
1603
1604 if (context->private_tmp && runtime) {
1605 if (runtime->tmp_dir)
1606 tmp = strjoina(runtime->tmp_dir, "/tmp");
1607 if (runtime->var_tmp_dir)
1608 var = strjoina(runtime->var_tmp_dir, "/tmp");
1609 }
1610
1611 r = setup_namespace(
1612 params->apply_chroot ? context->root_directory : NULL,
1613 context->read_write_dirs,
1614 context->read_only_dirs,
1615 context->inaccessible_dirs,
1616 tmp,
1617 var,
1618 params->bus_endpoint_path,
1619 context->private_devices,
1620 context->protect_home,
1621 context->protect_system,
1622 context->mount_flags);
1623
1624 /* If we couldn't set up the namespace this is
1625 * probably due to a missing capability. In this case,
1626 * silently proceeed. */
1627 if (r == -EPERM || r == -EACCES) {
1628 log_open();
1629 log_unit_debug_errno(unit, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
1630 log_close();
1631 } else if (r < 0) {
1632 *exit_status = EXIT_NAMESPACE;
1633 return r;
1634 }
1635 }
1636
1637 if (params->apply_chroot) {
1638 if (!needs_mount_namespace && context->root_directory)
1639 if (chroot(context->root_directory) < 0) {
1640 *exit_status = EXIT_CHROOT;
1641 return -errno;
1642 }
1643
1644 if (chdir(context->working_directory ?: "/") < 0 &&
1645 !context->working_directory_missing_ok) {
1646 *exit_status = EXIT_CHDIR;
1647 return -errno;
1648 }
1649 } else {
1650 _cleanup_free_ char *d = NULL;
1651
1652 if (asprintf(&d, "%s/%s",
1653 context->root_directory ?: "",
1654 context->working_directory ?: "") < 0) {
1655 *exit_status = EXIT_MEMORY;
1656 return -ENOMEM;
1657 }
1658
1659 if (chdir(d) < 0 &&
1660 !context->working_directory_missing_ok) {
1661 *exit_status = EXIT_CHDIR;
1662 return -errno;
1663 }
1664 }
1665
1666 #ifdef HAVE_SELINUX
1667 if (params->apply_permissions && mac_selinux_use() && params->selinux_context_net && socket_fd >= 0) {
1668 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
1669 if (r < 0) {
1670 *exit_status = EXIT_SELINUX_CONTEXT;
1671 return r;
1672 }
1673 }
1674 #endif
1675
1676 /* We repeat the fd closing here, to make sure that
1677 * nothing is leaked from the PAM modules. Note that
1678 * we are more aggressive this time since socket_fd
1679 * and the netns fds we don't need anymore. The custom
1680 * endpoint fd was needed to upload the policy and can
1681 * now be closed as well. */
1682 r = close_all_fds(fds, n_fds);
1683 if (r >= 0)
1684 r = shift_fds(fds, n_fds);
1685 if (r >= 0)
1686 r = flags_fds(fds, n_fds, context->non_blocking);
1687 if (r < 0) {
1688 *exit_status = EXIT_FDS;
1689 return r;
1690 }
1691
1692 if (params->apply_permissions) {
1693
1694 for (i = 0; i < _RLIMIT_MAX; i++) {
1695 if (!context->rlimit[i])
1696 continue;
1697
1698 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1699 *exit_status = EXIT_LIMITS;
1700 return -errno;
1701 }
1702 }
1703
1704 if (context->capability_bounding_set_drop) {
1705 r = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1706 if (r < 0) {
1707 *exit_status = EXIT_CAPABILITIES;
1708 return r;
1709 }
1710 }
1711
1712 #ifdef HAVE_SMACK
1713 if (context->smack_process_label) {
1714 r = mac_smack_apply_pid(0, context->smack_process_label);
1715 if (r < 0) {
1716 *exit_status = EXIT_SMACK_PROCESS_LABEL;
1717 return r;
1718 }
1719 }
1720 #endif
1721
1722 if (context->user) {
1723 r = enforce_user(context, uid);
1724 if (r < 0) {
1725 *exit_status = EXIT_USER;
1726 return r;
1727 }
1728 }
1729
1730 /* PR_GET_SECUREBITS is not privileged, while
1731 * PR_SET_SECUREBITS is. So to suppress
1732 * potential EPERMs we'll try not to call
1733 * PR_SET_SECUREBITS unless necessary. */
1734 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1735 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1736 *exit_status = EXIT_SECUREBITS;
1737 return -errno;
1738 }
1739
1740 if (context->capabilities)
1741 if (cap_set_proc(context->capabilities) < 0) {
1742 *exit_status = EXIT_CAPABILITIES;
1743 return -errno;
1744 }
1745
1746 if (context->no_new_privileges)
1747 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1748 *exit_status = EXIT_NO_NEW_PRIVILEGES;
1749 return -errno;
1750 }
1751
1752 #ifdef HAVE_SECCOMP
1753 if (context->address_families_whitelist ||
1754 !set_isempty(context->address_families)) {
1755 r = apply_address_families(context);
1756 if (r < 0) {
1757 *exit_status = EXIT_ADDRESS_FAMILIES;
1758 return r;
1759 }
1760 }
1761
1762 if (context->syscall_whitelist ||
1763 !set_isempty(context->syscall_filter) ||
1764 !set_isempty(context->syscall_archs)) {
1765 r = apply_seccomp(context);
1766 if (r < 0) {
1767 *exit_status = EXIT_SECCOMP;
1768 return r;
1769 }
1770 }
1771 #endif
1772
1773 #ifdef HAVE_SELINUX
1774 if (mac_selinux_use()) {
1775 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
1776
1777 if (exec_context) {
1778 r = setexeccon(exec_context);
1779 if (r < 0) {
1780 *exit_status = EXIT_SELINUX_CONTEXT;
1781 return r;
1782 }
1783 }
1784 }
1785 #endif
1786
1787 #ifdef HAVE_APPARMOR
1788 if (context->apparmor_profile && mac_apparmor_use()) {
1789 r = aa_change_onexec(context->apparmor_profile);
1790 if (r < 0 && !context->apparmor_profile_ignore) {
1791 *exit_status = EXIT_APPARMOR_PROFILE;
1792 return -errno;
1793 }
1794 }
1795 #endif
1796 }
1797
1798 r = build_environment(context, n_fds, params->watchdog_usec, home, username, shell, &our_env);
1799 if (r < 0) {
1800 *exit_status = EXIT_MEMORY;
1801 return r;
1802 }
1803
1804 final_env = strv_env_merge(5,
1805 params->environment,
1806 our_env,
1807 context->environment,
1808 files_env,
1809 pam_env,
1810 NULL);
1811 if (!final_env) {
1812 *exit_status = EXIT_MEMORY;
1813 return -ENOMEM;
1814 }
1815
1816 final_argv = replace_env_argv(argv, final_env);
1817 if (!final_argv) {
1818 *exit_status = EXIT_MEMORY;
1819 return -ENOMEM;
1820 }
1821
1822 final_env = strv_env_clean(final_env);
1823
1824 if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) {
1825 _cleanup_free_ char *line;
1826
1827 line = exec_command_line(final_argv);
1828 if (line) {
1829 log_open();
1830 log_struct(LOG_DEBUG,
1831 LOG_UNIT_ID(unit),
1832 "EXECUTABLE=%s", command->path,
1833 LOG_UNIT_MESSAGE(unit, "Executing: %s", line),
1834 NULL);
1835 log_close();
1836 }
1837 }
1838
1839 execve(command->path, final_argv, final_env);
1840 *exit_status = EXIT_EXEC;
1841 return -errno;
1842 }
1843
1844 int exec_spawn(Unit *unit,
1845 ExecCommand *command,
1846 const ExecContext *context,
1847 const ExecParameters *params,
1848 ExecRuntime *runtime,
1849 pid_t *ret) {
1850
1851 _cleanup_strv_free_ char **files_env = NULL;
1852 int *fds = NULL; unsigned n_fds = 0;
1853 _cleanup_free_ char *line = NULL;
1854 int socket_fd, r;
1855 char **argv;
1856 pid_t pid;
1857
1858 assert(unit);
1859 assert(command);
1860 assert(context);
1861 assert(ret);
1862 assert(params);
1863 assert(params->fds || params->n_fds <= 0);
1864
1865 if (context->std_input == EXEC_INPUT_SOCKET ||
1866 context->std_output == EXEC_OUTPUT_SOCKET ||
1867 context->std_error == EXEC_OUTPUT_SOCKET) {
1868
1869 if (params->n_fds != 1) {
1870 log_unit_error(unit, "Got more than one socket.");
1871 return -EINVAL;
1872 }
1873
1874 socket_fd = params->fds[0];
1875 } else {
1876 socket_fd = -1;
1877 fds = params->fds;
1878 n_fds = params->n_fds;
1879 }
1880
1881 r = exec_context_load_environment(unit, context, &files_env);
1882 if (r < 0)
1883 return log_unit_error_errno(unit, r, "Failed to load environment files: %m");
1884
1885 argv = params->argv ?: command->argv;
1886 line = exec_command_line(argv);
1887 if (!line)
1888 return log_oom();
1889
1890 log_struct(LOG_DEBUG,
1891 LOG_UNIT_ID(unit),
1892 LOG_UNIT_MESSAGE(unit, "About to execute: %s", line),
1893 "EXECUTABLE=%s", command->path,
1894 NULL);
1895 pid = fork();
1896 if (pid < 0)
1897 return log_unit_error_errno(unit, r, "Failed to fork: %m");
1898
1899 if (pid == 0) {
1900 int exit_status;
1901
1902 r = exec_child(unit,
1903 command,
1904 context,
1905 params,
1906 runtime,
1907 argv,
1908 socket_fd,
1909 fds, n_fds,
1910 files_env,
1911 &exit_status);
1912 if (r < 0) {
1913 log_open();
1914 log_struct_errno(LOG_ERR, r,
1915 LOG_MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1916 LOG_UNIT_ID(unit),
1917 LOG_UNIT_MESSAGE(unit, "Failed at step %s spawning %s: %m",
1918 exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
1919 command->path),
1920 "EXECUTABLE=%s", command->path,
1921 NULL);
1922 }
1923
1924 _exit(exit_status);
1925 }
1926
1927 log_unit_debug(unit, "Forked %s as "PID_FMT, command->path, pid);
1928
1929 /* We add the new process to the cgroup both in the child (so
1930 * that we can be sure that no user code is ever executed
1931 * outside of the cgroup) and in the parent (so that we can be
1932 * sure that when we kill the cgroup the process will be
1933 * killed too). */
1934 if (params->cgroup_path)
1935 (void) cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
1936
1937 exec_status_start(&command->exec_status, pid);
1938
1939 *ret = pid;
1940 return 0;
1941 }
1942
1943 void exec_context_init(ExecContext *c) {
1944 assert(c);
1945
1946 c->umask = 0022;
1947 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1948 c->cpu_sched_policy = SCHED_OTHER;
1949 c->syslog_priority = LOG_DAEMON|LOG_INFO;
1950 c->syslog_level_prefix = true;
1951 c->ignore_sigpipe = true;
1952 c->timer_slack_nsec = NSEC_INFINITY;
1953 c->personality = PERSONALITY_INVALID;
1954 c->runtime_directory_mode = 0755;
1955 }
1956
1957 void exec_context_done(ExecContext *c) {
1958 unsigned l;
1959
1960 assert(c);
1961
1962 strv_free(c->environment);
1963 c->environment = NULL;
1964
1965 strv_free(c->environment_files);
1966 c->environment_files = NULL;
1967
1968 for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1969 free(c->rlimit[l]);
1970 c->rlimit[l] = NULL;
1971 }
1972
1973 free(c->working_directory);
1974 c->working_directory = NULL;
1975 free(c->root_directory);
1976 c->root_directory = NULL;
1977
1978 free(c->tty_path);
1979 c->tty_path = NULL;
1980
1981 free(c->syslog_identifier);
1982 c->syslog_identifier = NULL;
1983
1984 free(c->user);
1985 c->user = NULL;
1986
1987 free(c->group);
1988 c->group = NULL;
1989
1990 strv_free(c->supplementary_groups);
1991 c->supplementary_groups = NULL;
1992
1993 free(c->pam_name);
1994 c->pam_name = NULL;
1995
1996 if (c->capabilities) {
1997 cap_free(c->capabilities);
1998 c->capabilities = NULL;
1999 }
2000
2001 strv_free(c->read_only_dirs);
2002 c->read_only_dirs = NULL;
2003
2004 strv_free(c->read_write_dirs);
2005 c->read_write_dirs = NULL;
2006
2007 strv_free(c->inaccessible_dirs);
2008 c->inaccessible_dirs = NULL;
2009
2010 if (c->cpuset)
2011 CPU_FREE(c->cpuset);
2012
2013 free(c->utmp_id);
2014 c->utmp_id = NULL;
2015
2016 free(c->selinux_context);
2017 c->selinux_context = NULL;
2018
2019 free(c->apparmor_profile);
2020 c->apparmor_profile = NULL;
2021
2022 set_free(c->syscall_filter);
2023 c->syscall_filter = NULL;
2024
2025 set_free(c->syscall_archs);
2026 c->syscall_archs = NULL;
2027
2028 set_free(c->address_families);
2029 c->address_families = NULL;
2030
2031 strv_free(c->runtime_directory);
2032 c->runtime_directory = NULL;
2033
2034 bus_endpoint_free(c->bus_endpoint);
2035 c->bus_endpoint = NULL;
2036 }
2037
2038 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
2039 char **i;
2040
2041 assert(c);
2042
2043 if (!runtime_prefix)
2044 return 0;
2045
2046 STRV_FOREACH(i, c->runtime_directory) {
2047 _cleanup_free_ char *p;
2048
2049 p = strjoin(runtime_prefix, "/", *i, NULL);
2050 if (!p)
2051 return -ENOMEM;
2052
2053 /* We execute this synchronously, since we need to be
2054 * sure this is gone when we start the service
2055 * next. */
2056 (void) rm_rf(p, REMOVE_ROOT);
2057 }
2058
2059 return 0;
2060 }
2061
2062 void exec_command_done(ExecCommand *c) {
2063 assert(c);
2064
2065 free(c->path);
2066 c->path = NULL;
2067
2068 strv_free(c->argv);
2069 c->argv = NULL;
2070 }
2071
2072 void exec_command_done_array(ExecCommand *c, unsigned n) {
2073 unsigned i;
2074
2075 for (i = 0; i < n; i++)
2076 exec_command_done(c+i);
2077 }
2078
2079 ExecCommand* exec_command_free_list(ExecCommand *c) {
2080 ExecCommand *i;
2081
2082 while ((i = c)) {
2083 LIST_REMOVE(command, c, i);
2084 exec_command_done(i);
2085 free(i);
2086 }
2087
2088 return NULL;
2089 }
2090
2091 void exec_command_free_array(ExecCommand **c, unsigned n) {
2092 unsigned i;
2093
2094 for (i = 0; i < n; i++)
2095 c[i] = exec_command_free_list(c[i]);
2096 }
2097
/* Context handed to invalid_env() through its userdata pointer. */
typedef struct InvalidEnvInfo {
        Unit *unit; /* unit the log message is attributed to */
        const char *path; /* logged alongside the rejected assignment */
} InvalidEnvInfo;
2102
2103 static void invalid_env(const char *p, void *userdata) {
2104 InvalidEnvInfo *info = userdata;
2105
2106 log_unit_error(info->unit, "Ignoring invalid environment assignment '%s': %s", p, info->path);
2107 }
2108
2109 int exec_context_load_environment(Unit *unit, const ExecContext *c, char ***l) {
2110 char **i, **r = NULL;
2111
2112 assert(c);
2113 assert(l);
2114
2115 STRV_FOREACH(i, c->environment_files) {
2116 char *fn;
2117 int k;
2118 bool ignore = false;
2119 char **p;
2120 _cleanup_globfree_ glob_t pglob = {};
2121 int count, n;
2122
2123 fn = *i;
2124
2125 if (fn[0] == '-') {
2126 ignore = true;
2127 fn ++;
2128 }
2129
2130 if (!path_is_absolute(fn)) {
2131 if (ignore)
2132 continue;
2133
2134 strv_free(r);
2135 return -EINVAL;
2136 }
2137
2138 /* Filename supports globbing, take all matching files */
2139 errno = 0;
2140 if (glob(fn, 0, NULL, &pglob) != 0) {
2141 if (ignore)
2142 continue;
2143
2144 strv_free(r);
2145 return errno ? -errno : -EINVAL;
2146 }
2147 count = pglob.gl_pathc;
2148 if (count == 0) {
2149 if (ignore)
2150 continue;
2151
2152 strv_free(r);
2153 return -EINVAL;
2154 }
2155 for (n = 0; n < count; n++) {
2156 k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2157 if (k < 0) {
2158 if (ignore)
2159 continue;
2160
2161 strv_free(r);
2162 return k;
2163 }
2164 /* Log invalid environment variables with filename */
2165 if (p) {
2166 InvalidEnvInfo info = {
2167 .unit = unit,
2168 .path = pglob.gl_pathv[n]
2169 };
2170
2171 p = strv_env_clean_with_callback(p, invalid_env, &info);
2172 }
2173
2174 if (r == NULL)
2175 r = p;
2176 else {
2177 char **m;
2178
2179 m = strv_env_merge(2, r, p);
2180 strv_free(r);
2181 strv_free(p);
2182 if (!m)
2183 return -ENOMEM;
2184
2185 r = m;
2186 }
2187 }
2188 }
2189
2190 *l = r;
2191
2192 return 0;
2193 }
2194
2195 static bool tty_may_match_dev_console(const char *tty) {
2196 _cleanup_free_ char *active = NULL;
2197 char *console;
2198
2199 if (startswith(tty, "/dev/"))
2200 tty += 5;
2201
2202 /* trivial identity? */
2203 if (streq(tty, "console"))
2204 return true;
2205
2206 console = resolve_dev_console(&active);
2207 /* if we could not resolve, assume it may */
2208 if (!console)
2209 return true;
2210
2211 /* "tty0" means the active VC, so it may be the same sometimes */
2212 return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
2213 }
2214
2215 bool exec_context_may_touch_console(ExecContext *ec) {
2216 return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
2217 is_terminal_input(ec->std_input) ||
2218 is_terminal_output(ec->std_output) ||
2219 is_terminal_output(ec->std_error)) &&
2220 tty_may_match_dev_console(tty_path(ec));
2221 }
2222
2223 static void strv_fprintf(FILE *f, char **l) {
2224 char **g;
2225
2226 assert(f);
2227
2228 STRV_FOREACH(g, l)
2229 fprintf(f, " %s", *g);
2230 }
2231
2232 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
2233 char **e;
2234 unsigned i;
2235
2236 assert(c);
2237 assert(f);
2238
2239 prefix = strempty(prefix);
2240
2241 fprintf(f,
2242 "%sUMask: %04o\n"
2243 "%sWorkingDirectory: %s\n"
2244 "%sRootDirectory: %s\n"
2245 "%sNonBlocking: %s\n"
2246 "%sPrivateTmp: %s\n"
2247 "%sPrivateNetwork: %s\n"
2248 "%sPrivateDevices: %s\n"
2249 "%sProtectHome: %s\n"
2250 "%sProtectSystem: %s\n"
2251 "%sIgnoreSIGPIPE: %s\n",
2252 prefix, c->umask,
2253 prefix, c->working_directory ? c->working_directory : "/",
2254 prefix, c->root_directory ? c->root_directory : "/",
2255 prefix, yes_no(c->non_blocking),
2256 prefix, yes_no(c->private_tmp),
2257 prefix, yes_no(c->private_network),
2258 prefix, yes_no(c->private_devices),
2259 prefix, protect_home_to_string(c->protect_home),
2260 prefix, protect_system_to_string(c->protect_system),
2261 prefix, yes_no(c->ignore_sigpipe));
2262
2263 STRV_FOREACH(e, c->environment)
2264 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
2265
2266 STRV_FOREACH(e, c->environment_files)
2267 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
2268
2269 if (c->nice_set)
2270 fprintf(f,
2271 "%sNice: %i\n",
2272 prefix, c->nice);
2273
2274 if (c->oom_score_adjust_set)
2275 fprintf(f,
2276 "%sOOMScoreAdjust: %i\n",
2277 prefix, c->oom_score_adjust);
2278
2279 for (i = 0; i < RLIM_NLIMITS; i++)
2280 if (c->rlimit[i])
2281 fprintf(f, "%s%s: "RLIM_FMT"\n",
2282 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
2283
2284 if (c->ioprio_set) {
2285 _cleanup_free_ char *class_str = NULL;
2286
2287 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
2288 fprintf(f,
2289 "%sIOSchedulingClass: %s\n"
2290 "%sIOPriority: %i\n",
2291 prefix, strna(class_str),
2292 prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
2293 }
2294
2295 if (c->cpu_sched_set) {
2296 _cleanup_free_ char *policy_str = NULL;
2297
2298 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
2299 fprintf(f,
2300 "%sCPUSchedulingPolicy: %s\n"
2301 "%sCPUSchedulingPriority: %i\n"
2302 "%sCPUSchedulingResetOnFork: %s\n",
2303 prefix, strna(policy_str),
2304 prefix, c->cpu_sched_priority,
2305 prefix, yes_no(c->cpu_sched_reset_on_fork));
2306 }
2307
2308 if (c->cpuset) {
2309 fprintf(f, "%sCPUAffinity:", prefix);
2310 for (i = 0; i < c->cpuset_ncpus; i++)
2311 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2312 fprintf(f, " %u", i);
2313 fputs("\n", f);
2314 }
2315
2316 if (c->timer_slack_nsec != NSEC_INFINITY)
2317 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2318
2319 fprintf(f,
2320 "%sStandardInput: %s\n"
2321 "%sStandardOutput: %s\n"
2322 "%sStandardError: %s\n",
2323 prefix, exec_input_to_string(c->std_input),
2324 prefix, exec_output_to_string(c->std_output),
2325 prefix, exec_output_to_string(c->std_error));
2326
2327 if (c->tty_path)
2328 fprintf(f,
2329 "%sTTYPath: %s\n"
2330 "%sTTYReset: %s\n"
2331 "%sTTYVHangup: %s\n"
2332 "%sTTYVTDisallocate: %s\n",
2333 prefix, c->tty_path,
2334 prefix, yes_no(c->tty_reset),
2335 prefix, yes_no(c->tty_vhangup),
2336 prefix, yes_no(c->tty_vt_disallocate));
2337
2338 if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2339 c->std_output == EXEC_OUTPUT_KMSG ||
2340 c->std_output == EXEC_OUTPUT_JOURNAL ||
2341 c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2342 c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2343 c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2344 c->std_error == EXEC_OUTPUT_SYSLOG ||
2345 c->std_error == EXEC_OUTPUT_KMSG ||
2346 c->std_error == EXEC_OUTPUT_JOURNAL ||
2347 c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2348 c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2349 c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2350
2351 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
2352
2353 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2354 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2355
2356 fprintf(f,
2357 "%sSyslogFacility: %s\n"
2358 "%sSyslogLevel: %s\n",
2359 prefix, strna(fac_str),
2360 prefix, strna(lvl_str));
2361 }
2362
2363 if (c->capabilities) {
2364 _cleanup_cap_free_charp_ char *t;
2365
2366 t = cap_to_text(c->capabilities, NULL);
2367 if (t)
2368 fprintf(f, "%sCapabilities: %s\n", prefix, t);
2369 }
2370
2371 if (c->secure_bits)
2372 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2373 prefix,
2374 (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2375 (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2376 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2377 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2378 (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2379 (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
2380
2381 if (c->capability_bounding_set_drop) {
2382 unsigned long l;
2383 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2384
2385 for (l = 0; l <= cap_last_cap(); l++)
2386 if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l)))
2387 fprintf(f, " %s", strna(capability_to_name(l)));
2388
2389 fputs("\n", f);
2390 }
2391
2392 if (c->user)
2393 fprintf(f, "%sUser: %s\n", prefix, c->user);
2394 if (c->group)
2395 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2396
2397 if (strv_length(c->supplementary_groups) > 0) {
2398 fprintf(f, "%sSupplementaryGroups:", prefix);
2399 strv_fprintf(f, c->supplementary_groups);
2400 fputs("\n", f);
2401 }
2402
2403 if (c->pam_name)
2404 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2405
2406 if (strv_length(c->read_write_dirs) > 0) {
2407 fprintf(f, "%sReadWriteDirs:", prefix);
2408 strv_fprintf(f, c->read_write_dirs);
2409 fputs("\n", f);
2410 }
2411
2412 if (strv_length(c->read_only_dirs) > 0) {
2413 fprintf(f, "%sReadOnlyDirs:", prefix);
2414 strv_fprintf(f, c->read_only_dirs);
2415 fputs("\n", f);
2416 }
2417
2418 if (strv_length(c->inaccessible_dirs) > 0) {
2419 fprintf(f, "%sInaccessibleDirs:", prefix);
2420 strv_fprintf(f, c->inaccessible_dirs);
2421 fputs("\n", f);
2422 }
2423
2424 if (c->utmp_id)
2425 fprintf(f,
2426 "%sUtmpIdentifier: %s\n",
2427 prefix, c->utmp_id);
2428
2429 if (c->selinux_context)
2430 fprintf(f,
2431 "%sSELinuxContext: %s%s\n",
2432 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
2433
2434 if (c->personality != PERSONALITY_INVALID)
2435 fprintf(f,
2436 "%sPersonality: %s\n",
2437 prefix, strna(personality_to_string(c->personality)));
2438
2439 if (c->syscall_filter) {
2440 #ifdef HAVE_SECCOMP
2441 Iterator j;
2442 void *id;
2443 bool first = true;
2444 #endif
2445
2446 fprintf(f,
2447 "%sSystemCallFilter: ",
2448 prefix);
2449
2450 if (!c->syscall_whitelist)
2451 fputc('~', f);
2452
2453 #ifdef HAVE_SECCOMP
2454 SET_FOREACH(id, c->syscall_filter, j) {
2455 _cleanup_free_ char *name = NULL;
2456
2457 if (first)
2458 first = false;
2459 else
2460 fputc(' ', f);
2461
2462 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
2463 fputs(strna(name), f);
2464 }
2465 #endif
2466
2467 fputc('\n', f);
2468 }
2469
2470 if (c->syscall_archs) {
2471 #ifdef HAVE_SECCOMP
2472 Iterator j;
2473 void *id;
2474 #endif
2475
2476 fprintf(f,
2477 "%sSystemCallArchitectures:",
2478 prefix);
2479
2480 #ifdef HAVE_SECCOMP
2481 SET_FOREACH(id, c->syscall_archs, j)
2482 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
2483 #endif
2484 fputc('\n', f);
2485 }
2486
2487 if (c->syscall_errno != 0)
2488 fprintf(f,
2489 "%sSystemCallErrorNumber: %s\n",
2490 prefix, strna(errno_to_name(c->syscall_errno)));
2491
2492 if (c->apparmor_profile)
2493 fprintf(f,
2494 "%sAppArmorProfile: %s%s\n",
2495 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
2496 }
2497
2498 bool exec_context_maintains_privileges(ExecContext *c) {
2499 assert(c);
2500
2501 /* Returns true if the process forked off would run run under
2502 * an unchanged UID or as root. */
2503
2504 if (!c->user)
2505 return true;
2506
2507 if (streq(c->user, "root") || streq(c->user, "0"))
2508 return true;
2509
2510 return false;
2511 }
2512
2513 void exec_status_start(ExecStatus *s, pid_t pid) {
2514 assert(s);
2515
2516 zero(*s);
2517 s->pid = pid;
2518 dual_timestamp_get(&s->start_timestamp);
2519 }
2520
2521 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2522 assert(s);
2523
2524 if (s->pid && s->pid != pid)
2525 zero(*s);
2526
2527 s->pid = pid;
2528 dual_timestamp_get(&s->exit_timestamp);
2529
2530 s->code = code;
2531 s->status = status;
2532
2533 if (context) {
2534 if (context->utmp_id)
2535 utmp_put_dead_process(context->utmp_id, pid, code, status);
2536
2537 exec_context_tty_reset(context);
2538 }
2539 }
2540
2541 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2542 char buf[FORMAT_TIMESTAMP_MAX];
2543
2544 assert(s);
2545 assert(f);
2546
2547 if (s->pid <= 0)
2548 return;
2549
2550 prefix = strempty(prefix);
2551
2552 fprintf(f,
2553 "%sPID: "PID_FMT"\n",
2554 prefix, s->pid);
2555
2556 if (s->start_timestamp.realtime > 0)
2557 fprintf(f,
2558 "%sStart Timestamp: %s\n",
2559 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2560
2561 if (s->exit_timestamp.realtime > 0)
2562 fprintf(f,
2563 "%sExit Timestamp: %s\n"
2564 "%sExit Code: %s\n"
2565 "%sExit Status: %i\n",
2566 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2567 prefix, sigchld_code_to_string(s->code),
2568 prefix, s->status);
2569 }
2570
2571 char *exec_command_line(char **argv) {
2572 size_t k;
2573 char *n, *p, **a;
2574 bool first = true;
2575
2576 assert(argv);
2577
2578 k = 1;
2579 STRV_FOREACH(a, argv)
2580 k += strlen(*a)+3;
2581
2582 if (!(n = new(char, k)))
2583 return NULL;
2584
2585 p = n;
2586 STRV_FOREACH(a, argv) {
2587
2588 if (!first)
2589 *(p++) = ' ';
2590 else
2591 first = false;
2592
2593 if (strpbrk(*a, WHITESPACE)) {
2594 *(p++) = '\'';
2595 p = stpcpy(p, *a);
2596 *(p++) = '\'';
2597 } else
2598 p = stpcpy(p, *a);
2599
2600 }
2601
2602 *p = 0;
2603
2604 /* FIXME: this doesn't really handle arguments that have
2605 * spaces and ticks in them */
2606
2607 return n;
2608 }
2609
2610 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2611 _cleanup_free_ char *cmd = NULL;
2612 const char *prefix2;
2613
2614 assert(c);
2615 assert(f);
2616
2617 prefix = strempty(prefix);
2618 prefix2 = strjoina(prefix, "\t");
2619
2620 cmd = exec_command_line(c->argv);
2621 fprintf(f,
2622 "%sCommand Line: %s\n",
2623 prefix, cmd ? cmd : strerror(ENOMEM));
2624
2625 exec_status_dump(&c->exec_status, f, prefix2);
2626 }
2627
2628 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2629 assert(f);
2630
2631 prefix = strempty(prefix);
2632
2633 LIST_FOREACH(command, c, c)
2634 exec_command_dump(c, f, prefix);
2635 }
2636
2637 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2638 ExecCommand *end;
2639
2640 assert(l);
2641 assert(e);
2642
2643 if (*l) {
2644 /* It's kind of important, that we keep the order here */
2645 LIST_FIND_TAIL(command, *l, end);
2646 LIST_INSERT_AFTER(command, *l, end, e);
2647 } else
2648 *l = e;
2649 }
2650
2651 int exec_command_set(ExecCommand *c, const char *path, ...) {
2652 va_list ap;
2653 char **l, *p;
2654
2655 assert(c);
2656 assert(path);
2657
2658 va_start(ap, path);
2659 l = strv_new_ap(path, ap);
2660 va_end(ap);
2661
2662 if (!l)
2663 return -ENOMEM;
2664
2665 p = strdup(path);
2666 if (!p) {
2667 strv_free(l);
2668 return -ENOMEM;
2669 }
2670
2671 free(c->path);
2672 c->path = p;
2673
2674 strv_free(c->argv);
2675 c->argv = l;
2676
2677 return 0;
2678 }
2679
2680 int exec_command_append(ExecCommand *c, const char *path, ...) {
2681 _cleanup_strv_free_ char **l = NULL;
2682 va_list ap;
2683 int r;
2684
2685 assert(c);
2686 assert(path);
2687
2688 va_start(ap, path);
2689 l = strv_new_ap(path, ap);
2690 va_end(ap);
2691
2692 if (!l)
2693 return -ENOMEM;
2694
2695 r = strv_extend_strv(&c->argv, l);
2696 if (r < 0)
2697 return r;
2698
2699 return 0;
2700 }
2701
2702
2703 static int exec_runtime_allocate(ExecRuntime **rt) {
2704
2705 if (*rt)
2706 return 0;
2707
2708 *rt = new0(ExecRuntime, 1);
2709 if (!*rt)
2710 return -ENOMEM;
2711
2712 (*rt)->n_ref = 1;
2713 (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
2714
2715 return 0;
2716 }
2717
2718 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2719 int r;
2720
2721 assert(rt);
2722 assert(c);
2723 assert(id);
2724
2725 if (*rt)
2726 return 1;
2727
2728 if (!c->private_network && !c->private_tmp)
2729 return 0;
2730
2731 r = exec_runtime_allocate(rt);
2732 if (r < 0)
2733 return r;
2734
2735 if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2736 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2737 return -errno;
2738 }
2739
2740 if (c->private_tmp && !(*rt)->tmp_dir) {
2741 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
2742 if (r < 0)
2743 return r;
2744 }
2745
2746 return 1;
2747 }
2748
2749 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2750 assert(r);
2751 assert(r->n_ref > 0);
2752
2753 r->n_ref++;
2754 return r;
2755 }
2756
2757 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2758
2759 if (!r)
2760 return NULL;
2761
2762 assert(r->n_ref > 0);
2763
2764 r->n_ref--;
2765 if (r->n_ref > 0)
2766 return NULL;
2767
2768 free(r->tmp_dir);
2769 free(r->var_tmp_dir);
2770 safe_close_pair(r->netns_storage_socket);
2771 free(r);
2772
2773 return NULL;
2774 }
2775
2776 int exec_runtime_serialize(Unit *u, ExecRuntime *rt, FILE *f, FDSet *fds) {
2777 assert(u);
2778 assert(f);
2779 assert(fds);
2780
2781 if (!rt)
2782 return 0;
2783
2784 if (rt->tmp_dir)
2785 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2786
2787 if (rt->var_tmp_dir)
2788 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2789
2790 if (rt->netns_storage_socket[0] >= 0) {
2791 int copy;
2792
2793 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2794 if (copy < 0)
2795 return copy;
2796
2797 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2798 }
2799
2800 if (rt->netns_storage_socket[1] >= 0) {
2801 int copy;
2802
2803 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2804 if (copy < 0)
2805 return copy;
2806
2807 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
2808 }
2809
2810 return 0;
2811 }
2812
2813 int exec_runtime_deserialize_item(Unit *u, ExecRuntime **rt, const char *key, const char *value, FDSet *fds) {
2814 int r;
2815
2816 assert(rt);
2817 assert(key);
2818 assert(value);
2819
2820 if (streq(key, "tmp-dir")) {
2821 char *copy;
2822
2823 r = exec_runtime_allocate(rt);
2824 if (r < 0)
2825 return log_oom();
2826
2827 copy = strdup(value);
2828 if (!copy)
2829 return log_oom();
2830
2831 free((*rt)->tmp_dir);
2832 (*rt)->tmp_dir = copy;
2833
2834 } else if (streq(key, "var-tmp-dir")) {
2835 char *copy;
2836
2837 r = exec_runtime_allocate(rt);
2838 if (r < 0)
2839 return log_oom();
2840
2841 copy = strdup(value);
2842 if (!copy)
2843 return log_oom();
2844
2845 free((*rt)->var_tmp_dir);
2846 (*rt)->var_tmp_dir = copy;
2847
2848 } else if (streq(key, "netns-socket-0")) {
2849 int fd;
2850
2851 r = exec_runtime_allocate(rt);
2852 if (r < 0)
2853 return log_oom();
2854
2855 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2856 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
2857 else {
2858 safe_close((*rt)->netns_storage_socket[0]);
2859 (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2860 }
2861 } else if (streq(key, "netns-socket-1")) {
2862 int fd;
2863
2864 r = exec_runtime_allocate(rt);
2865 if (r < 0)
2866 return log_oom();
2867
2868 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2869 log_unit_debug(u, "Failed to parse netns socket value: %s", value);
2870 else {
2871 safe_close((*rt)->netns_storage_socket[1]);
2872 (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
2873 }
2874 } else
2875 return 0;
2876
2877 return 1;
2878 }
2879
2880 static void *remove_tmpdir_thread(void *p) {
2881 _cleanup_free_ char *path = p;
2882
2883 (void) rm_rf(path, REMOVE_ROOT|REMOVE_PHYSICAL);
2884 return NULL;
2885 }
2886
2887 void exec_runtime_destroy(ExecRuntime *rt) {
2888 int r;
2889
2890 if (!rt)
2891 return;
2892
2893 /* If there are multiple users of this, let's leave the stuff around */
2894 if (rt->n_ref > 1)
2895 return;
2896
2897 if (rt->tmp_dir) {
2898 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2899
2900 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2901 if (r < 0) {
2902 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
2903 free(rt->tmp_dir);
2904 }
2905
2906 rt->tmp_dir = NULL;
2907 }
2908
2909 if (rt->var_tmp_dir) {
2910 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2911
2912 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2913 if (r < 0) {
2914 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
2915 free(rt->var_tmp_dir);
2916 }
2917
2918 rt->var_tmp_dir = NULL;
2919 }
2920
2921 safe_close_pair(rt->netns_storage_socket);
2922 }
2923
2924 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2925 [EXEC_INPUT_NULL] = "null",
2926 [EXEC_INPUT_TTY] = "tty",
2927 [EXEC_INPUT_TTY_FORCE] = "tty-force",
2928 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2929 [EXEC_INPUT_SOCKET] = "socket"
2930 };
2931
2932 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2933
2934 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2935 [EXEC_OUTPUT_INHERIT] = "inherit",
2936 [EXEC_OUTPUT_NULL] = "null",
2937 [EXEC_OUTPUT_TTY] = "tty",
2938 [EXEC_OUTPUT_SYSLOG] = "syslog",
2939 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2940 [EXEC_OUTPUT_KMSG] = "kmsg",
2941 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2942 [EXEC_OUTPUT_JOURNAL] = "journal",
2943 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2944 [EXEC_OUTPUT_SOCKET] = "socket"
2945 };
2946
2947 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);