1 /* SPDX-License-Identifier: LGPL-2.1+ */
9 #include <linux/unistd.h>
16 #include <sys/mount.h>
17 #include <sys/param.h>
18 #include <sys/prctl.h>
19 #include <sys/socket.h>
20 #include <sys/syscall.h>
25 #include <lxc/lxccontainer.h>
30 #include "cgroups/cgroup.h"
31 #include "cgroups/cgroup_utils.h"
39 #include "lxcseccomp.h"
42 #include "memory_utils.h"
43 #include "mount_utils.h"
44 #include "namespace.h"
45 #include "process_utils.h"
47 #include "syscall_wrappers.h"
51 lxc_log_define(attach
, lxc
);
53 /* Define default options if no options are supplied by the user. */
54 static lxc_attach_options_t attach_static_default_options
= LXC_ATTACH_OPTIONS_DEFAULT
;
57 * The context used to attach to the container.
58 * @attach_flags : the attach flags specified in lxc_attach_options_t
59 * @init_pid : the PID of the container's init process
60 * @dfd_init_pid : file descriptor to /proc/@init_pid
61 * __Must be closed in attach_context_security_barrier()__!
62 * @dfd_self_pid : file descriptor to /proc/self
63 * __Must be closed in attach_context_security_barrier()__!
64 * @setup_ns_uid : if CLONE_NEWUSER is specified will contain the uid used
65 * during attach setup.
66 * @setup_ns_gid : if CLONE_NEWUSER is specified will contain the gid used
67 * during attach setup.
68 * @target_ns_uid : if CLONE_NEWUSER is specified the uid that the final
69 * program will be run with.
70 * @target_ns_gid : if CLONE_NEWUSER is specified the gid that the final
71 * program will be run with.
72 * @target_host_uid : if CLONE_NEWUSER is specified the uid that the final
73 * program will be run with on the host.
74 * @target_host_gid : if CLONE_NEWUSER is specified the gid that the final
75 * program will be run with on the host.
76 * @lsm_label : LSM label to be used for the attaching process
77 * @container : the container we're attaching to
78 * @personality : the personality to use for the final program
79 * @capability : the capability mask of the @init_pid
80 * @ns_inherited : flags of namespaces that the final program will inherit
82 * @ns_fd : file descriptors to @init_pid's namespaces
/*
 * State gathered about the target container's init process and carried through
 * the attach sequence. Fields are initialised to sentinel values by
 * alloc_attach_context().
 */
84 struct attach_context
{
85 unsigned int attach_flags
;
94 uid_t target_host_uid
;
/*
 * NOTE(review): target_host_gid is declared uid_t, yet it is initialised with
 * LXC_INVALID_GID in alloc_attach_context() and assigned a (gid_t) value in
 * parse_init_status(). gid_t looks like the intended type - confirm.
 */
95 uid_t target_host_gid
;
97 struct lxc_container
*container
;
98 personality_t personality
;
/* Filled from the "CapBnd:" line by parse_init_status(); tested bit by bit in drop_capabilities(). */
99 unsigned long long capability_mask
;
/* One slot per namespace type; every slot is set to -EBADF by alloc_attach_context(). */
101 int ns_fd
[LXC_NS_MAX
];
102 struct lsm_ops
*lsm_ops
;
105 static pid_t
pidfd_get_pid(int dfd_init_pid
, int pidfd
)
107 __do_free
char *line
= NULL
;
108 __do_fclose
FILE *f
= NULL
;
110 char path
[STRLITERALLEN("fdinfo/") + INTTYPE_TO_STRLEN(int) + 1 ] = "fdinfo/";
113 if (dfd_init_pid
< 0 || pidfd
< 0)
114 return ret_errno(EBADF
);
116 ret
= strnprintf(path
+ STRLITERALLEN("fdinfo/"), INTTYPE_TO_STRLEN(int), "%d", pidfd
);
118 return ret_errno(EIO
);
120 f
= fdopen_at(dfd_init_pid
, path
, "re", PROTECT_OPEN
, PROTECT_LOOKUP_BENEATH
);
124 while (getline(&line
, &len
, f
) != -1) {
125 const char *prefix
= "Pid:\t";
126 const size_t prefix_len
= STRLITERALLEN("Pid:\t");
130 if (!strnequal(slider
, prefix
, prefix_len
))
133 slider
+= prefix_len
;
134 slider
= lxc_trim_whitespace_in_place(slider
);
136 ret
= lxc_safe_int(slider
, &pid
);
143 return ret_errno(ENOENT
);
/*
 * Write @pid to @fd with lxc_write_nointr().
 * Returns true only when exactly sizeof(pid_t) bytes were written.
 */
146 static inline bool sync_wake_pid(int fd
, pid_t pid
)
148 return lxc_write_nointr(fd
, &pid
, sizeof(pid_t
)) == sizeof(pid_t
);
/*
 * Read a pid_t from @fd into @pid with lxc_read_nointr().
 * Returns true only when exactly sizeof(pid_t) bytes were read.
 */
151 static inline bool sync_wait_pid(int fd
, pid_t
*pid
)
153 return lxc_read_nointr(fd
, pid
, sizeof(pid_t
)) == sizeof(pid_t
);
/*
 * Pass the single file descriptor @fd_send over @fd using
 * lxc_abstract_unix_send_fds(), with a NULL/0 data argument.
 * Returns true when the helper reports a positive result.
 */
156 static inline bool sync_wake_fd(int fd
, int fd_send
)
158 return lxc_abstract_unix_send_fds(fd
, &fd_send
, 1, NULL
, 0) > 0;
/*
 * Receive one file descriptor from @fd into @fd_recv using
 * lxc_abstract_unix_recv_one_fd(), with a NULL/0 data argument.
 * Returns true when the helper reports a positive result.
 */
161 static inline bool sync_wait_fd(int fd
, int *fd_recv
)
163 return lxc_abstract_unix_recv_one_fd(fd
, fd_recv
, NULL
, 0) > 0;
/*
 * Returns true when options->attach_flags has LXC_ATTACH_LSM or
 * LXC_ATTACH_LSM_LABEL set, i.e. when LSM handling was requested.
 */
166 static bool attach_lsm(lxc_attach_options_t
*options
)
168 return (options
->attach_flags
& (LXC_ATTACH_LSM
| LXC_ATTACH_LSM_LABEL
));
/*
 * Allocate a zeroed struct attach_context with zalloc() and put every pid, fd
 * and id field into its "unset" sentinel state:
 *   - init_pid                          = -ESRCH
 *   - dfd_self_pid/dfd_init_pid/init_pidfd = -EBADF
 *   - setup/target ns and host uids     = LXC_INVALID_UID
 *   - setup/target ns and host gids     = LXC_INVALID_GID
 *   - every ns_fd[] slot                = -EBADF
 *
 * The allocation-failure path yields NULL with errno set to ENOMEM via
 * ret_set_errno().
 */
171 static struct attach_context
*alloc_attach_context(void)
173 struct attach_context
*ctx
;
175 ctx
= zalloc(sizeof(struct attach_context
));
177 return ret_set_errno(NULL
, ENOMEM
);
179 ctx
->init_pid
= -ESRCH
;
181 ctx
->dfd_self_pid
= -EBADF
;
182 ctx
->dfd_init_pid
= -EBADF
;
183 ctx
->init_pidfd
= -EBADF
;
185 ctx
->setup_ns_uid
= LXC_INVALID_UID
;
186 ctx
->setup_ns_gid
= LXC_INVALID_GID
;
187 ctx
->target_ns_uid
= LXC_INVALID_UID
;
188 ctx
->target_ns_gid
= LXC_INVALID_GID
;
189 ctx
->target_host_uid
= LXC_INVALID_UID
;
190 ctx
->target_host_gid
= LXC_INVALID_GID
;
/* Mark every namespace fd slot as "not opened". */
192 for (lxc_namespace_t i
= 0; i
< LXC_NS_MAX
; i
++)
193 ctx
->ns_fd
[i
] = -EBADF
;
198 static int get_personality(const char *name
, const char *lxcpath
,
199 personality_t
*personality
)
201 __do_free
char *p
= NULL
;
205 p
= lxc_cmd_get_config_item(name
, "lxc.arch", lxcpath
);
207 *personality
= LXC_ARCH_UNCHANGED
;
211 ret
= lxc_config_parse_arch(p
, &per
);
213 return syserror("Failed to parse personality");
/*
 * Work out ctx->setup_ns_uid and ctx->setup_ns_gid from the "uid_map" and
 * "gid_map" files opened relative to ctx->dfd_init_pid.
 *
 * Each map line is parsed as three unsigned values: id inside the namespace,
 * id on the host, and range length. Two things are derived per map:
 *   - if a range covers id 0 inside the namespace, the setup id becomes 0;
 *   - if ctx->target_host_{uid,gid} falls inside a mapped host range, it is
 *     translated into the namespace (init_ns_{uid,gid}).
 * After both maps are read, a setup id that is still invalid falls back to the
 * translated init_ns_{uid,gid}.
 *
 * The work only applies when options->namespaces contains CLONE_NEWUSER.
 * Failing to open either map is reported via log_error_errno() with -errno.
 */
219 static int userns_setup_ids(struct attach_context
*ctx
,
220 lxc_attach_options_t
*options
)
222 __do_free
char *line
= NULL
;
223 __do_fclose
FILE *f_gidmap
= NULL
, *f_uidmap
= NULL
;
225 uid_t init_ns_uid
= LXC_INVALID_UID
;
226 gid_t init_ns_gid
= LXC_INVALID_GID
;
227 uid_t nsuid
, hostuid
, range_uid
;
228 gid_t nsgid
, hostgid
, range_gid
;
230 if (!(options
->namespaces
& CLONE_NEWUSER
))
233 f_uidmap
= fdopen_at(ctx
->dfd_init_pid
, "uid_map", "re", PROTECT_OPEN
, PROTECT_LOOKUP_BENEATH
);
235 return log_error_errno(-errno
, errno
, "Failed to open uid_map");
237 while (getline(&line
, &len
, f_uidmap
) != -1) {
238 if (sscanf(line
, "%u %u %u", &nsuid
, &hostuid
, &range_uid
) != 3)
/*
 * nsuid is scanned with %u, so "0 >= nsuid" can only hold for nsuid == 0:
 * this tests whether the range starts at 0 and is non-empty.
 */
241 if (0 >= nsuid
&& 0 < nsuid
+ range_uid
) {
242 ctx
->setup_ns_uid
= 0;
243 TRACE("Container has mapping for uid 0");
/* Translate the init process' host uid into the namespace if this range maps it. */
247 if (ctx
->target_host_uid
>= hostuid
&& ctx
->target_host_uid
< hostuid
+ range_uid
) {
248 init_ns_uid
= (ctx
->target_host_uid
- hostuid
) + nsuid
;
249 TRACE("Container runs with uid %d", init_ns_uid
);
253 f_gidmap
= fdopen_at(ctx
->dfd_init_pid
, "gid_map", "re", PROTECT_OPEN
, PROTECT_LOOKUP_BENEATH
);
255 return log_error_errno(-errno
, errno
, "Failed to open gid_map");
257 while (getline(&line
, &len
, f_gidmap
) != -1) {
258 if (sscanf(line
, "%u %u %u", &nsgid
, &hostgid
, &range_gid
) != 3)
/* Same id-0 test as for the uid map above. */
261 if (0 >= nsgid
&& 0 < nsgid
+ range_gid
) {
262 ctx
->setup_ns_gid
= 0;
263 TRACE("Container has mapping for gid 0");
/* Translate the init process' host gid into the namespace if this range maps it. */
267 if (ctx
->target_host_gid
>= hostgid
&& ctx
->target_host_gid
< hostgid
+ range_gid
) {
268 init_ns_gid
= (ctx
->target_host_gid
- hostgid
) + nsgid
;
269 TRACE("Container runs with gid %d", init_ns_gid
);
/* No mapping for id 0 was found: fall back to the translated init uid. */
273 if (ctx
->setup_ns_uid
== LXC_INVALID_UID
)
274 ctx
->setup_ns_uid
= init_ns_uid
;
/*
 * NOTE(review): a gid is compared against LXC_INVALID_UID here, while every
 * other gid in this file is initialised with and compared to LXC_INVALID_GID.
 * Harmless only if both constants have the same value - confirm, and prefer
 * LXC_INVALID_GID for consistency.
 */
276 if (ctx
->setup_ns_gid
== LXC_INVALID_UID
)
277 ctx
->setup_ns_gid
= init_ns_gid
;
282 static void userns_target_ids(struct attach_context
*ctx
, lxc_attach_options_t
*options
)
284 if (options
->uid
!= LXC_INVALID_UID
)
285 ctx
->target_ns_uid
= options
->uid
;
286 else if (options
->namespaces
& CLONE_NEWUSER
)
287 ctx
->target_ns_uid
= ctx
->setup_ns_uid
;
289 ctx
->target_ns_uid
= 0;
291 if (ctx
->target_ns_uid
== LXC_INVALID_UID
)
292 WARN("Invalid uid specified");
294 if (options
->gid
!= LXC_INVALID_GID
)
295 ctx
->target_ns_gid
= options
->gid
;
296 else if (options
->namespaces
& CLONE_NEWUSER
)
297 ctx
->target_ns_gid
= ctx
->setup_ns_gid
;
299 ctx
->target_ns_gid
= 0;
301 if (ctx
->target_ns_gid
== LXC_INVALID_GID
)
302 WARN("Invalid gid specified");
305 static int parse_init_status(struct attach_context
*ctx
, lxc_attach_options_t
*options
)
307 __do_free
char *line
= NULL
;
308 __do_fclose
FILE *f
= NULL
;
310 bool caps_found
= false;
313 f
= fdopen_at(ctx
->dfd_init_pid
, "status", "re", PROTECT_OPEN
, PROTECT_LOOKUP_BENEATH
);
315 return log_error_errno(-errno
, errno
, "Failed to open status file");
317 while (getline(&line
, &len
, f
) != -1) {
318 signed long value
= -1;
321 * Format is: real, effective, saved set user, fs we only care
324 ret
= sscanf(line
, "Uid: %ld", &value
);
325 if (ret
!= EOF
&& ret
== 1) {
326 ctx
->target_host_uid
= (uid_t
)value
;
327 TRACE("Container's init process runs with hostuid %d", ctx
->target_host_uid
);
331 ret
= sscanf(line
, "Gid: %ld", &value
);
332 if (ret
!= EOF
&& ret
== 1) {
333 ctx
->target_host_gid
= (gid_t
)value
;
334 TRACE("Container's init process runs with hostgid %d", ctx
->target_host_gid
);
338 ret
= sscanf(line
, "CapBnd: %llx", &ctx
->capability_mask
);
339 if (ret
!= EOF
&& ret
== 1) {
345 if (ctx
->target_host_uid
!= LXC_INVALID_UID
&&
346 ctx
->target_host_gid
!= LXC_INVALID_GID
&&
352 ret
= userns_setup_ids(ctx
, options
);
354 return log_error_errno(ret
, errno
, "Failed to get setup ids");
355 userns_target_ids(ctx
, options
);
360 static bool pidfd_setns_supported(struct attach_context
*ctx
)
365 * The ability to attach to time namespaces came after the introduction
366 * of using pidfds for attaching to namespaces. To avoid having to
367 * special-case both CLONE_NEWUSER and CLONE_NEWTIME handling, let's
368 * use CLONE_NEWTIME as gatekeeper.
370 if (ctx
->init_pidfd
>= 0)
371 ret
= setns(ctx
->init_pidfd
, CLONE_NEWTIME
);
374 TRACE("Attaching to namespaces via pidfds %s",
375 ret
? "unsupported" : "supported");
379 static int get_attach_context(struct attach_context
*ctx
,
380 struct lxc_container
*container
,
381 lxc_attach_options_t
*options
)
383 __do_free
char *lsm_label
= NULL
;
385 char path
[LXC_PROC_PID_LEN
];
387 ctx
->container
= container
;
388 ctx
->attach_flags
= options
->attach_flags
;
390 ctx
->dfd_self_pid
= open_at(-EBADF
, "/proc/self",
391 PROTECT_OPATH_FILE
& ~O_NOFOLLOW
,
392 (PROTECT_LOOKUP_ABSOLUTE_WITH_SYMLINKS
& ~RESOLVE_NO_XDEV
), 0);
393 if (ctx
->dfd_self_pid
< 0)
394 return log_error_errno(-errno
, errno
, "Failed to open /proc/self");
396 ctx
->init_pidfd
= lxc_cmd_get_init_pidfd(container
->name
, container
->config_path
);
397 if (ctx
->init_pidfd
>= 0)
398 ctx
->init_pid
= pidfd_get_pid(ctx
->dfd_self_pid
, ctx
->init_pidfd
);
400 ctx
->init_pid
= lxc_cmd_get_init_pid(container
->name
, container
->config_path
);
401 if (ctx
->init_pid
< 0)
402 return log_error(-1, "Failed to get init pid");
404 ret
= strnprintf(path
, sizeof(path
), "/proc/%d", ctx
->init_pid
);
406 return ret_errno(EIO
);
408 ctx
->dfd_init_pid
= open_at(-EBADF
, path
,
409 PROTECT_OPATH_DIRECTORY
,
410 (PROTECT_LOOKUP_ABSOLUTE
& ~RESOLVE_NO_XDEV
), 0);
411 if (ctx
->dfd_init_pid
< 0)
412 return log_error_errno(-errno
, errno
, "Failed to open /proc/%d", ctx
->init_pid
);
414 if (ctx
->init_pidfd
>= 0) {
415 ret
= lxc_raw_pidfd_send_signal(ctx
->init_pidfd
, 0, NULL
, 0);
417 return log_error_errno(-errno
, errno
, "Container process exited or PID has been recycled");
419 TRACE("Container process still running and PID was not recycled");
421 if (!pidfd_setns_supported(ctx
)) {
422 /* We can't risk leaking file descriptors during attach. */
423 if (close(ctx
->init_pidfd
))
424 return log_error_errno(-errno
, errno
, "Failed to close pidfd");
426 ctx
->init_pidfd
= -EBADF
;
427 TRACE("Attaching to namespaces via pidfds not supported");
431 /* Determine which namespaces the container was created with. */
432 if (options
->namespaces
== -1) {
433 options
->namespaces
= lxc_cmd_get_clone_flags(container
->name
, container
->config_path
);
434 if (options
->namespaces
== -1)
435 return log_error_errno(-EINVAL
, EINVAL
, "Failed to automatically determine the namespaces which the container uses");
437 for (lxc_namespace_t i
= 0; i
< LXC_NS_MAX
; i
++) {
438 if (ns_info
[i
].clone_flag
& CLONE_NEWCGROUP
)
439 if (!(options
->attach_flags
& LXC_ATTACH_MOVE_TO_CGROUP
) ||
443 if (ns_info
[i
].clone_flag
& options
->namespaces
)
446 ctx
->ns_inherited
|= ns_info
[i
].clone_flag
;
450 ret
= parse_init_status(ctx
, options
);
452 return log_error_errno(-errno
, errno
, "Failed to open parse file");
454 ctx
->lsm_ops
= lsm_init_static();
456 if (attach_lsm(options
)) {
457 if (ctx
->attach_flags
& LXC_ATTACH_LSM_LABEL
)
458 lsm_label
= options
->lsm_label
;
460 lsm_label
= ctx
->lsm_ops
->process_label_get_at(ctx
->lsm_ops
, ctx
->dfd_init_pid
);
462 WARN("No security context received");
464 INFO("Retrieved security context %s", lsm_label
);
467 ret
= get_personality(container
->name
, container
->config_path
, &ctx
->personality
);
469 return log_error_errno(ret
, errno
, "Failed to get personality of the container");
471 if (!ctx
->container
->lxc_conf
) {
472 ctx
->container
->lxc_conf
= lxc_conf_init();
473 if (!ctx
->container
->lxc_conf
)
474 return log_error_errno(-ENOMEM
, ENOMEM
, "Failed to allocate new lxc config");
477 ctx
->lsm_label
= move_ptr(lsm_label
);
481 static int same_nsfd(int dfd_pid1
, int dfd_pid2
, const char *ns_path
)
484 struct stat ns_st1
, ns_st2
;
486 ret
= fstatat(dfd_pid1
, ns_path
, &ns_st1
, 0);
490 ret
= fstatat(dfd_pid2
, ns_path
, &ns_st2
, 0);
494 /* processes are in the same namespace */
495 if ((ns_st1
.st_dev
== ns_st2
.st_dev
) &&
496 (ns_st1
.st_ino
== ns_st2
.st_ino
))
502 static int same_ns(int dfd_pid1
, int dfd_pid2
, const char *ns_path
)
504 __do_close
int ns_fd2
= -EBADF
;
507 ns_fd2
= open_at(dfd_pid2
, ns_path
, PROTECT_OPEN_WITH_TRAILING_SYMLINKS
,
508 (PROTECT_LOOKUP_BENEATH_WITH_MAGICLINKS
&
509 ~(RESOLVE_NO_XDEV
| RESOLVE_BENEATH
)), 0);
513 return syserror("Failed to open %d(%s)", dfd_pid2
, ns_path
);
516 ret
= same_nsfd(dfd_pid1
, dfd_pid2
, ns_path
);
521 return ret_errno(ENOENT
);
523 /* processes are in different namespaces */
524 return move_fd(ns_fd2
);
530 static int __prepare_namespaces_pidfd(struct attach_context
*ctx
)
532 for (lxc_namespace_t i
= 0; i
< LXC_NS_MAX
; i
++) {
535 ret
= same_nsfd(ctx
->dfd_self_pid
,
537 ns_info
[i
].proc_path
);
542 ctx
->ns_inherited
&= ~ns_info
[i
].clone_flag
;
543 TRACE("Shared %s namespace doesn't need attach", ns_info
[i
].proc_name
);
546 TRACE("Different %s namespace needs attach", ns_info
[i
].proc_name
);
550 return syserror("Failed to determine whether %s namespace is shared",
551 ns_info
[i
].proc_name
);
557 static int __prepare_namespaces_nsfd(struct attach_context
*ctx
,
558 lxc_attach_options_t
*options
)
560 for (lxc_namespace_t i
= 0; i
< LXC_NS_MAX
; i
++) {
563 if (options
->namespaces
& ns_info
[i
].clone_flag
)
564 ctx
->ns_fd
[i
] = open_at(ctx
->dfd_init_pid
,
565 ns_info
[i
].proc_path
,
566 PROTECT_OPEN_WITH_TRAILING_SYMLINKS
,
567 (PROTECT_LOOKUP_BENEATH_WITH_MAGICLINKS
&
568 ~(RESOLVE_NO_XDEV
| RESOLVE_BENEATH
)),
570 else if (ctx
->ns_inherited
& ns_info
[i
].clone_flag
)
571 ctx
->ns_fd
[i
] = same_ns(ctx
->dfd_self_pid
,
573 ns_info
[i
].proc_path
);
577 if (ctx
->ns_fd
[i
] >= 0)
580 if (ctx
->ns_fd
[i
] == -ENOENT
) {
581 ctx
->ns_inherited
&= ~ns_info
[i
].clone_flag
;
585 /* We failed to preserve the namespace. */
586 SYSERROR("Failed to preserve %s namespace of %d",
587 ns_info
[i
].proc_name
, ctx
->init_pid
);
589 /* Close all already opened file descriptors before we return an
590 * error, so we don't leak them.
592 for (j
= 0; j
< i
; j
++)
593 close_prot_errno_disarm(ctx
->ns_fd
[j
]);
/*
 * Select the namespace preparation strategy: without a usable pidfd
 * (ctx->init_pidfd < 0) the per-namespace fd variant is used, which also
 * needs @options; otherwise the pidfd variant is used.
 * Returns whatever the selected helper returns.
 */
601 static int prepare_namespaces(struct attach_context
*ctx
,
602 lxc_attach_options_t
*options
)
604 if (ctx
->init_pidfd
< 0)
605 return __prepare_namespaces_nsfd(ctx
, options
);
607 return __prepare_namespaces_pidfd(ctx
);
/*
 * Close every descriptor in ctx->ns_fd[] with close_prot_errno_disarm().
 * This is only done when ctx->init_pidfd < 0, which is the case in which
 * prepare_namespaces() takes the per-namespace fd path.
 */
610 static inline void put_namespaces(struct attach_context
*ctx
)
612 if (ctx
->init_pidfd
< 0) {
613 for (int i
= 0; i
< LXC_NS_MAX
; i
++)
614 close_prot_errno_disarm(ctx
->ns_fd
[i
]);
/*
 * Attach to the container's namespaces with a single setns() call on
 * ctx->init_pidfd. The flag set is the union of options->namespaces and
 * ctx->ns_inherited.
 *
 * After attaching, the pidfd is closed and ctx->init_pidfd reset to -EBADF so
 * the descriptor cannot leak into the container. A failing setns() or close()
 * is reported via log_error_errno() with -errno; success returns 0.
 */
618 static int __attach_namespaces_pidfd(struct attach_context
*ctx
,
619 lxc_attach_options_t
*options
)
621 unsigned int ns_flags
= options
->namespaces
| ctx
->ns_inherited
;
624 /* The common case is to attach to all namespaces. */
625 ret
= setns(ctx
->init_pidfd
, ns_flags
);
627 return log_error_errno(-errno
, errno
,
628 "Failed to attach to namespaces via pidfd");
630 /* We can't risk leaking file descriptors into the container. */
631 if (close(ctx
->init_pidfd
))
632 return log_error_errno(-errno
, errno
, "Failed to close pidfd");
633 ctx
->init_pidfd
= -EBADF
;
635 return log_trace(0, "Attached to container namespaces via pidfd");
638 static int __attach_namespaces_nsfd(struct attach_context
*ctx
,
639 lxc_attach_options_t
*options
)
643 for (lxc_namespace_t i
= 0; i
< LXC_NS_MAX
; i
++) {
646 if (ctx
->ns_fd
[i
] < 0)
649 ret
= setns(ctx
->ns_fd
[i
], ns_info
[i
].clone_flag
);
651 return log_error_errno(-errno
, errno
,
652 "Failed to attach to %s namespace of %d",
653 ns_info
[i
].proc_name
,
656 if (close(ctx
->ns_fd
[i
])) {
658 SYSERROR("Failed to close file descriptor for %s namespace",
659 ns_info
[i
].proc_name
);
661 ctx
->ns_fd
[i
] = -EBADF
;
/*
 * Attach the calling process to the container's namespaces.
 *
 * When trace logging is enabled (lxc_log_trace()), each namespace type is
 * first classified for the log: "Attaching" when its clone flag is in
 * options->namespaces, "Sharing" when it is in ctx->ns_inherited, and
 * "Inheriting" as the remaining case.
 *
 * The actual work is delegated to the per-namespace fd variant when
 * ctx->init_pidfd < 0 and to the pidfd variant otherwise; the helper's return
 * value is passed through.
 */
667 static int attach_namespaces(struct attach_context
*ctx
,
668 lxc_attach_options_t
*options
)
670 if (lxc_log_trace()) {
671 for (lxc_namespace_t i
= 0; i
< LXC_NS_MAX
; i
++) {
672 if (ns_info
[i
].clone_flag
& options
->namespaces
) {
673 TRACE("Attaching to %s namespace", ns_info
[i
].proc_name
);
676 if (ns_info
[i
].clone_flag
& ctx
->ns_inherited
) {
677 TRACE("Sharing %s namespace", ns_info
[i
].proc_name
);
680 TRACE("Inheriting %s namespace", ns_info
[i
].proc_name
);
684 if (ctx
->init_pidfd
< 0)
685 return __attach_namespaces_nsfd(ctx
, options
);
687 return __attach_namespaces_pidfd(ctx
, options
);
/*
 * Release resources held by an attach context.
 *
 * ctx->lsm_label is freed only when LXC_ATTACH_LSM_LABEL is NOT set: with that
 * flag get_attach_context() stores the caller's options->lsm_label pointer
 * rather than a label it allocated itself, so it must not be freed here.
 * The /proc/<init pid> directory fd is closed and the container reference is
 * dropped with lxc_container_put().
 */
690 static void put_attach_context(struct attach_context
*ctx
)
693 if (!(ctx
->attach_flags
& LXC_ATTACH_LSM_LABEL
))
694 free_disarm(ctx
->lsm_label
);
695 close_prot_errno_disarm(ctx
->dfd_init_pid
);
697 if (ctx
->container
) {
698 lxc_container_put(ctx
->container
);
/* Clear the pointer so the reference cannot be dropped twice. */
699 ctx
->container
= NULL
;
708 * Place anything in here that needs to be gotten rid of before we move into the
709 * container's context and fail hard if we can't.
/*
 * Close the two /proc directory descriptors held in the context
 * (ctx->dfd_self_pid and ctx->dfd_init_pid) with plain close(), checking each
 * result, and reset both fields to -EBADF.
 *
 * NOTE(review): presumably returns false as soon as a close() fails and true
 * otherwise - confirm against the return statements.
 */
711 static bool attach_context_security_barrier(struct attach_context
*ctx
)
714 if (close(ctx
->dfd_self_pid
))
716 ctx
->dfd_self_pid
= -EBADF
;
718 if (close(ctx
->dfd_init_pid
))
720 ctx
->dfd_init_pid
= -EBADF
;
/*
 * Give the calling process fresh /proc and /sys mounts in a private mount
 * namespace.
 *
 * Steps, in order:
 *   1. unshare(CLONE_NEWNS);
 *   2. if detect_shared_rootfs() is true, recursively mark "/" MS_SLAVE
 *      (a failure here is only logged and execution continues);
 *   3. lazily unmount /proc (MNT_DETACH) and mount a new "proc" instance;
 *   4. lazily unmount /sys; EINVAL is tolerated, and a new "sysfs" instance is
 *      mounted only when the unmount succeeded.
 *
 * Every fatal failure is reported through log_error_errno() with a return
 * value of -1.
 */
726 int lxc_attach_remount_sys_proc(void)
730 ret
= unshare(CLONE_NEWNS
);
732 return log_error_errno(-1, errno
, "Failed to unshare mount namespace");
734 if (detect_shared_rootfs() && mount(NULL
, "/", NULL
, MS_SLAVE
| MS_REC
, NULL
))
735 SYSERROR("Failed to recursively turn root mount tree into dependent mount. Continuing...");
737 /* Assume /proc is always mounted, so remount it. */
738 ret
= umount2("/proc", MNT_DETACH
);
740 return log_error_errno(-1, errno
, "Failed to unmount /proc");
742 ret
= mount("none", "/proc", "proc", 0, NULL
);
744 return log_error_errno(-1, errno
, "Failed to remount /proc");
747 * Try to umount /sys. If it's not a mount point, we'll get EINVAL, then
748 * we ignore it because it may not have been mounted in the first place.
750 ret
= umount2("/sys", MNT_DETACH
);
751 if (ret
< 0 && errno
!= EINVAL
)
752 return log_error_errno(-1, errno
, "Failed to unmount /sys");
755 if (ret
== 0 && mount("none", "/sys", "sysfs", 0, NULL
))
756 return log_error_errno(-1, errno
, "Failed to remount /sys");
/*
 * Walk capability numbers 0..lxc_caps_last_cap() (inclusive), test each one
 * against ctx->capability_mask and remove capabilities from the calling
 * thread's bounding set with prctl(PR_CAPBSET_DROP).
 *
 * A failing prctl() is reported via log_error_errno() with a return value of
 * -1; each successful drop is traced.
 *
 * NOTE(review): presumably capabilities whose bit IS set in the mask are kept
 * and only the others are dropped - confirm against the statement guarded by
 * the mask test.
 */
761 static int drop_capabilities(struct attach_context
*ctx
)
765 last_cap
= lxc_caps_last_cap();
766 for (int cap
= 0; cap
<= last_cap
; cap
++) {
/*
 * NOTE(review): capability_mask is unsigned long long but the shifted constant
 * is the signed 1LL; 1ULL would match the mask type and avoid a signed shift
 * into the top bit should cap ever reach 63.
 */
767 if (ctx
->capability_mask
& (1LL << cap
))
770 if (prctl(PR_CAPBSET_DROP
, prctl_arg(cap
), prctl_arg(0),
771 prctl_arg(0), prctl_arg(0)))
772 return log_error_errno(-1, errno
, "Failed to drop capability %d", cap
);
774 TRACE("Dropped capability %d", cap
);
780 static int lxc_attach_set_environment(struct attach_context
*ctx
,
781 enum lxc_attach_env_policy_t policy
,
782 char **extra_env
, char **extra_keep
)
786 if (policy
== LXC_ATTACH_CLEAR_ENV
) {
788 char **extra_keep_store
= NULL
;
793 for (count
= 0; extra_keep
[count
]; count
++)
796 extra_keep_store
= zalloc(count
* sizeof(char *));
797 if (!extra_keep_store
)
800 for (i
= 0; i
< count
; i
++) {
801 char *v
= getenv(extra_keep
[i
]);
803 extra_keep_store
[i
] = strdup(v
);
804 if (!extra_keep_store
[i
]) {
806 free(extra_keep_store
[--i
]);
808 free(extra_keep_store
);
812 if (strequal(extra_keep
[i
], "PATH"))
819 if (extra_keep_store
) {
822 for (p
= extra_keep_store
; *p
; p
++)
825 free(extra_keep_store
);
828 return log_error(-1, "Failed to clear environment");
831 if (extra_keep_store
) {
834 for (i
= 0; extra_keep
[i
]; i
++) {
835 if (extra_keep_store
[i
]) {
836 ret
= setenv(extra_keep
[i
], extra_keep_store
[i
], 1);
838 SYSWARN("Failed to set environment variable");
841 free(extra_keep_store
[i
]);
844 free(extra_keep_store
);
847 /* Always set a default path; shells and execlp tend to be fine
848 * without it, but there is a disturbing number of C programs
849 * out there that just assume that getenv("PATH") is never NULL
850 * and then die a painful segfault death.
853 ret
= setenv("PATH", "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", 1);
855 SYSWARN("Failed to set environment variable");
859 ret
= putenv("container=lxc");
861 return log_warn(-1, "Failed to set environment variable");
863 /* Set container environment variables.*/
864 if (ctx
->container
->lxc_conf
) {
865 ret
= lxc_set_environment(ctx
->container
->lxc_conf
);
870 /* Set extra environment variables. */
872 for (; *extra_env
; extra_env
++) {
875 /* We just assume the user knows what they are doing, so
876 * we don't do any checks.
878 p
= strdup(*extra_env
);
884 SYSWARN("Failed to set environment variable");
891 static char *lxc_attach_getpwshell(uid_t uid
)
893 __do_free
char *line
= NULL
, *result
= NULL
;
894 __do_fclose
FILE *pipe_f
= NULL
;
899 size_t line_bufsz
= 0;
901 /* We need to fork off a process that runs the getent program, and we
902 * need to capture its output, so we use a pipe for that purpose.
904 ret
= pipe2(pipes
, O_CLOEXEC
);
917 char *arguments
[] = {
926 /* We want to capture stdout. */
927 ret
= dup2(pipes
[1], STDOUT_FILENO
);
932 /* Get rid of stdin/stderr, so we try to associate it with
938 close(STDERR_FILENO
);
940 (void)dup3(fd
, STDIN_FILENO
, O_CLOEXEC
);
941 (void)dup3(fd
, STDERR_FILENO
, O_CLOEXEC
);
945 /* Finish argument list. */
946 ret
= strnprintf(uid_buf
, sizeof(uid_buf
), "%ld", (long)uid
);
950 /* Try to run getent program. */
951 (void)execvp("getent", arguments
);
957 pipe_f
= fdopen(pipes
[0], "re");
962 /* Transfer ownership of pipes[0] to pipe_f. */
965 while (getline(&line
, &line_bufsz
, pipe_f
) != -1) {
969 char *endptr
= NULL
, *saveptr
= NULL
;
971 /* If we already found something, just continue to read
972 * until the pipe doesn't deliver any more data, but
973 * don't modify the existing data structure.
981 /* Trim line on the right hand side. */
982 for (i
= strlen(line
); i
> 0 && (line
[i
- 1] == '\n' || line
[i
- 1] == '\r'); --i
)
985 /* Split into tokens: first: user name. */
986 token
= strtok_r(line
, ":", &saveptr
);
990 /* next: placeholder password field */
991 token
= strtok_r(NULL
, ":", &saveptr
);
996 token
= strtok_r(NULL
, ":", &saveptr
);
997 value
= token
? strtol(token
, &endptr
, 10) : 0;
998 if (!token
|| !endptr
|| *endptr
|| value
== LONG_MIN
||
1002 /* placeholder coherence check: user id matches */
1003 if ((uid_t
)value
!= uid
)
1006 /* skip fields: gid, gecos, dir, go to next field 'shell' */
1007 for (i
= 0; i
< 4; i
++) {
1008 token
= strtok_r(NULL
, ":", &saveptr
);
1016 free_disarm(result
);
1017 result
= strdup(token
);
1019 /* Sanity check that there are no fields after that. */
1020 token
= strtok_r(NULL
, ":", &saveptr
);
1028 ret
= wait_for_pid(pid
);
1035 return move_ptr(result
);
1038 static bool fetch_seccomp(struct lxc_container
*c
, lxc_attach_options_t
*options
)
1040 __do_free
char *path
= NULL
;
1044 if (!attach_lsm(options
)) {
1045 free_disarm(c
->lxc_conf
->seccomp
.seccomp
);
1049 /* Remove current setting. */
1050 if (!c
->set_config_item(c
, "lxc.seccomp.profile", "") &&
1051 !c
->set_config_item(c
, "lxc.seccomp", ""))
1054 /* Fetch the current profile path over the cmd interface. */
1055 path
= c
->get_running_config_item(c
, "lxc.seccomp.profile");
1057 INFO("Failed to retrieve lxc.seccomp.profile");
1059 path
= c
->get_running_config_item(c
, "lxc.seccomp");
1061 return log_info(true, "Failed to retrieve lxc.seccomp");
1064 /* Copy the value into the new lxc_conf. */
1065 bret
= c
->set_config_item(c
, "lxc.seccomp.profile", path
);
1069 /* Attempt to parse the resulting config. */
1070 ret
= lxc_read_seccomp_config(c
->lxc_conf
);
1072 return log_error(false, "Failed to retrieve seccomp policy");
1074 return log_info(true, "Retrieved seccomp policy");
/*
 * Synchronise the local "lxc.no_new_privs" config item of @c with the value
 * of the running container.
 *
 * The local item is first cleared, then the live value is fetched with
 * get_running_config_item() and written back with set_config_item().
 * Returns false when clearing or fetching fails; otherwise returns the result
 * of the final set_config_item() call. @options is not referenced in the
 * visible body.
 */
1077 static bool no_new_privs(struct lxc_container
*c
, lxc_attach_options_t
*options
)
/* Freed automatically on scope exit through __do_free. */
1079 __do_free
char *val
= NULL
;
1081 /* Remove current setting. */
1082 if (!c
->set_config_item(c
, "lxc.no_new_privs", ""))
1083 return log_info(false, "Failed to unset lxc.no_new_privs");
1085 /* Retrieve currently active setting. */
1086 val
= c
->get_running_config_item(c
, "lxc.no_new_privs");
1088 return log_info(false, "Failed to retrieve lxc.no_new_privs");
1090 /* Set currently active setting. */
1091 return c
->set_config_item(c
, "lxc.no_new_privs", val
);
/*
 * Argument bundle consumed by do_attach(): the attach options, the attach
 * context, the function to run inside the container, and the descriptors that
 * put_attach_payload() closes.
 */
1094 struct attach_payload
{
1096 int terminal_pts_fd
;
1097 lxc_attach_options_t
*options
;
1098 struct attach_context
*ctx
;
/* Moved out of the payload by do_attach() before use. */
1099 lxc_attach_exec_t exec_function
;
/*
 * Release what an attach payload owns: close p->ipc_socket and
 * p->terminal_pts_fd with close_prot_errno_disarm(), then release the attach
 * context through put_attach_context().
 */
1103 static void put_attach_payload(struct attach_payload
*p
)
1106 close_prot_errno_disarm(p
->ipc_socket
);
1107 close_prot_errno_disarm(p
->terminal_pts_fd
);
1108 put_attach_context(p
->ctx
);
/*
 * Final setup stage that runs in the attached process. Marked __noreturn:
 * both visible exits go through _exit().
 *
 * @ap: payload carrying the attach options, the attach context, the IPC
 *      socket, the terminal pts fd and the user callback with its argument.
 *
 * Order of the visible steps:
 *   1. remount /proc and /sys when LXC_ATTACH_REMOUNT_PROC_SYS is set and
 *      CLONE_NEWNS is not part of options->namespaces;
 *   2. apply the personality (LXC_ATTACH_SET_PERSONALITY);
 *   3. drop capabilities (LXC_ATTACH_DROP_CAPABILITIES);
 *   4. set up the environment according to options->env_policy;
 *   5. receive the LSM label fd from the parent over the IPC socket;
 *   6. make options->stdin_fd the controlling terminal if it is a tty;
 *   7. set or drop supplementary groups;
 *   8. switch to setup_ns_uid and setup_ns_gid when CLONE_NEWUSER is set;
 *   9. apply the LSM label through the received fd;
 *  10. set PR_SET_NO_NEW_PRIVS when the config or the attach flags ask for it;
 *  11. dup2() the requested stdio fds into place, close the old ones and
 *      clear FD_CLOEXEC on fds 0 to 2;
 *  12. prepare the terminal for login (LXC_ATTACH_TERMINAL);
 *  13. fix stdio ownership for the target uid;
 *  14. load the seccomp profile and send the notifier fd to the parent;
 *  15. switch to target_ns_uid and target_ns_gid;
 *  16. release the payload and _exit() with the result of the callback.
 *
 * The visible failure path logs an error, releases the payload and calls
 * _exit(EXIT_FAILURE).
 * NOTE(review): the error checks between the steps are not part of the lines
 * shown here; confirm against the full file which steps are fatal.
 */
1113 __noreturn
static void do_attach(struct attach_payload
*ap
)
1115 lxc_attach_exec_t attach_function
= move_ptr(ap
->exec_function
);
1116 void *attach_function_args
= move_ptr(ap
->exec_payload
);
1118 lxc_attach_options_t
* options
= ap
->options
;
1119 struct attach_context
*ctx
= ap
->ctx
;
1120 struct lxc_conf
*conf
= ctx
->container
->lxc_conf
;
1122 /* A description of the purpose of this functionality is provided in the
1123 * lxc-attach(1) manual page. We have to remount here and not in the
1124 * parent process, otherwise /proc may not properly reflect the new pid
1127 if (!(options
->namespaces
& CLONE_NEWNS
) &&
1128 (options
->attach_flags
& LXC_ATTACH_REMOUNT_PROC_SYS
)) {
1129 ret
= lxc_attach_remount_sys_proc();
1133 TRACE("Remounted \"/proc\" and \"/sys\"");
1136 /* Now perform additional attachments. */
1137 if (options
->attach_flags
& LXC_ATTACH_SET_PERSONALITY
) {
1138 long new_personality
;
1140 if (options
->personality
== LXC_ATTACH_DETECT_PERSONALITY
)
1141 new_personality
= ctx
->personality
;
1143 new_personality
= options
->personality
;
1145 if (new_personality
!= LXC_ARCH_UNCHANGED
) {
1146 ret
= lxc_personality(new_personality
);
1150 TRACE("Set new personality");
1154 if (options
->attach_flags
& LXC_ATTACH_DROP_CAPABILITIES
) {
1155 ret
= drop_capabilities(ctx
);
1159 TRACE("Dropped capabilities");
1162 /* Always set the environment (specify (LXC_ATTACH_KEEP_ENV, NULL, NULL)
1163 * if you want this to be a no-op).
1165 ret
= lxc_attach_set_environment(ctx
,
1166 options
->env_policy
,
1167 options
->extra_env_vars
,
1168 options
->extra_keep_env
);
1172 TRACE("Set up environment");
1175 * This remark only affects fully unprivileged containers:
1176 * Receive fd for LSM security module before we set{g,u}id(). The reason
1177 * is that on set{g,u}id() the kernel will a) make us undumpable and b)
1178 * we will change our effective uid. This means our effective uid will
1179 * be different from the effective uid of the process that created us
1180 * which means that this process no longer has capabilities in our
1181 * namespace including CAP_SYS_PTRACE. This means we will not be able to
1182 * read any /proc/<pid> files for the process anymore when /proc is
1183 * mounted with hidepid={1,2}. So let's get the lsm label fd before the
1186 if (attach_lsm(options
) && ctx
->lsm_label
) {
1187 if (!sync_wait_fd(ap
->ipc_socket
, &fd_lsm
)) {
1188 SYSERROR("Failed to receive lsm label fd");
1192 TRACE("Received LSM label file descriptor %d from parent", fd_lsm
);
1195 if (options
->stdin_fd
> 0 && isatty(options
->stdin_fd
)) {
1196 ret
= lxc_make_controlling_terminal(options
->stdin_fd
);
1201 if ((options
->attach_flags
& LXC_ATTACH_SETGROUPS
) &&
1202 options
->groups
.size
> 0) {
1203 if (!lxc_setgroups(options
->groups
.list
, options
->groups
.size
))
1206 if (!lxc_drop_groups() && errno
!= EPERM
)
1210 if (options
->namespaces
& CLONE_NEWUSER
)
1211 if (!lxc_switch_uid_gid(ctx
->setup_ns_uid
, ctx
->setup_ns_gid
))
1214 if (attach_lsm(options
) && ctx
->lsm_label
) {
1217 /* Change into our new LSM profile. */
1218 on_exec
= options
->attach_flags
& LXC_ATTACH_LSM_EXEC
? true : false;
1219 ret
= ctx
->lsm_ops
->process_label_set_at(ctx
->lsm_ops
, fd_lsm
, ctx
->lsm_label
, on_exec
);
1220 close_prot_errno_disarm(fd_lsm
);
1224 TRACE("Set %s LSM label to \"%s\"", ctx
->lsm_ops
->name
, ctx
->lsm_label
);
1227 if (conf
->no_new_privs
|| (options
->attach_flags
& LXC_ATTACH_NO_NEW_PRIVS
)) {
1228 ret
= prctl(PR_SET_NO_NEW_PRIVS
, prctl_arg(1), prctl_arg(0),
1229 prctl_arg(0), prctl_arg(0));
1233 TRACE("Set PR_SET_NO_NEW_PRIVS");
1236 /* The following is done after the communication socket is shut down.
1237 * That way, all errors that might (though unlikely) occur up until this
1238 * point will have their messages printed to the original stderr (if
1239 * logging is so configured) and not the fd the user supplied, if any.
1242 /* Fd handling for stdin, stdout and stderr; ignore errors here, user
1243 * may want to make sure the fds are closed, for example.
1245 if (options
->stdin_fd
>= 0 && options
->stdin_fd
!= STDIN_FILENO
)
1246 if (dup2(options
->stdin_fd
, STDIN_FILENO
) < 0)
1247 SYSDEBUG("Failed to replace stdin with %d", options
->stdin_fd
);
1249 if (options
->stdout_fd
>= 0 && options
->stdout_fd
!= STDOUT_FILENO
)
1250 if (dup2(options
->stdout_fd
, STDOUT_FILENO
) < 0)
1251 SYSDEBUG("Failed to replace stdout with %d", options
->stdout_fd
);
1253 if (options
->stderr_fd
>= 0 && options
->stderr_fd
!= STDERR_FILENO
)
1254 if (dup2(options
->stderr_fd
, STDERR_FILENO
) < 0)
1255 SYSDEBUG("Failed to replace stderr with %d", options
->stderr_fd
);
1257 /* close the old fds */
1258 if (options
->stdin_fd
> STDERR_FILENO
)
1259 close(options
->stdin_fd
);
1261 if (options
->stdout_fd
> STDERR_FILENO
)
1262 close(options
->stdout_fd
);
1264 if (options
->stderr_fd
> STDERR_FILENO
)
1265 close(options
->stderr_fd
);
1268 * Try to remove FD_CLOEXEC flag from stdin/stdout/stderr, but also
1269 * here, ignore errors.
1271 for (int fd
= STDIN_FILENO
; fd
<= STDERR_FILENO
; fd
++) {
1272 ret
= fd_cloexec(fd
, false);
1274 SYSERROR("Failed to clear FD_CLOEXEC from file descriptor %d", fd
);
1279 if (options
->attach_flags
& LXC_ATTACH_TERMINAL
) {
1280 ret
= lxc_terminal_prepare_login(ap
->terminal_pts_fd
);
1282 SYSERROR("Failed to prepare terminal file descriptor %d", ap
->terminal_pts_fd
);
1286 TRACE("Prepared terminal file descriptor %d", ap
->terminal_pts_fd
);
1289 /* Avoid unnecessary syscalls. */
1290 if (ctx
->setup_ns_uid
== ctx
->target_ns_uid
)
1291 ctx
->target_ns_uid
= LXC_INVALID_UID
;
1293 if (ctx
->setup_ns_gid
== ctx
->target_ns_gid
)
1294 ctx
->target_ns_gid
= LXC_INVALID_GID
;
1297 * Make sure that the processes STDIO is correctly owned by the user
1298 * that we are switching to.
1300 ret
= fix_stdio_permissions(ctx
->target_ns_uid
);
1302 INFO("Failed to adjust stdio permissions");
1304 if (conf
->seccomp
.seccomp
) {
1305 ret
= lxc_seccomp_load(conf
);
1309 TRACE("Loaded seccomp profile");
1311 ret
= lxc_seccomp_send_notifier_fd(&conf
->seccomp
, ap
->ipc_socket
);
1314 lxc_seccomp_close_notifier_fd(&conf
->seccomp
);
1317 if (!lxc_switch_uid_gid(ctx
->target_ns_uid
, ctx
->target_ns_gid
))
1320 put_attach_payload(ap
);
1322 /* We're done, so we can now do whatever the user intended us to do. */
1323 _exit(attach_function(attach_function_args
));
1326 ERROR("Failed to attach to container");
1327 put_attach_payload(ap
);
1328 _exit(EXIT_FAILURE
);
/*
 * Set up the terminal used for an attach session: @terminal is initialised
 * with lxc_terminal_init() and then created with lxc_terminal_create() for
 * the container identified by @name and @lxcpath using @conf.
 *
 * Return: the visible failure path hands -1 to log_error(); the success
 * return is not among the lines shown here.
 */
1331 static int lxc_attach_terminal(const char *name
, const char *lxcpath
, struct lxc_conf
*conf
,
1332 struct lxc_terminal
*terminal
)
1336 lxc_terminal_init(terminal
);
1338 ret
= lxc_terminal_create(name
, lxcpath
, conf
, terminal
);
1340 return log_error(-1, "Failed to create terminal");
/*
 * Open the mainloop @descr and register the handlers of @terminal with it
 * through lxc_terminal_mainloop_add().
 *
 * The visible lines close the mainloop again before reporting that adding
 * the handlers failed, so the caller does not have to clean up @descr on
 * that path.
 *
 * Return: both visible failure paths hand -1 to log_error(); the success
 * return is not among the lines shown here.
 */
1345 static int lxc_attach_terminal_mainloop_init(struct lxc_terminal
*terminal
,
1346 struct lxc_async_descr
*descr
)
1350 ret
= lxc_mainloop_open(descr
);
1352 return log_error(-1, "Failed to create mainloop");
1354 ret
= lxc_terminal_mainloop_add(descr
, terminal
);
1356 lxc_mainloop_close(descr
);
1357 return log_error(-1, "Failed to add handlers to mainloop");
/* Close the ptx file descriptor of @terminal via close_prot_errno_disarm(). */
1363 static inline void lxc_attach_terminal_close_ptx(struct lxc_terminal
*terminal
)
1365 close_prot_errno_disarm(terminal
->ptx
);
/*
 * Close the pts side of @terminal; the descriptor is stored in the pty
 * member and is closed via close_prot_errno_disarm().
 */
1368 static inline void lxc_attach_terminal_close_pts(struct lxc_terminal
*terminal
)
1370 close_prot_errno_disarm(terminal
->pty
);
/* Close the peer file descriptor of @terminal via close_prot_errno_disarm(). */
1373 static inline void lxc_attach_terminal_close_peer(struct lxc_terminal
*terminal
)
1375 close_prot_errno_disarm(terminal
->peer
);
/* Close the log file descriptor of @terminal via close_prot_errno_disarm(). */
1378 static inline void lxc_attach_terminal_close_log(struct lxc_terminal
*terminal
)
1380 close_prot_errno_disarm(terminal
->log_fd
);
/*
 * Attach a new process to a running container.
 *
 * @container        : container to attach to; a reference is taken with
 *                     lxc_container_get()
 * @exec_function    : callback run in the attached process
 * @exec_payload     : opaque argument passed to @exec_function
 * @options          : attach options; the visible lines also contain a
 *                     fallback to attach_static_default_options
 *                     (NOTE(review): presumably used when @options is NULL,
 *                     the guarding check is not among the lines shown here)
 * @attached_process : receives the pid of the attached process
 *
 * Three processes cooperate, as laid out in the IPC comment further down:
 *  - the initial process (the caller) prepares the attach context, the
 *    optional terminal and a socketpair, moves the child into the cgroup,
 *    applies /proc and resource limits, sends the LSM label fd, receives the
 *    seccomp notifier fd and, with LXC_ATTACH_TERMINAL, runs the mainloop;
 *  - the transient process waits for the cgroup step, enters the namespaces
 *    of the container, changes directory, clones the attached process with
 *    CLONE_PARENT and reports its pid back;
 *  - the attached process is set up by do_attach().
 *
 * Return: the visible early exits return -1 with errno set to EINVAL, hand
 * -ENOMEM to log_error_errno() when the context cannot be allocated, or hand
 * -1 to the logging helpers. ret_parent starts out as -1; the final return
 * statement is not among the lines shown here.
 */
1383 int lxc_attach(struct lxc_container
*container
, lxc_attach_exec_t exec_function
,
1384 void *exec_payload
, lxc_attach_options_t
*options
,
1385 pid_t
*attached_process
)
1387 int ret_parent
= -1;
1388 struct lxc_async_descr descr
= {};
1390 char *name
, *lxcpath
;
1392 pid_t attached_pid
, pid
, to_cleanup_pid
;
1393 struct attach_context
*ctx
;
1394 struct lxc_terminal terminal
;
1395 struct lxc_conf
*conf
;
1398 return ret_set_errno(-1, EINVAL
);
1400 if (!lxc_container_get(container
))
1401 return ret_set_errno(-1, EINVAL
);
1403 name
= container
->name
;
1404 lxcpath
= container
->config_path
;
1407 options
= &attach_static_default_options
;
1408 options
->lsm_label
= NULL
;
1411 ctx
= alloc_attach_context();
1413 lxc_container_put(container
);
1414 return log_error_errno(-ENOMEM
, ENOMEM
, "Failed to allocate attach context");
1417 ret
= get_attach_context(ctx
, container
, options
);
1419 put_attach_context(ctx
);
1420 return log_error(-1, "Failed to get attach context");
1423 conf
= ctx
->container
->lxc_conf
;
1425 if (!fetch_seccomp(ctx
->container
, options
))
1426 WARN("Failed to get seccomp policy");
1428 if (!no_new_privs(ctx
->container
, options
))
1429 WARN("Could not determine whether PR_SET_NO_NEW_PRIVS is set");
1431 ret
= prepare_namespaces(ctx
, options
);
1433 put_attach_context(ctx
);
1434 return log_error(-1, "Failed to get namespace file descriptors");
1437 if (options
->attach_flags
& LXC_ATTACH_TERMINAL
) {
1438 ret
= lxc_attach_terminal(name
, lxcpath
, conf
, &terminal
);
1440 put_attach_context(ctx
);
1441 return log_error(-1, "Failed to setup new terminal");
1444 terminal
.log_fd
= options
->log_fd
;
1446 lxc_terminal_init(&terminal
);
1449 /* Create a socket pair for IPC communication; set SOCK_CLOEXEC in order
1450 * to make sure we don't irritate other threads that want to fork+exec
1453 * IMPORTANT: if the initial process is multithreaded and another call
1454 * just fork()s away without exec'ing directly after, the socket fd will
1455 * exist in the forked process from the other thread and any close() in
1456 * our own child process will not really cause the socket to close
1457 * properly, potentially causing the parent to get stuck.
1459 * For this reason, while IPC is still active, we have to use shutdown()
1460 * if the child exits prematurely in order to signal that the socket is
1461 * closed and cannot assume that the child exiting will automatically do
1464 * IPC mechanism: (X is receiver)
1465 * initial process transient process attached process
1466 * X <--- send pid of
1469 * send 0 ------------------------------------> X
1470 * [do initialization]
1471 * X <------------------------------------ send 1
1472 * [add to cgroup, ...]
1473 * send 2 ------------------------------------> X
1474 * [set LXC_ATTACH_NO_NEW_PRIVS]
1475 * X <------------------------------------ send 3
1476 * [open LSM label fd]
1477 * send 4 ------------------------------------> X
1479 * close socket close socket
1482 ret
= socketpair(PF_LOCAL
, SOCK_STREAM
| SOCK_CLOEXEC
, 0, ipc_sockets
);
1484 put_attach_context(ctx
);
1485 return log_error_errno(-1, errno
, "Could not set up required IPC mechanism for attaching");
1488 /* Create transient process, two reasons:
1489 * 1. We can't setns() in the child itself, since we want to make
1490 * sure we are properly attached to the pidns.
1491 * 2. Also, the initial thread has to put the attached process
1492 * into the cgroup, which we can only do if we didn't already
1493 * setns() (otherwise, user namespaces will hate us).
1497 put_attach_context(ctx
);
1498 return log_error_errno(-1, errno
, "Failed to create first subprocess");
1502 char *cwd
, *new_cwd
;
1504 /* close unneeded file descriptors */
1505 close_prot_errno_disarm(ipc_sockets
[0]);
1507 if (options
->attach_flags
& LXC_ATTACH_TERMINAL
) {
1508 lxc_attach_terminal_close_ptx(&terminal
);
1509 lxc_attach_terminal_close_peer(&terminal
);
1510 lxc_attach_terminal_close_log(&terminal
);
1513 /* Wait for the parent to have setup cgroups. */
1514 if (!sync_wait(ipc_sockets
[1], ATTACH_SYNC_CGROUP
)) {
1515 shutdown(ipc_sockets
[1], SHUT_RDWR
);
1516 put_attach_context(ctx
);
1517 _exit(EXIT_FAILURE
);
1520 if (!attach_context_security_barrier(ctx
)) {
1521 shutdown(ipc_sockets
[1], SHUT_RDWR
);
1522 put_attach_context(ctx
);
1523 _exit(EXIT_FAILURE
);
1526 cwd
= getcwd(NULL
, 0);
1529 * Attach now, create another subprocess later, since pid
1530 * namespaces only really affect the children of the current
1533 * Note that this is a crucial barrier. We're no moving into
1534 * the container's context so we need to make sure to not leak
1535 * anything sensitive. That especially means things such as
1536 * open file descriptors!
1538 ret
= attach_namespaces(ctx
, options
);
1540 ERROR("Failed to enter namespaces");
1541 shutdown(ipc_sockets
[1], SHUT_RDWR
);
1542 put_attach_context(ctx
);
1543 _exit(EXIT_FAILURE
);
1546 /* Attach succeeded, try to cwd. */
1547 if (options
->initial_cwd
)
1548 new_cwd
= options
->initial_cwd
;
1552 ret
= chdir(new_cwd
);
1554 WARN("Could not change directory to \"%s\"", new_cwd
);
1558 /* Create attached process. */
1559 pid
= lxc_raw_clone(CLONE_PARENT
, NULL
);
1561 SYSERROR("Failed to clone attached process");
1562 shutdown(ipc_sockets
[1], SHUT_RDWR
);
1563 put_attach_context(ctx
);
1564 _exit(EXIT_FAILURE
);
1568 struct attach_payload ap
= {
1569 .ipc_socket
= ipc_sockets
[1],
1572 .terminal_pts_fd
= terminal
.pty
,
1573 .exec_function
= exec_function
,
1574 .exec_payload
= exec_payload
,
1577 if (options
->attach_flags
& LXC_ATTACH_TERMINAL
) {
1578 ret
= lxc_terminal_signal_sigmask_safe_blocked(&terminal
);
1580 SYSERROR("Failed to reset signal mask");
1581 _exit(EXIT_FAILURE
);
1585 /* Does not return. */
1588 TRACE("Attached process %d started initializing", pid
);
1590 if (options
->attach_flags
& LXC_ATTACH_TERMINAL
)
1591 lxc_attach_terminal_close_pts(&terminal
);
1593 /* Tell grandparent the pid of the newly created child. */
1594 if (!sync_wake_pid(ipc_sockets
[1], pid
)) {
1595 /* If this really happens here, this is very unfortunate, since
1596 * the parent will not know the pid of the attached process and
1597 * will not be able to wait for it (and we won't either due to
1598 * CLONE_PARENT) so the parent won't be able to reap it and the
1599 * attached process will remain a zombie.
1601 shutdown(ipc_sockets
[1], SHUT_RDWR
);
1602 put_attach_context(ctx
);
1603 _exit(EXIT_FAILURE
);
1606 /* The rest is in the hands of the initial and the attached process. */
1607 put_attach_context(ctx
);
1608 _exit(EXIT_SUCCESS
);
1610 TRACE("Transient process %d started initializing", pid
);
1612 to_cleanup_pid
= pid
;
1614 /* close unneeded file descriptors */
1615 close_prot_errno_disarm(ipc_sockets
[1]);
1616 put_namespaces(ctx
);
1617 if (options
->attach_flags
& LXC_ATTACH_TERMINAL
)
1618 lxc_attach_terminal_close_pts(&terminal
);
1620 /* Attach to cgroup, if requested. */
1621 if (options
->attach_flags
& LXC_ATTACH_MOVE_TO_CGROUP
) {
1623 * If this is the unified hierarchy cgroup_attach() is
1626 ret
= cgroup_attach(conf
, name
, lxcpath
, pid
);
1628 call_cleaner(cgroup_exit
) struct cgroup_ops
*cgroup_ops
= NULL
;
1629 if (!ERRNO_IS_NOT_SUPPORTED(ret
)) {
1630 SYSERROR("Failed to attach cgroup");
1634 cgroup_ops
= cgroup_init(conf
);
1638 if (!cgroup_ops
->attach(cgroup_ops
, conf
, name
, lxcpath
, pid
))
1642 TRACE("Moved transient process %d into container cgroup", pid
);
1646 * Close sensitive file descriptors we don't need anymore. Even if
1649 if (!attach_context_security_barrier(ctx
))
1652 /* Setup /proc limits */
1653 ret
= setup_proc_filesystem(conf
, pid
);
1657 /* Setup resource limits */
1658 ret
= setup_resource_limits(conf
, pid
);
1662 if (options
->attach_flags
& LXC_ATTACH_TERMINAL
) {
1663 ret
= lxc_attach_terminal_mainloop_init(&terminal
, &descr
);
1667 TRACE("Initialized terminal mainloop");
1670 /* Let the child process know to go ahead. */
1671 if (!sync_wake(ipc_sockets
[0], ATTACH_SYNC_CGROUP
))
1672 goto close_mainloop
;
1674 TRACE("Told transient process to start initializing");
1676 /* Get pid of attached process from transient process. */
1677 if (!sync_wait_pid(ipc_sockets
[0], &attached_pid
))
1678 goto close_mainloop
;
1680 TRACE("Received pid %d of attached process in parent pid namespace", attached_pid
);
1682 /* Ignore SIGINT (CTRL-C) and SIGQUIT (CTRL-\) - issue #313. */
1683 if (options
->stdin_fd
== STDIN_FILENO
) {
1684 signal(SIGINT
, SIG_IGN
);
1685 signal(SIGQUIT
, SIG_IGN
);
1688 /* Reap transient process. */
1689 ret
= wait_for_pid(pid
);
1691 goto close_mainloop
;
1693 TRACE("Transient process %d exited", pid
);
1695 /* We will always have to reap the attached process now. */
1696 to_cleanup_pid
= attached_pid
;
1698 /* Open LSM fd and send it to child. */
1699 if (attach_lsm(options
) && ctx
->lsm_label
) {
1700 __do_close
int fd_lsm
= -EBADF
;
1703 on_exec
= options
->attach_flags
& LXC_ATTACH_LSM_EXEC
? true : false;
1704 fd_lsm
= ctx
->lsm_ops
->process_label_fd_get(ctx
->lsm_ops
, attached_pid
, on_exec
);
1706 goto close_mainloop
;
1708 TRACE("Opened LSM label file descriptor %d", fd_lsm
);
1710 /* Send child fd of the LSM security module to write to. */
1711 if (!sync_wake_fd(ipc_sockets
[0], fd_lsm
)) {
1712 SYSERROR("Failed to send lsm label fd");
1713 goto close_mainloop
;
1716 TRACE("Sent LSM label file descriptor %d to child", fd_lsm
);
1719 if (conf
->seccomp
.seccomp
) {
1720 ret
= lxc_seccomp_recv_notifier_fd(&conf
->seccomp
, ipc_sockets
[0]);
1722 goto close_mainloop
;
1724 ret
= lxc_seccomp_add_notifier(name
, lxcpath
, &conf
->seccomp
);
1726 goto close_mainloop
;
1729 /* We're done, the child process should now execute whatever it
1730 * is that the user requested. The parent can now track it with
1731 * waitpid() or similar.
1734 *attached_process
= attached_pid
;
1736 /* Now shut down communication with child, we're done. */
1737 shutdown(ipc_sockets
[0], SHUT_RDWR
);
1738 close_prot_errno_disarm(ipc_sockets
[0]);
1741 to_cleanup_pid
= -1;
1743 if (options
->attach_flags
& LXC_ATTACH_TERMINAL
) {
1744 ret
= lxc_mainloop(&descr
, -1);
1747 to_cleanup_pid
= attached_pid
;
1752 if (options
->attach_flags
& LXC_ATTACH_TERMINAL
)
1753 lxc_mainloop_close(&descr
);
1756 if (ipc_sockets
[0] >= 0) {
1757 shutdown(ipc_sockets
[0], SHUT_RDWR
);
1758 close_prot_errno_disarm(ipc_sockets
[0]);
1761 if (to_cleanup_pid
> 0)
1762 (void)wait_for_pid(to_cleanup_pid
);
1764 if (options
->attach_flags
& LXC_ATTACH_TERMINAL
) {
1765 lxc_terminal_delete(&terminal
);
1766 lxc_terminal_conf_free(&terminal
);
1769 put_attach_context(ctx
);
/*
 * Attach callback that executes a command.
 *
 * @payload: pointer to an lxc_attach_command_t; its program and argv members
 *           are passed to execvp().
 *
 * Return: only reached when execvp() did not replace the process image; the
 * visible return hands ret and errno to log_error_errno() together with the
 * program name.
 * NOTE(review): the lines between the execvp() call and the return are not
 * shown here, so ret may be adjusted before it is returned; confirm against
 * the full file.
 */
1773 int lxc_attach_run_command(void *payload
)
1776 lxc_attach_command_t
*cmd
= payload
;
1778 ret
= execvp(cmd
->program
, cmd
->argv
);
1790 return log_error_errno(ret
, errno
, "Failed to exec \"%s\"", cmd
->program
);
/*
 * Attach callback that starts a login shell for the current user.
 *
 * @payload: ignored, as the comment in the body states.
 *
 * The visible lines size a buffer with sysconf(_SC_GETPW_R_SIZE_MAX), look
 * up the password record with getpwuid_r(), and take the shell either from
 * pwent.pw_shell or from lxc_attach_getpwshell(). The shell is started with
 * execlp(); /bin/sh is tried afterwards as a fallback and an error is logged
 * if that call returns as well.
 *
 * NOTE(review): the fallback comment in the body mentions execvp, while the
 * visible calls are execlp(). The malloc() result check and the final return
 * statement are not among the lines shown here; confirm against the full
 * file.
 */
1793 int lxc_attach_run_shell(void* payload
)
1795 __do_free
char *buf
= NULL
;
1797 struct passwd pwent
;
1798 struct passwd
*pwentp
= NULL
;
1803 /* Ignore payload parameter. */
1808 bufsize
= sysconf(_SC_GETPW_R_SIZE_MAX
);
1812 buf
= malloc(bufsize
);
1814 ret
= getpwuid_r(uid
, &pwent
, buf
, bufsize
, &pwentp
);
1817 WARN("Could not find matched password record");
1819 WARN("Failed to get password record - %u", uid
);
1823 /* This probably happens because of incompatible nss implementations in
1824 * host and container (remember, this code is still using the host's
1825 * glibc but our mount namespace is in the container) we may try to get
1826 * the information by spawning a [getent passwd uid] process and parsing
1830 user_shell
= lxc_attach_getpwshell(uid
);
1832 user_shell
= pwent
.pw_shell
;
1835 execlp(user_shell
, user_shell
, (char *)NULL
);
1837 /* Executed if either no passwd entry or execvp fails, we will fall back
1838 * on /bin/sh as a default shell.
1840 execlp("/bin/sh", "/bin/sh", (char *)NULL
);
1842 SYSERROR("Failed to execute shell");