]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/attach.c
Merge pull request #3947 from blenk92/fix-missing-seccomp
[mirror_lxc.git] / src / lxc / attach.c
CommitLineData
cc73685d 1/* SPDX-License-Identifier: LGPL-2.1+ */
e0732705 2
d38dd64a
CB
3#ifndef _GNU_SOURCE
4#define _GNU_SOURCE 1
5#endif
e0732705
CS
6#include <errno.h>
7#include <fcntl.h>
c476bdce 8#include <grp.h>
604ca1c0 9#include <linux/unistd.h>
6f4f1937 10#include <pwd.h>
0bece477 11#include <pthread.h>
6f4f1937
CB
12#include <signal.h>
13#include <stdio.h>
14#include <stdlib.h>
15#include <string.h>
6f4f1937 16#include <sys/mount.h>
e0732705
CS
17#include <sys/param.h>
18#include <sys/prctl.h>
5ec27989 19#include <sys/socket.h>
1ba0013f 20#include <sys/syscall.h>
905022f7 21#include <sys/wait.h>
604ca1c0
CB
22#include <termios.h>
23#include <unistd.h>
6f4f1937
CB
24
25#include <lxc/lxccontainer.h>
e0732705 26
81f466d0 27#include "af_unix.h"
e0732705
CS
28#include "attach.h"
29#include "caps.h"
c988c8b1
CB
30#include "cgroups/cgroup.h"
31#include "cgroups/cgroup_utils.h"
6f4f1937 32#include "commands.h"
2c4ea790 33#include "conf.h"
6f4f1937 34#include "config.h"
9b8e3c96 35#include "confile.h"
6f4f1937
CB
36#include "log.h"
37#include "lsm/lsm.h"
38#include "lxclock.h"
39#include "lxcseccomp.h"
604ca1c0 40#include "macro.h"
ba2be1a8 41#include "mainloop.h"
cd8f5663 42#include "memory_utils.h"
657256e0 43#include "mount_utils.h"
6f4f1937 44#include "namespace.h"
f40988c7 45#include "process_utils.h"
a9f0cecf 46#include "sync.h"
59524108 47#include "syscall_wrappers.h"
0ed9b1bc 48#include "terminal.h"
6f4f1937 49#include "utils.h"
9c4693b8 50
ac2cecc4 51lxc_log_define(attach, lxc);
e0732705 52
ef05d368
CB
53/* Define default options if no options are supplied by the user. */
54static lxc_attach_options_t attach_static_default_options = LXC_ATTACH_OPTIONS_DEFAULT;
55
20718e39
CB
56/*
57 * The context used to attach to the container.
3ac4480a
CB
58 * @attach_flags : the attach flags specified in lxc_attach_options_t
59 * @init_pid : the PID of the container's init process
60 * @dfd_init_pid : file descriptor to /proc/@init_pid
61 * __Must be closed in attach_context_security_barrier()__!
62 * @dfd_self_pid : file descriptor to /proc/self
63 * __Must be closed in attach_context_security_barrier()__!
64 * @setup_ns_uid : if CLONE_NEWUSER is specified will contain the uid used
65 * during attach setup.
66 * @setup_ns_gid : if CLONE_NEWUSER is specified will contain the gid used
67 * during attach setup.
68 * @target_ns_uid : if CLONE_NEWUSER is specified the uid that the final
69 * program will be run with.
70 * @target_ns_gid : if CLONE_NEWUSER is specified the gid that the final
71 * program will be run with.
72 * @target_host_uid : if CLONE_NEWUSER is specified the uid that the final
73 * program will be run with on the host.
74 * @target_host_gid : if CLONE_NEWUSER is specified the gid that the final
75 * program will be run with on the host.
76 * @lsm_label : LSM label to be used for the attaching process
77 * @container : the container we're attaching o
78 * @personality : the personality to use for the final program
79 * @capability : the capability mask of the @init_pid
80 * @ns_inherited : flags of namespaces that the final program will inherit
81 * from @init_pid
82 * @ns_fd : file descriptors to @init_pid's namespaces
20718e39 83 */
ab919e5f 84struct attach_context {
afc691a0 85 unsigned int attach_flags;
500ed813 86 int init_pid;
9b31ab58 87 int init_pidfd;
25c659d5
CB
88 int dfd_init_pid;
89 int dfd_self_pid;
3ac4480a
CB
90 uid_t setup_ns_uid;
91 gid_t setup_ns_gid;
92 uid_t target_ns_uid;
93 gid_t target_ns_gid;
94 uid_t target_host_uid;
95 uid_t target_host_gid;
0e304baa
CB
96 char *lsm_label;
97 struct lxc_container *container;
64a04c84 98 personality_t personality;
0e304baa
CB
99 unsigned long long capability_mask;
100 int ns_inherited;
101 int ns_fd[LXC_NS_MAX];
102 struct lsm_ops *lsm_ops;
103};
104
6f0c2cea 105static pid_t pidfd_get_pid(int dfd_init_pid, int pidfd)
d8764025
CB
106{
107 __do_free char *line = NULL;
108 __do_fclose FILE *f = NULL;
109 size_t len = 0;
6f0c2cea 110 char path[STRLITERALLEN("fdinfo/") + INTTYPE_TO_STRLEN(int) + 1 ] = "fdinfo/";
d8764025
CB
111 int ret;
112
6f0c2cea
CB
113 if (dfd_init_pid < 0 || pidfd < 0)
114 return ret_errno(EBADF);
d8764025 115
f51c7eb4
CB
116 ret = strnprintf(path + STRLITERALLEN("fdinfo/"), INTTYPE_TO_STRLEN(int), "%d", pidfd);
117 if (ret < 0)
d8764025
CB
118 return ret_errno(EIO);
119
6f0c2cea 120 f = fdopen_at(dfd_init_pid, path, "re", PROTECT_OPEN, PROTECT_LOOKUP_BENEATH);
d8764025
CB
121 if (!f)
122 return -errno;
123
124 while (getline(&line, &len, f) != -1) {
125 const char *prefix = "Pid:\t";
126 const size_t prefix_len = STRLITERALLEN("Pid:\t");
127 int pid = -ESRCH;
128 char *slider = line;
129
6a6c7030 130 if (!strnequal(slider, prefix, prefix_len))
d8764025
CB
131 continue;
132
133 slider += prefix_len;
134 slider = lxc_trim_whitespace_in_place(slider);
135
136 ret = lxc_safe_int(slider, &pid);
137 if (ret)
138 return -ret;
139
140 return pid;
141 }
142
143 return ret_errno(ENOENT);
144}
145
6e36c297
CB
/* Send @pid over @fd; true only if the whole pid_t was written. */
static inline bool sync_wake_pid(int fd, pid_t pid)
{
	if (lxc_write_nointr(fd, &pid, sizeof(pid_t)) != sizeof(pid_t))
		return false;

	return true;
}
150
/* Receive a pid_t from @fd into @pid; true only if a full pid_t was read. */
static inline bool sync_wait_pid(int fd, pid_t *pid)
{
	if (lxc_read_nointr(fd, pid, sizeof(pid_t)) != sizeof(pid_t))
		return false;

	return true;
}
155
/* Pass the single file descriptor @fd_send over the unix socket @fd. */
static inline bool sync_wake_fd(int fd, int fd_send)
{
	if (lxc_abstract_unix_send_fds(fd, &fd_send, 1, NULL, 0) <= 0)
		return false;

	return true;
}
160
/* Receive a single file descriptor from the unix socket @fd into @fd_recv. */
static inline bool sync_wait_fd(int fd, int *fd_recv)
{
	if (lxc_abstract_unix_recv_one_fd(fd, fd_recv, NULL, 0) <= 0)
		return false;

	return true;
}
165
afc691a0
CB
166static bool attach_lsm(lxc_attach_options_t *options)
167{
168 return (options->namespaces & CLONE_NEWNS) &&
169 (options->attach_flags & (LXC_ATTACH_LSM | LXC_ATTACH_LSM_LABEL));
170}
171
9745eb8a
CB
172static struct attach_context *alloc_attach_context(void)
173{
581b849a
CB
174 struct attach_context *ctx;
175
176 ctx = zalloc(sizeof(struct attach_context));
177 if (!ctx)
178 return ret_set_errno(NULL, ENOMEM);
179
f620ed44
CB
180 ctx->init_pid = -ESRCH;
181
9b31ab58
CB
182 ctx->dfd_self_pid = -EBADF;
183 ctx->dfd_init_pid = -EBADF;
184 ctx->init_pidfd = -EBADF;
f620ed44 185
9b31ab58
CB
186 ctx->setup_ns_uid = LXC_INVALID_UID;
187 ctx->setup_ns_gid = LXC_INVALID_GID;
188 ctx->target_ns_uid = LXC_INVALID_UID;
189 ctx->target_ns_gid = LXC_INVALID_GID;
190 ctx->target_host_uid = LXC_INVALID_UID;
191 ctx->target_host_gid = LXC_INVALID_GID;
581b849a 192
2533995e 193 for (lxc_namespace_t i = 0; i < LXC_NS_MAX; i++)
581b849a
CB
194 ctx->ns_fd[i] = -EBADF;
195
196 return ctx;
9745eb8a
CB
197}
198
ee142207 199static int get_personality(const char *name, const char *lxcpath,
64a04c84 200 personality_t *personality)
d92c8e40
CB
201{
202 __do_free char *p = NULL;
7c43fa56 203 int ret;
ee142207 204 signed long per;
d92c8e40
CB
205
206 p = lxc_cmd_get_config_item(name, "lxc.arch", lxcpath);
ee142207
CB
207 if (!p) {
208 *personality = LXC_ARCH_UNCHANGED;
209 return 0;
210 }
d92c8e40 211
7c43fa56
CB
212 ret = lxc_config_parse_arch(p, &per);
213 if (ret < 0)
214 return syserror("Failed to parse personality");
ee142207
CB
215
216 *personality = per;
217 return 0;
d92c8e40
CB
218}
219
4475fabb 220static int userns_setup_ids(struct attach_context *ctx,
3ac4480a 221 lxc_attach_options_t *options)
4475fabb
CB
222{
223 __do_free char *line = NULL;
224 __do_fclose FILE *f_gidmap = NULL, *f_uidmap = NULL;
225 size_t len = 0;
226 uid_t init_ns_uid = LXC_INVALID_UID;
227 gid_t init_ns_gid = LXC_INVALID_GID;
228 uid_t nsuid, hostuid, range_uid;
229 gid_t nsgid, hostgid, range_gid;
230
231 if (!(options->namespaces & CLONE_NEWUSER))
232 return 0;
233
72a19d2f 234 f_uidmap = fdopen_at(ctx->dfd_init_pid, "uid_map", "re", PROTECT_OPEN, PROTECT_LOOKUP_BENEATH);
4475fabb
CB
235 if (!f_uidmap)
236 return log_error_errno(-errno, errno, "Failed to open uid_map");
237
238 while (getline(&line, &len, f_uidmap) != -1) {
239 if (sscanf(line, "%u %u %u", &nsuid, &hostuid, &range_uid) != 3)
240 continue;
241
242 if (0 >= nsuid && 0 < nsuid + range_uid) {
3ac4480a 243 ctx->setup_ns_uid = 0;
4475fabb
CB
244 TRACE("Container has mapping for uid 0");
245 break;
246 }
247
3ac4480a
CB
248 if (ctx->target_host_uid >= hostuid && ctx->target_host_uid < hostuid + range_uid) {
249 init_ns_uid = (ctx->target_host_uid - hostuid) + nsuid;
4475fabb
CB
250 TRACE("Container runs with uid %d", init_ns_uid);
251 }
252 }
253
72a19d2f 254 f_gidmap = fdopen_at(ctx->dfd_init_pid, "gid_map", "re", PROTECT_OPEN, PROTECT_LOOKUP_BENEATH);
4475fabb
CB
255 if (!f_gidmap)
256 return log_error_errno(-errno, errno, "Failed to open gid_map");
257
258 while (getline(&line, &len, f_gidmap) != -1) {
259 if (sscanf(line, "%u %u %u", &nsgid, &hostgid, &range_gid) != 3)
260 continue;
261
262 if (0 >= nsgid && 0 < nsgid + range_gid) {
3ac4480a 263 ctx->setup_ns_gid = 0;
4475fabb
CB
264 TRACE("Container has mapping for gid 0");
265 break;
266 }
267
3ac4480a
CB
268 if (ctx->target_host_gid >= hostgid && ctx->target_host_gid < hostgid + range_gid) {
269 init_ns_gid = (ctx->target_host_gid - hostgid) + nsgid;
4475fabb
CB
270 TRACE("Container runs with gid %d", init_ns_gid);
271 }
272 }
273
3ac4480a
CB
274 if (ctx->setup_ns_uid == LXC_INVALID_UID)
275 ctx->setup_ns_uid = init_ns_uid;
4475fabb 276
3ac4480a
CB
277 if (ctx->setup_ns_gid == LXC_INVALID_UID)
278 ctx->setup_ns_gid = init_ns_gid;
4475fabb 279
4475fabb
CB
280 return 0;
281}
282
283static void userns_target_ids(struct attach_context *ctx, lxc_attach_options_t *options)
284{
285 if (options->uid != LXC_INVALID_UID)
3ac4480a 286 ctx->target_ns_uid = options->uid;
4475fabb 287 else if (options->namespaces & CLONE_NEWUSER)
3ac4480a 288 ctx->target_ns_uid = ctx->setup_ns_uid;
4475fabb 289 else
3ac4480a 290 ctx->target_ns_uid = 0;
4475fabb 291
3ac4480a 292 if (ctx->target_ns_uid == LXC_INVALID_UID)
4475fabb
CB
293 WARN("Invalid uid specified");
294
295 if (options->gid != LXC_INVALID_GID)
3ac4480a 296 ctx->target_ns_gid = options->gid;
4475fabb 297 else if (options->namespaces & CLONE_NEWUSER)
3ac4480a 298 ctx->target_ns_gid = ctx->setup_ns_gid;
4475fabb 299 else
3ac4480a 300 ctx->target_ns_gid = 0;
4475fabb 301
3ac4480a 302 if (ctx->target_ns_gid == LXC_INVALID_GID)
4475fabb
CB
303 WARN("Invalid gid specified");
304}
305
9680e7b0
CB
306static int parse_init_status(struct attach_context *ctx, lxc_attach_options_t *options)
307{
308 __do_free char *line = NULL;
309 __do_fclose FILE *f = NULL;
310 size_t len = 0;
311 bool caps_found = false;
4475fabb 312 int ret;
9680e7b0 313
72a19d2f 314 f = fdopen_at(ctx->dfd_init_pid, "status", "re", PROTECT_OPEN, PROTECT_LOOKUP_BENEATH);
9680e7b0 315 if (!f)
4475fabb 316 return log_error_errno(-errno, errno, "Failed to open status file");
9680e7b0
CB
317
318 while (getline(&line, &len, f) != -1) {
319 signed long value = -1;
9680e7b0 320
4475fabb
CB
321 /*
322 * Format is: real, effective, saved set user, fs we only care
323 * about real uid.
324 */
325 ret = sscanf(line, "Uid: %ld", &value);
326 if (ret != EOF && ret == 1) {
3ac4480a
CB
327 ctx->target_host_uid = (uid_t)value;
328 TRACE("Container's init process runs with hostuid %d", ctx->target_host_uid);
4475fabb
CB
329 goto next;
330 }
9680e7b0 331
4475fabb
CB
332 ret = sscanf(line, "Gid: %ld", &value);
333 if (ret != EOF && ret == 1) {
3ac4480a
CB
334 ctx->target_host_gid = (gid_t)value;
335 TRACE("Container's init process runs with hostgid %d", ctx->target_host_gid);
4475fabb 336 goto next;
9680e7b0
CB
337 }
338
339 ret = sscanf(line, "CapBnd: %llx", &ctx->capability_mask);
340 if (ret != EOF && ret == 1) {
341 caps_found = true;
342 goto next;
343 }
344
345 next:
3ac4480a
CB
346 if (ctx->target_host_uid != LXC_INVALID_UID &&
347 ctx->target_host_gid != LXC_INVALID_GID &&
4475fabb 348 caps_found)
9680e7b0
CB
349 break;
350
351 }
352
3ac4480a 353 ret = userns_setup_ids(ctx, options);
4475fabb
CB
354 if (ret)
355 return log_error_errno(ret, errno, "Failed to get setup ids");
356 userns_target_ids(ctx, options);
357
9680e7b0
CB
358 return 0;
359}
360
9b31ab58
CB
361static bool pidfd_setns_supported(struct attach_context *ctx)
362{
363 int ret;
364
365 /*
366 * The ability to attach to time namespaces came after the introduction
367 * of of using pidfds for attaching to namespaces. To avoid having to
368 * special-case both CLONE_NEWUSER and CLONE_NEWTIME handling, let's
369 * use CLONE_NEWTIME as gatekeeper.
370 */
371 if (ctx->init_pidfd >= 0)
372 ret = setns(ctx->init_pidfd, CLONE_NEWTIME);
373 else
374 ret = -EOPNOTSUPP;
375 TRACE("Attaching to namespaces via pidfds %s",
376 ret ? "unsupported" : "supported");
377 return ret == 0;
378}
379
500ed813 380static int get_attach_context(struct attach_context *ctx,
afc691a0
CB
381 struct lxc_container *container,
382 lxc_attach_options_t *options)
e0732705 383{
9680e7b0 384 __do_free char *lsm_label = NULL;
6f4f1937 385 int ret;
c538837d 386 char path[LXC_PROC_PID_LEN];
e0732705 387
500ed813 388 ctx->container = container;
afc691a0 389 ctx->attach_flags = options->attach_flags;
500ed813 390
6f0c2cea
CB
391 ctx->dfd_self_pid = open_at(-EBADF, "/proc/self",
392 PROTECT_OPATH_FILE & ~O_NOFOLLOW,
393 (PROTECT_LOOKUP_ABSOLUTE_WITH_SYMLINKS & ~RESOLVE_NO_XDEV), 0);
394 if (ctx->dfd_self_pid < 0)
395 return log_error_errno(-errno, errno, "Failed to open /proc/self");
396
9b31ab58
CB
397 ctx->init_pidfd = lxc_cmd_get_init_pidfd(container->name, container->config_path);
398 if (ctx->init_pidfd >= 0)
399 ctx->init_pid = pidfd_get_pid(ctx->dfd_self_pid, ctx->init_pidfd);
d8764025
CB
400 else
401 ctx->init_pid = lxc_cmd_get_init_pid(container->name, container->config_path);
500ed813
CB
402 if (ctx->init_pid < 0)
403 return log_error(-1, "Failed to get init pid");
404
f51c7eb4
CB
405 ret = strnprintf(path, sizeof(path), "/proc/%d", ctx->init_pid);
406 if (ret < 0)
c538837d 407 return ret_errno(EIO);
e0732705 408
5129b2d3
CB
409 ctx->dfd_init_pid = open_at(-EBADF, path,
410 PROTECT_OPATH_DIRECTORY,
411 (PROTECT_LOOKUP_ABSOLUTE & ~RESOLVE_NO_XDEV), 0);
9680e7b0 412 if (ctx->dfd_init_pid < 0)
4475fabb 413 return log_error_errno(-errno, errno, "Failed to open /proc/%d", ctx->init_pid);
c538837d 414
9b31ab58
CB
415 if (ctx->init_pidfd >= 0) {
416 ret = lxc_raw_pidfd_send_signal(ctx->init_pidfd, 0, NULL, 0);
d8764025
CB
417 if (ret)
418 return log_error_errno(-errno, errno, "Container process exited or PID has been recycled");
419 else
420 TRACE("Container process still running and PID was not recycled");
9b31ab58
CB
421
422 if (!pidfd_setns_supported(ctx)) {
423 /* We can't risk leaking file descriptors during attach. */
424 if (close(ctx->init_pidfd))
425 return log_error_errno(-errno, errno, "Failed to close pidfd");
426
427 ctx->init_pidfd = -EBADF;
428 TRACE("Attaching to namespaces via pidfds not supported");
429 }
d8764025
CB
430 }
431
4475fabb
CB
432 /* Determine which namespaces the container was created with. */
433 if (options->namespaces == -1) {
434 options->namespaces = lxc_cmd_get_clone_flags(container->name, container->config_path);
435 if (options->namespaces == -1)
436 return log_error_errno(-EINVAL, EINVAL, "Failed to automatically determine the namespaces which the container uses");
437
2533995e 438 for (lxc_namespace_t i = 0; i < LXC_NS_MAX; i++) {
4475fabb
CB
439 if (ns_info[i].clone_flag & CLONE_NEWCGROUP)
440 if (!(options->attach_flags & LXC_ATTACH_MOVE_TO_CGROUP) ||
441 !cgns_supported())
442 continue;
443
444 if (ns_info[i].clone_flag & options->namespaces)
445 continue;
446
447 ctx->ns_inherited |= ns_info[i].clone_flag;
448 }
449 }
450
9680e7b0
CB
451 ret = parse_init_status(ctx, options);
452 if (ret)
4475fabb 453 return log_error_errno(-errno, errno, "Failed to open parse file");
e0732705 454
4eb19ac0 455 ctx->lsm_ops = lsm_init_static();
d701d729 456
afc691a0
CB
457 if (attach_lsm(options)) {
458 if (ctx->attach_flags & LXC_ATTACH_LSM_LABEL)
459 lsm_label = options->lsm_label;
460 else
9680e7b0 461 lsm_label = ctx->lsm_ops->process_label_get_at(ctx->lsm_ops, ctx->dfd_init_pid);
afc691a0
CB
462 if (!lsm_label)
463 WARN("No security context received");
464 else
465 INFO("Retrieved security context %s", lsm_label);
466 }
e0732705 467
ee142207
CB
468 ret = get_personality(container->name, container->config_path, &ctx->personality);
469 if (ret)
470 return log_error_errno(ret, errno, "Failed to get personality of the container");
d92c8e40 471
1874ef74
CB
472 if (!ctx->container->lxc_conf) {
473 ctx->container->lxc_conf = lxc_conf_init();
474 if (!ctx->container->lxc_conf)
475 return log_error_errno(-ENOMEM, ENOMEM, "Failed to allocate new lxc config");
476 }
477
afc691a0 478 ctx->lsm_label = move_ptr(lsm_label);
9745eb8a 479 return 0;
e0732705
CS
480}
481
/*
 * Compare the namespace entry @ns_path as seen below two /proc/<pid>
 * directory descriptors.
 *
 * Returns 1 if both refer to the same device and inode, 0 if they differ and
 * a negative errno value if either lookup fails.
 */
static int same_nsfd(int dfd_pid1, int dfd_pid2, const char *ns_path)
{
	struct stat st_first, st_second;

	if (fstatat(dfd_pid1, ns_path, &st_first, 0))
		return -errno;

	if (fstatat(dfd_pid2, ns_path, &st_second, 0))
		return -errno;

	/* Identical device and inode means both are in the same namespace. */
	if (st_first.st_dev != st_second.st_dev)
		return 0;

	if (st_first.st_ino != st_second.st_ino)
		return 0;

	return 1;
}
502
503static int same_ns(int dfd_pid1, int dfd_pid2, const char *ns_path)
504{
505 __do_close int ns_fd2 = -EBADF;
506 int ret = -1;
507
508 ns_fd2 = open_at(dfd_pid2, ns_path, PROTECT_OPEN_WITH_TRAILING_SYMLINKS,
509 (PROTECT_LOOKUP_BENEATH_WITH_MAGICLINKS &
510 ~(RESOLVE_NO_XDEV | RESOLVE_BENEATH)), 0);
511 if (ns_fd2 < 0) {
9b31ab58 512 if (errno == ENOENT)
fb54db2c 513 return -ENOENT;
2d7b0895 514 return syserror("Failed to open %d(%s)", dfd_pid2, ns_path);
9b31ab58
CB
515 }
516
517 ret = same_nsfd(dfd_pid1, dfd_pid2, ns_path);
3a6678c7
CB
518 switch (ret) {
519 case -ENOENT:
520 __fallthrough;
521 case 1:
522 return ret_errno(ENOENT);
523 case 0:
524 /* processes are in different namespaces */
525 return move_fd(ns_fd2);
526 }
9b31ab58 527
3a6678c7 528 return ret;
299d1198
CB
529}
530
9b31ab58
CB
531static int __prepare_namespaces_pidfd(struct attach_context *ctx)
532{
2533995e 533 for (lxc_namespace_t i = 0; i < LXC_NS_MAX; i++) {
9b31ab58
CB
534 int ret;
535
9b31ab58
CB
536 ret = same_nsfd(ctx->dfd_self_pid,
537 ctx->dfd_init_pid,
538 ns_info[i].proc_path);
3a6678c7
CB
539 switch (ret) {
540 case -ENOENT:
541 __fallthrough;
542 case 1:
9b31ab58 543 ctx->ns_inherited &= ~ns_info[i].clone_flag;
0b8e876f
CB
544 TRACE("Shared %s namespace doesn't need attach", ns_info[i].proc_name);
545 continue;
3a6678c7 546 case 0:
0b8e876f
CB
547 TRACE("Different %s namespace needs attach", ns_info[i].proc_name);
548 continue;
3a6678c7
CB
549 }
550
2d7b0895 551 return syserror("Failed to determine whether %s namespace is shared",
3a6678c7 552 ns_info[i].proc_name);
9b31ab58
CB
553 }
554
555 return 0;
556}
557
558static int __prepare_namespaces_nsfd(struct attach_context *ctx,
559 lxc_attach_options_t *options)
b7873c95 560{
2533995e
CB
561 for (lxc_namespace_t i = 0; i < LXC_NS_MAX; i++) {
562 lxc_namespace_t j;
b7873c95
CB
563
564 if (options->namespaces & ns_info[i].clone_flag)
5129b2d3
CB
565 ctx->ns_fd[i] = open_at(ctx->dfd_init_pid,
566 ns_info[i].proc_path,
567 PROTECT_OPEN_WITH_TRAILING_SYMLINKS,
9b31ab58
CB
568 (PROTECT_LOOKUP_BENEATH_WITH_MAGICLINKS &
569 ~(RESOLVE_NO_XDEV | RESOLVE_BENEATH)),
5129b2d3 570 0);
b7873c95 571 else if (ctx->ns_inherited & ns_info[i].clone_flag)
5129b2d3
CB
572 ctx->ns_fd[i] = same_ns(ctx->dfd_self_pid,
573 ctx->dfd_init_pid,
574 ns_info[i].proc_path);
b7873c95
CB
575 else
576 continue;
577
578 if (ctx->ns_fd[i] >= 0)
579 continue;
580
fb54db2c 581 if (ctx->ns_fd[i] == -ENOENT) {
b7873c95
CB
582 ctx->ns_inherited &= ~ns_info[i].clone_flag;
583 continue;
584 }
585
586 /* We failed to preserve the namespace. */
9b31ab58
CB
587 SYSERROR("Failed to preserve %s namespace of %d",
588 ns_info[i].proc_name, ctx->init_pid);
b7873c95
CB
589
590 /* Close all already opened file descriptors before we return an
591 * error, so we don't leak them.
592 */
593 for (j = 0; j < i; j++)
594 close_prot_errno_disarm(ctx->ns_fd[j]);
595
596 return -1;
597 }
598
599 return 0;
600}
601
9b31ab58
CB
602static int prepare_namespaces(struct attach_context *ctx,
603 lxc_attach_options_t *options)
b7873c95 604{
9b31ab58
CB
605 if (ctx->init_pidfd < 0)
606 return __prepare_namespaces_nsfd(ctx, options);
607
608 return __prepare_namespaces_pidfd(ctx);
b7873c95
CB
609}
610
9b31ab58 611static inline void put_namespaces(struct attach_context *ctx)
b7873c95 612{
9b31ab58
CB
613 if (ctx->init_pidfd < 0) {
614 for (int i = 0; i < LXC_NS_MAX; i++)
615 close_prot_errno_disarm(ctx->ns_fd[i]);
616 }
617}
b7873c95 618
9b31ab58
CB
619static int __attach_namespaces_pidfd(struct attach_context *ctx,
620 lxc_attach_options_t *options)
621{
622 unsigned int ns_flags = options->namespaces | ctx->ns_inherited;
623 int ret;
b7873c95 624
9b31ab58
CB
625 /* The common case is to attach to all namespaces. */
626 ret = setns(ctx->init_pidfd, ns_flags);
627 if (ret)
628 return log_error_errno(-errno, errno,
629 "Failed to attach to namespaces via pidfd");
630
631 /* We can't risk leaking file descriptors into the container. */
632 if (close(ctx->init_pidfd))
633 return log_error_errno(-errno, errno, "Failed to close pidfd");
634 ctx->init_pidfd = -EBADF;
635
636 return log_trace(0, "Attached to container namespaces via pidfd");
b7873c95
CB
637}
638
9b31ab58
CB
639static int __attach_namespaces_nsfd(struct attach_context *ctx,
640 lxc_attach_options_t *options)
99d50954 641{
92466fe3
CB
642 int fret = 0;
643
2533995e 644 for (lxc_namespace_t i = 0; i < LXC_NS_MAX; i++) {
ffeeed8b 645 int ret;
99d50954 646
877f3a04 647 if (ctx->ns_fd[i] < 0)
26818618
CB
648 continue;
649
21d0acc2 650 ret = setns(ctx->ns_fd[i], ns_info[i].clone_flag);
92466fe3 651 if (ret)
9b31ab58
CB
652 return log_error_errno(-errno, errno,
653 "Failed to attach to %s namespace of %d",
654 ns_info[i].proc_name,
655 ctx->init_pid);
92466fe3
CB
656
657 if (close(ctx->ns_fd[i])) {
658 fret = -errno;
9b31ab58
CB
659 SYSERROR("Failed to close file descriptor for %s namespace",
660 ns_info[i].proc_name);
92466fe3
CB
661 }
662 ctx->ns_fd[i] = -EBADF;
99d50954
CS
663 }
664
92466fe3 665 return fret;
99d50954
CS
666}
667
9b31ab58
CB
668static int attach_namespaces(struct attach_context *ctx,
669 lxc_attach_options_t *options)
670{
671 if (lxc_log_trace()) {
2533995e 672 for (lxc_namespace_t i = 0; i < LXC_NS_MAX; i++) {
9b31ab58
CB
673 if (ns_info[i].clone_flag & options->namespaces) {
674 TRACE("Attaching to %s namespace", ns_info[i].proc_name);
675 continue;
676 }
677 if (ns_info[i].clone_flag & ctx->ns_inherited) {
678 TRACE("Sharing %s namespace", ns_info[i].proc_name);
679 continue;
680 }
681 TRACE("Inheriting %s namespace", ns_info[i].proc_name);
682 }
683 }
684
685 if (ctx->init_pidfd < 0)
686 return __attach_namespaces_nsfd(ctx, options);
687
688 return __attach_namespaces_pidfd(ctx, options);
689}
690
691static void put_attach_context(struct attach_context *ctx)
692{
693 if (ctx) {
694 if (!(ctx->attach_flags & LXC_ATTACH_LSM_LABEL))
695 free_disarm(ctx->lsm_label);
696 close_prot_errno_disarm(ctx->dfd_init_pid);
697
698 if (ctx->container) {
699 lxc_container_put(ctx->container);
700 ctx->container = NULL;
701 }
702
703 put_namespaces(ctx);
704 free(ctx);
705 }
706}
707
c538837d
CB
708/*
709 * Place anything in here that needs to be get rid of before we move into the
710 * container's context and fail hard if we can't.
711 */
712static bool attach_context_security_barrier(struct attach_context *ctx)
713{
714 if (ctx) {
25c659d5
CB
715 if (close(ctx->dfd_self_pid))
716 return false;
717 ctx->dfd_self_pid = -EBADF;
718
719 if (close(ctx->dfd_init_pid))
c538837d 720 return false;
25c659d5 721 ctx->dfd_init_pid = -EBADF;
c538837d
CB
722 }
723
724 return true;
725}
726
e4103cf6 727int lxc_attach_remount_sys_proc(void)
7a0b0b56
CS
728{
729 int ret;
730
731 ret = unshare(CLONE_NEWNS);
ffeeed8b
CB
732 if (ret < 0)
733 return log_error_errno(-1, errno, "Failed to unshare mount namespace");
7a0b0b56 734
9e61fb1f
CB
735 if (detect_shared_rootfs() && mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL))
736 SYSERROR("Failed to recursively turn root mount tree into dependent mount. Continuing...");
2c6f3fc9 737
8ce83369 738 /* Assume /proc is always mounted, so remount it. */
7a0b0b56 739 ret = umount2("/proc", MNT_DETACH);
ffeeed8b
CB
740 if (ret < 0)
741 return log_error_errno(-1, errno, "Failed to unmount /proc");
7a0b0b56 742
0d50f288 743 ret = mount("none", "/proc", "proc", 0, NULL);
ffeeed8b
CB
744 if (ret < 0)
745 return log_error_errno(-1, errno, "Failed to remount /proc");
7a0b0b56 746
ffeeed8b
CB
747 /*
748 * Try to umount /sys. If it's not a mount point, we'll get EINVAL, then
8ce83369 749 * we ignore it because it may not have been mounted in the first place.
7a0b0b56
CS
750 */
751 ret = umount2("/sys", MNT_DETACH);
ffeeed8b
CB
752 if (ret < 0 && errno != EINVAL)
753 return log_error_errno(-1, errno, "Failed to unmount /sys");
754
755 /* Remount it. */
0d50f288 756 if (ret == 0 && mount("none", "/sys", "sysfs", 0, NULL))
ffeeed8b 757 return log_error_errno(-1, errno, "Failed to remount /sys");
7a0b0b56
CS
758
759 return 0;
760}
761
677e1d27 762static int drop_capabilities(struct attach_context *ctx)
e0732705 763{
ffeeed8b 764 int last_cap;
e0732705 765
6f4f1937 766 last_cap = lxc_caps_last_cap();
ffeeed8b 767 for (int cap = 0; cap <= last_cap; cap++) {
e0732705
CS
768 if (ctx->capability_mask & (1LL << cap))
769 continue;
770
b81689a1 771 if (prctl(PR_CAPBSET_DROP, prctl_arg(cap), prctl_arg(0),
ffeeed8b
CB
772 prctl_arg(0), prctl_arg(0)))
773 return log_error_errno(-1, errno, "Failed to drop capability %d", cap);
ea918412 774
94ac256f 775 TRACE("Dropped capability %d", cap);
e0732705
CS
776 }
777
778 return 0;
779}
905022f7 780
ab919e5f 781static int lxc_attach_set_environment(struct attach_context *ctx,
7385273f 782 enum lxc_attach_env_policy_t policy,
6f4f1937 783 char **extra_env, char **extra_keep)
b3a39ba6 784{
3d55242a 785 int ret;
7385273f 786 struct lxc_list *iterator;
787
799f96fd 788 if (policy == LXC_ATTACH_CLEAR_ENV) {
3d5e9f48 789 int path_kept = 0;
6f4f1937 790 char **extra_keep_store = NULL;
3d5e9f48
CS
791
792 if (extra_keep) {
793 size_t count, i;
794
3d55242a
CB
795 for (count = 0; extra_keep[count]; count++)
796 ;
3d5e9f48 797
89b7bfe3 798 extra_keep_store = zalloc(count * sizeof(char *));
3d55242a 799 if (!extra_keep_store)
3d5e9f48 800 return -1;
3d55242a 801
3d5e9f48
CS
802 for (i = 0; i < count; i++) {
803 char *v = getenv(extra_keep[i]);
804 if (v) {
805 extra_keep_store[i] = strdup(v);
806 if (!extra_keep_store[i]) {
3d5e9f48
CS
807 while (i > 0)
808 free(extra_keep_store[--i]);
ea918412 809
3d5e9f48
CS
810 free(extra_keep_store);
811 return -1;
812 }
3d55242a 813
e8c43357 814 if (strequal(extra_keep[i], "PATH"))
3d5e9f48
CS
815 path_kept = 1;
816 }
3d5e9f48
CS
817 }
818 }
819
799f96fd 820 if (clearenv()) {
a9cab7e3 821 if (extra_keep_store) {
3d55242a
CB
822 char **p;
823
a9cab7e3
CS
824 for (p = extra_keep_store; *p; p++)
825 free(*p);
3d55242a 826
a9cab7e3
CS
827 free(extra_keep_store);
828 }
3d55242a 829
ffeeed8b 830 return log_error(-1, "Failed to clear environment");
3d5e9f48
CS
831 }
832
833 if (extra_keep_store) {
834 size_t i;
6f4f1937 835
3d5e9f48 836 for (i = 0; extra_keep[i]; i++) {
acd4922e 837 if (extra_keep_store[i]) {
3d55242a
CB
838 ret = setenv(extra_keep[i], extra_keep_store[i], 1);
839 if (ret < 0)
a24c5678 840 SYSWARN("Failed to set environment variable");
acd4922e 841 }
ea918412 842
3d5e9f48
CS
843 free(extra_keep_store[i]);
844 }
ea918412 845
3d5e9f48
CS
846 free(extra_keep_store);
847 }
848
8ce83369
CB
849 /* Always set a default path; shells and execlp tend to be fine
850 * without it, but there is a disturbing number of C programs
851 * out there that just assume that getenv("PATH") is never NULL
852 * and then die a painful segfault death.
853 */
3d55242a
CB
854 if (!path_kept) {
855 ret = setenv("PATH", "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", 1);
856 if (ret < 0)
a24c5678 857 SYSWARN("Failed to set environment variable");
3d55242a 858 }
b3a39ba6
DW
859 }
860
3d55242a 861 ret = putenv("container=lxc");
ffeeed8b 862 if (ret < 0)
818a57fc 863 return log_warn(-1, "Failed to set environment variable");
b3a39ba6 864
7385273f 865 /* Set container environment variables.*/
640952e5 866 if (ctx->container->lxc_conf) {
ab919e5f 867 lxc_list_for_each(iterator, &ctx->container->lxc_conf->environment) {
3d55242a
CB
868 char *env_tmp;
869
870 env_tmp = strdup((char *)iterator->elem);
871 if (!env_tmp)
7385273f 872 return -1;
7385273f 873
3d55242a 874 ret = putenv(env_tmp);
ffeeed8b
CB
875 if (ret < 0)
876 return log_error_errno(-1, errno, "Failed to set environment variable: %s", (char *)iterator->elem);
7385273f 877 }
878 }
879
8ce83369 880 /* Set extra environment variables. */
3d5e9f48
CS
881 if (extra_env) {
882 for (; *extra_env; extra_env++) {
3d55242a 883 char *p;
ea918412 884
8ce83369
CB
885 /* We just assume the user knows what they are doing, so
886 * we don't do any checks.
887 */
3d55242a
CB
888 p = strdup(*extra_env);
889 if (!p)
3d5e9f48 890 return -1;
3d55242a
CB
891
892 ret = putenv(p);
893 if (ret < 0)
a24c5678 894 SYSWARN("Failed to set environment variable");
3d5e9f48
CS
895 }
896 }
897
b3a39ba6
DW
898 return 0;
899}
900
74a3920a 901static char *lxc_attach_getpwshell(uid_t uid)
905022f7 902{
1b9c9f5b 903 __do_free char *line = NULL, *result = NULL;
cd8f5663 904 __do_fclose FILE *pipe_f = NULL;
6f4f1937 905 int fd, ret;
905022f7
CS
906 pid_t pid;
907 int pipes[2];
3fa23ac3
CB
908 bool found = false;
909 size_t line_bufsz = 0;
905022f7 910
8ce83369
CB
911 /* We need to fork off a process that runs the getent program, and we
912 * need to capture its output, so we use a pipe for that purpose.
905022f7 913 */
3fa23ac3 914 ret = pipe2(pipes, O_CLOEXEC);
905022f7
CS
915 if (ret < 0)
916 return NULL;
917
918 pid = fork();
919 if (pid < 0) {
920 close(pipes[0]);
921 close(pipes[1]);
922 return NULL;
923 }
924
3fa23ac3 925 if (!pid) {
905022f7
CS
926 char uid_buf[32];
927 char *arguments[] = {
928 "getent",
929 "passwd",
930 uid_buf,
931 NULL
932 };
933
934 close(pipes[0]);
935
8ce83369 936 /* We want to capture stdout. */
3fa23ac3 937 ret = dup2(pipes[1], STDOUT_FILENO);
905022f7 938 close(pipes[1]);
3fa23ac3 939 if (ret < 0)
ea918412 940 _exit(EXIT_FAILURE);
905022f7 941
8ce83369
CB
942 /* Get rid of stdin/stderr, so we try to associate it with
943 * /dev/null.
905022f7 944 */
3fa23ac3 945 fd = open_devnull();
905022f7 946 if (fd < 0) {
3fa23ac3
CB
947 close(STDIN_FILENO);
948 close(STDERR_FILENO);
905022f7 949 } else {
3fa23ac3 950 (void)dup3(fd, STDIN_FILENO, O_CLOEXEC);
59f0e209 951 (void)dup3(fd, STDERR_FILENO, O_CLOEXEC);
905022f7
CS
952 close(fd);
953 }
954
8ce83369 955 /* Finish argument list. */
f51c7eb4
CB
956 ret = strnprintf(uid_buf, sizeof(uid_buf), "%ld", (long)uid);
957 if (ret <= 0)
ea918412 958 _exit(EXIT_FAILURE);
905022f7 959
8ce83369 960 /* Try to run getent program. */
3fa23ac3 961 (void)execvp("getent", arguments);
ea918412 962 _exit(EXIT_FAILURE);
905022f7 963 }
3fa23ac3
CB
964
965 close(pipes[1]);
966
4110345b 967 pipe_f = fdopen(pipes[0], "re");
cf4026f1
CB
968 if (!pipe_f) {
969 close(pipes[0]);
970 goto reap_child;
971 }
972 /* Transfer ownership of pipes[0] to pipe_f. */
973 move_fd(pipes[0]);
974
3fa23ac3
CB
975 while (getline(&line, &line_bufsz, pipe_f) != -1) {
976 int i;
977 long value;
978 char *token;
979 char *endptr = NULL, *saveptr = NULL;
980
981 /* If we already found something, just continue to read
982 * until the pipe doesn't deliver any more data, but
983 * don't modify the existing data structure.
984 */
985 if (found)
986 continue;
987
18d4ffde 988 if (!line)
989 continue;
990
3fa23ac3
CB
991 /* Trim line on the right hand side. */
992 for (i = strlen(line); i > 0 && (line[i - 1] == '\n' || line[i - 1] == '\r'); --i)
993 line[i - 1] = '\0';
994
995 /* Split into tokens: first: user name. */
996 token = strtok_r(line, ":", &saveptr);
997 if (!token)
998 continue;
999
8de0119d 1000 /* next: placeholder password field */
3fa23ac3
CB
1001 token = strtok_r(NULL, ":", &saveptr);
1002 if (!token)
1003 continue;
1004
1005 /* next: user id */
1006 token = strtok_r(NULL, ":", &saveptr);
1007 value = token ? strtol(token, &endptr, 10) : 0;
1008 if (!token || !endptr || *endptr || value == LONG_MIN ||
ea918412 1009 value == LONG_MAX)
3fa23ac3
CB
1010 continue;
1011
8de0119d 1012 /* placeholder conherence check: user id matches */
3fa23ac3
CB
1013 if ((uid_t)value != uid)
1014 continue;
1015
1016 /* skip fields: gid, gecos, dir, go to next field 'shell' */
1017 for (i = 0; i < 4; i++) {
1018 token = strtok_r(NULL, ":", &saveptr);
1019 if (!token)
1020 continue;
1021 }
ea918412 1022
3fa23ac3
CB
1023 if (!token)
1024 continue;
ea918412 1025
1b9c9f5b 1026 free_disarm(result);
3fa23ac3
CB
1027 result = strdup(token);
1028
1029 /* Sanity check that there are no fields after that. */
1030 token = strtok_r(NULL, ":", &saveptr);
1031 if (token)
1032 continue;
1033
1034 found = true;
1035 }
ea918412 1036
cf4026f1 1037reap_child:
3fa23ac3 1038 ret = wait_for_pid(pid);
1b9c9f5b 1039 if (ret < 0)
3fa23ac3 1040 return NULL;
3fa23ac3 1041
1b9c9f5b 1042 if (!found)
3fa23ac3 1043 return NULL;
3fa23ac3 1044
1b9c9f5b 1045 return move_ptr(result);
905022f7 1046}
cb3e61fa 1047
d4db3d14 1048static bool fetch_seccomp(struct lxc_container *c, lxc_attach_options_t *options)
2c4ea790 1049{
cd8f5663 1050 __do_free char *path = NULL;
d4db3d14
CB
1051 int ret;
1052 bool bret;
2eef2bda 1053
afc691a0 1054 if (!attach_lsm(options)) {
cca66e06 1055 free_disarm(c->lxc_conf->seccomp.seccomp);
2c4ea790 1056 return true;
bd4307f0 1057 }
bd7b4e28 1058
afc691a0 1059 /* Remove current setting. */
d4db3d14 1060 if (!c->set_config_item(c, "lxc.seccomp.profile", "") &&
ea918412 1061 !c->set_config_item(c, "lxc.seccomp", ""))
2c4ea790 1062 return false;
bd7b4e28 1063
8ce83369 1064 /* Fetch the current profile path over the cmd interface. */
0b427da0 1065 path = c->get_running_config_item(c, "lxc.seccomp.profile");
bd7b4e28 1066 if (!path) {
d4db3d14 1067 INFO("Failed to retrieve lxc.seccomp.profile");
ea918412 1068
0b427da0 1069 path = c->get_running_config_item(c, "lxc.seccomp");
cca66e06
CB
1070 if (!path)
1071 return log_info(true, "Failed to retrieve lxc.seccomp");
bd7b4e28
SG
1072 }
1073
8ce83369 1074 /* Copy the value into the new lxc_conf. */
d4db3d14 1075 bret = c->set_config_item(c, "lxc.seccomp.profile", path);
d4db3d14
CB
1076 if (!bret)
1077 return false;
bd7b4e28 1078
8ce83369 1079 /* Attempt to parse the resulting config. */
d4db3d14 1080 ret = lxc_read_seccomp_config(c->lxc_conf);
cca66e06
CB
1081 if (ret < 0)
1082 return log_error(false, "Failed to retrieve seccomp policy");
2c4ea790 1083
cca66e06 1084 return log_info(true, "Retrieved seccomp policy");
2e812c16
CB
1085}
1086
6f4f1937 1087static bool no_new_privs(struct lxc_container *c, lxc_attach_options_t *options)
2e812c16 1088{
cd8f5663 1089 __do_free char *val = NULL;
2e812c16 1090
2e812c16 1091 /* Remove current setting. */
02d3b72b
CB
1092 if (!c->set_config_item(c, "lxc.no_new_privs", ""))
1093 return log_info(false, "Failed to unset lxc.no_new_privs");
2e812c16
CB
1094
1095 /* Retrieve currently active setting. */
1096 val = c->get_running_config_item(c, "lxc.no_new_privs");
02d3b72b
CB
1097 if (!val)
1098 return log_info(false, "Failed to retrieve lxc.no_new_privs");
2e812c16
CB
1099
1100 /* Set currently active setting. */
cd8f5663 1101 return c->set_config_item(c, "lxc.no_new_privs", val);
2c4ea790
SH
1102}
1103
338b230f 1104struct attach_payload {
a998454a 1105 int ipc_socket;
cecf3e83 1106 int terminal_pts_fd;
a998454a 1107 lxc_attach_options_t *options;
ab919e5f 1108 struct attach_context *ctx;
a998454a
CB
1109 lxc_attach_exec_t exec_function;
1110 void *exec_payload;
1111};
1112
338b230f 1113static void put_attach_payload(struct attach_payload *p)
ba2be1a8 1114{
afc691a0
CB
1115 if (p) {
1116 close_prot_errno_disarm(p->ipc_socket);
1117 close_prot_errno_disarm(p->terminal_pts_fd);
dd53c8af 1118 put_attach_context(p->ctx);
ab919e5f 1119 p->ctx = NULL;
b21da190 1120 }
ba2be1a8
CB
1121}
1122
338b230f 1123__noreturn static void do_attach(struct attach_payload *ap)
a998454a 1124{
afc691a0
CB
1125 lxc_attach_exec_t attach_function = move_ptr(ap->exec_function);
1126 void *attach_function_args = move_ptr(ap->exec_payload);
8723f88e 1127 int fd_lsm, ret;
338b230f
CB
1128 lxc_attach_options_t* options = ap->options;
1129 struct attach_context *ctx = ap->ctx;
ab919e5f 1130 struct lxc_conf *conf = ctx->container->lxc_conf;
a998454a
CB
1131
1132 /* A description of the purpose of this functionality is provided in the
1133 * lxc-attach(1) manual page. We have to remount here and not in the
1134 * parent process, otherwise /proc may not properly reflect the new pid
1135 * namespace.
1136 */
1137 if (!(options->namespaces & CLONE_NEWNS) &&
1138 (options->attach_flags & LXC_ATTACH_REMOUNT_PROC_SYS)) {
1139 ret = lxc_attach_remount_sys_proc();
b75c344c
CB
1140 if (ret < 0)
1141 goto on_error;
ea918412 1142
b75c344c 1143 TRACE("Remounted \"/proc\" and \"/sys\"");
a998454a
CB
1144 }
1145
5b514ce3 1146 /* Now perform additional attachments. */
a998454a 1147 if (options->attach_flags & LXC_ATTACH_SET_PERSONALITY) {
b75c344c
CB
1148 long new_personality;
1149
3a881819 1150 if (options->personality == LXC_ATTACH_DETECT_PERSONALITY)
ab919e5f 1151 new_personality = ctx->personality;
b75c344c
CB
1152 else
1153 new_personality = options->personality;
ea918412 1154
ee142207 1155 if (new_personality != LXC_ARCH_UNCHANGED) {
64a04c84 1156 ret = lxc_personality(new_personality);
ee142207
CB
1157 if (ret < 0)
1158 goto on_error;
ea918412 1159
ee142207
CB
1160 TRACE("Set new personality");
1161 }
a998454a 1162 }
a998454a
CB
1163
1164 if (options->attach_flags & LXC_ATTACH_DROP_CAPABILITIES) {
677e1d27 1165 ret = drop_capabilities(ctx);
b75c344c
CB
1166 if (ret < 0)
1167 goto on_error;
ea918412 1168
b75c344c 1169 TRACE("Dropped capabilities");
a998454a
CB
1170 }
1171
1172 /* Always set the environment (specify (LXC_ATTACH_KEEP_ENV, NULL, NULL)
1173 * if you want this to be a no-op).
1174 */
ab919e5f 1175 ret = lxc_attach_set_environment(ctx,
7385273f 1176 options->env_policy,
a998454a
CB
1177 options->extra_env_vars,
1178 options->extra_keep_env);
b75c344c
CB
1179 if (ret < 0)
1180 goto on_error;
ea918412 1181
b75c344c 1182 TRACE("Set up environment");
a998454a 1183
afc691a0
CB
1184 /*
1185 * This remark only affects fully unprivileged containers:
57de839f
CB
1186 * Receive fd for LSM security module before we set{g,u}id(). The reason
1187 * is that on set{g,u}id() the kernel will a) make us undumpable and b)
1188 * we will change our effective uid. This means our effective uid will
1189 * be different from the effective uid of the process that created us
1190 * which means that this processs no longer has capabilities in our
1191 * namespace including CAP_SYS_PTRACE. This means we will not be able to
1192 * read and /proc/<pid> files for the process anymore when /proc is
1193 * mounted with hidepid={1,2}. So let's get the lsm label fd before the
1194 * set{g,u}id().
1195 */
afc691a0 1196 if (attach_lsm(options) && ctx->lsm_label) {
8723f88e 1197 if (!sync_wait_fd(ap->ipc_socket, &fd_lsm)) {
6e36c297 1198 SYSERROR("Failed to receive lsm label fd");
b75c344c 1199 goto on_error;
9044b79e 1200 }
1201
8723f88e 1202 TRACE("Received LSM label file descriptor %d from parent", fd_lsm);
57de839f
CB
1203 }
1204
08ea9270 1205 if (options->stdin_fd > 0 && isatty(options->stdin_fd)) {
cd0a2b2f 1206 ret = lxc_make_controlling_terminal(options->stdin_fd);
08ea9270
CB
1207 if (ret < 0)
1208 goto on_error;
1209 }
1210
9475d2b9
CB
1211 if ((options->attach_flags & LXC_ATTACH_SETGROUPS) &&
1212 options->groups.size > 0) {
8caac583
RJ
1213 if (!lxc_setgroups(options->groups.list, options->groups.size))
1214 goto on_error;
1215 } else {
1216 if (!lxc_drop_groups() && errno != EPERM)
1217 goto on_error;
1218 }
b58214ac 1219
4475fabb 1220 if (options->namespaces & CLONE_NEWUSER)
3ac4480a 1221 if (!lxc_switch_uid_gid(ctx->setup_ns_uid, ctx->setup_ns_gid))
b75c344c 1222 goto on_error;
936efc72 1223
afc691a0 1224 if (attach_lsm(options) && ctx->lsm_label) {
d3ba7c98 1225 bool on_exec;
a998454a
CB
1226
1227 /* Change into our new LSM profile. */
d3ba7c98 1228 on_exec = options->attach_flags & LXC_ATTACH_LSM_EXEC ? true : false;
8723f88e
CB
1229 ret = ctx->lsm_ops->process_label_set_at(ctx->lsm_ops, fd_lsm, ctx->lsm_label, on_exec);
1230 close_prot_errno_disarm(fd_lsm);
b75c344c
CB
1231 if (ret < 0)
1232 goto on_error;
ea918412 1233
ab919e5f 1234 TRACE("Set %s LSM label to \"%s\"", ctx->lsm_ops->name, ctx->lsm_label);
a998454a
CB
1235 }
1236
640952e5 1237 if (conf->no_new_privs || (options->attach_flags & LXC_ATTACH_NO_NEW_PRIVS)) {
6ce8e678
AL
1238 ret = prctl(PR_SET_NO_NEW_PRIVS, prctl_arg(1), prctl_arg(0),
1239 prctl_arg(0), prctl_arg(0));
1240 if (ret < 0)
1241 goto on_error;
1242
1243 TRACE("Set PR_SET_NO_NEW_PRIVS");
1244 }
1245
a998454a
CB
1246 /* The following is done after the communication socket is shut down.
1247 * That way, all errors that might (though unlikely) occur up until this
1248 * point will have their messages printed to the original stderr (if
1249 * logging is so configured) and not the fd the user supplied, if any.
1250 */
1251
1252 /* Fd handling for stdin, stdout and stderr; ignore errors here, user
1253 * may want to make sure the fds are closed, for example.
1254 */
08ea9270 1255 if (options->stdin_fd >= 0 && options->stdin_fd != STDIN_FILENO)
40301d48 1256 if (dup2(options->stdin_fd, STDIN_FILENO) < 0)
a7563434 1257 SYSDEBUG("Failed to replace stdin with %d", options->stdin_fd);
08ea9270
CB
1258
1259 if (options->stdout_fd >= 0 && options->stdout_fd != STDOUT_FILENO)
40301d48 1260 if (dup2(options->stdout_fd, STDOUT_FILENO) < 0)
93b9960a 1261 SYSDEBUG("Failed to replace stdout with %d", options->stdout_fd);
08ea9270
CB
1262
1263 if (options->stderr_fd >= 0 && options->stderr_fd != STDERR_FILENO)
40301d48 1264 if (dup2(options->stderr_fd, STDERR_FILENO) < 0)
93b9960a 1265 SYSDEBUG("Failed to replace stderr with %d", options->stderr_fd);
a998454a
CB
1266
1267 /* close the old fds */
08ea9270 1268 if (options->stdin_fd > STDERR_FILENO)
a998454a 1269 close(options->stdin_fd);
08ea9270
CB
1270
1271 if (options->stdout_fd > STDERR_FILENO)
a998454a 1272 close(options->stdout_fd);
08ea9270
CB
1273
1274 if (options->stderr_fd > STDERR_FILENO)
a998454a
CB
1275 close(options->stderr_fd);
1276
427a8067
CB
1277 /*
1278 * Try to remove FD_CLOEXEC flag from stdin/stdout/stderr, but also
a998454a
CB
1279 * here, ignore errors.
1280 */
427a8067 1281 for (int fd = STDIN_FILENO; fd <= STDERR_FILENO; fd++) {
3f62938a 1282 ret = fd_cloexec(fd, false);
b75c344c
CB
1283 if (ret < 0) {
1284 SYSERROR("Failed to clear FD_CLOEXEC from file descriptor %d", fd);
1285 goto on_error;
1286 }
a998454a
CB
1287 }
1288
9e84479f 1289 if (options->attach_flags & LXC_ATTACH_TERMINAL) {
338b230f 1290 ret = lxc_terminal_prepare_login(ap->terminal_pts_fd);
ba2be1a8 1291 if (ret < 0) {
338b230f 1292 SYSERROR("Failed to prepare terminal file descriptor %d", ap->terminal_pts_fd);
ba2be1a8
CB
1293 goto on_error;
1294 }
ea918412 1295
338b230f 1296 TRACE("Prepared terminal file descriptor %d", ap->terminal_pts_fd);
ba2be1a8
CB
1297 }
1298
4475fabb 1299 /* Avoid unnecessary syscalls. */
3ac4480a
CB
1300 if (ctx->setup_ns_uid == ctx->target_ns_uid)
1301 ctx->target_ns_uid = LXC_INVALID_UID;
1302
1303 if (ctx->setup_ns_gid == ctx->target_ns_gid)
1304 ctx->target_ns_gid = LXC_INVALID_GID;
4475fabb 1305
3ac4480a
CB
1306 /*
1307 * Make sure that the processes STDIO is correctly owned by the user
1308 * that we are switching to.
1309 */
1310 ret = fix_stdio_permissions(ctx->target_ns_uid);
1311 if (ret)
1312 INFO("Failed to adjust stdio permissions");
4475fabb 1313
e18aba7d
CB
1314 if (conf->seccomp.seccomp) {
1315 ret = lxc_seccomp_load(conf);
1316 if (ret < 0)
1317 goto on_error;
1318
1319 TRACE("Loaded seccomp profile");
1320
1321 ret = lxc_seccomp_send_notifier_fd(&conf->seccomp, ap->ipc_socket);
1322 if (ret < 0)
1323 goto on_error;
c5bac506 1324 lxc_seccomp_close_notifier_fd(&conf->seccomp);
e18aba7d
CB
1325 }
1326
3ac4480a 1327 if (!lxc_switch_uid_gid(ctx->target_ns_uid, ctx->target_ns_gid))
936efc72
CB
1328 goto on_error;
1329
cd5f35ec
CB
1330 put_attach_payload(ap);
1331
a998454a 1332 /* We're done, so we can now do whatever the user intended us to do. */
afc691a0 1333 _exit(attach_function(attach_function_args));
b75c344c
CB
1334
1335on_error:
dab02267 1336 ERROR("Failed to attach to container");
cd5f35ec 1337 put_attach_payload(ap);
c7ac2e1c 1338 _exit(EXIT_FAILURE);
a998454a
CB
1339}
1340
/*
 * Initialize @terminal and create a new terminal for the container identified
 * by @name and @lxcpath using @conf.
 *
 * Returns 0 on success, -1 if lxc_terminal_create() fails.
 */
static int lxc_attach_terminal(const char *name, const char *lxcpath, struct lxc_conf *conf,
			       struct lxc_terminal *terminal)
{
	lxc_terminal_init(terminal);

	if (lxc_terminal_create(name, lxcpath, conf, terminal) < 0)
		return log_error(-1, "Failed to create terminal");

	return 0;
}
1354
/*
 * Open the mainloop @descr and register the handlers of @terminal with it.
 *
 * Returns 0 on success and -1 on failure. If registering the handlers fails
 * the mainloop is closed again before returning.
 */
static int lxc_attach_terminal_mainloop_init(struct lxc_terminal *terminal,
					     struct lxc_async_descr *descr)
{
	if (lxc_mainloop_open(descr) < 0)
		return log_error(-1, "Failed to create mainloop");

	if (lxc_terminal_mainloop_add(descr, terminal) >= 0)
		return 0;

	/* Undo lxc_mainloop_open() so the caller has nothing to clean up. */
	lxc_mainloop_close(descr);
	return log_error(-1, "Failed to add handlers to mainloop");
}
1372
36a94ce8 1373static inline void lxc_attach_terminal_close_ptx(struct lxc_terminal *terminal)
ba2be1a8 1374{
36a94ce8 1375 close_prot_errno_disarm(terminal->ptx);
ba2be1a8
CB
1376}
1377
cecf3e83 1378static inline void lxc_attach_terminal_close_pts(struct lxc_terminal *terminal)
ba2be1a8 1379{
41808e20 1380 close_prot_errno_disarm(terminal->pty);
ba2be1a8
CB
1381}
1382
9e84479f 1383static inline void lxc_attach_terminal_close_peer(struct lxc_terminal *terminal)
ba2be1a8 1384{
19a3e906 1385 close_prot_errno_disarm(terminal->peer);
ba2be1a8
CB
1386}
1387
9e84479f 1388static inline void lxc_attach_terminal_close_log(struct lxc_terminal *terminal)
ba2be1a8 1389{
19a3e906 1390 close_prot_errno_disarm(terminal->log_fd);
ba2be1a8
CB
1391}
1392
908fbc1a
CB
1393int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function,
1394 void *exec_payload, lxc_attach_options_t *options,
1395 pid_t *attached_process)
9c4693b8 1396{
6f9fe5d0 1397 int ret_parent = -1;
3298b37d 1398 struct lxc_async_descr descr = {};
a9f0cecf 1399 int ret;
26abd7ea 1400 char *name, *lxcpath;
9c4693b8 1401 int ipc_sockets[2];
500ed813 1402 pid_t attached_pid, pid, to_cleanup_pid;
ab919e5f 1403 struct attach_context *ctx;
9e84479f 1404 struct lxc_terminal terminal;
1cce35e6 1405 struct lxc_conf *conf;
9c4693b8 1406
908fbc1a 1407 if (!container)
540a2f70 1408 return ret_set_errno(-1, EINVAL);
908fbc1a
CB
1409
1410 if (!lxc_container_get(container))
540a2f70 1411 return ret_set_errno(-1, EINVAL);
908fbc1a
CB
1412
1413 name = container->name;
1414 lxcpath = container->config_path;
1415
afc691a0 1416 if (!options) {
9c4693b8 1417 options = &attach_static_default_options;
afc691a0
CB
1418 options->lsm_label = NULL;
1419 }
9c4693b8 1420
9745eb8a 1421 ctx = alloc_attach_context();
ab919e5f 1422 if (!ctx) {
9745eb8a
CB
1423 lxc_container_put(container);
1424 return log_error_errno(-ENOMEM, ENOMEM, "Failed to allocate attach context");
1425 }
1426
afc691a0 1427 ret = get_attach_context(ctx, container, options);
9745eb8a 1428 if (ret) {
7e995801 1429 put_attach_context(ctx);
74ce42b5 1430 return log_error(-1, "Failed to get attach context");
9c4693b8
CS
1431 }
1432
ab919e5f 1433 conf = ctx->container->lxc_conf;
ba773996 1434
ab919e5f 1435 if (!fetch_seccomp(ctx->container, options))
ae026f55 1436 WARN("Failed to get seccomp policy");
2c4ea790 1437
ab919e5f 1438 if (!no_new_privs(ctx->container, options))
ae026f55 1439 WARN("Could not determine whether PR_SET_NO_NEW_PRIVS is set");
2e812c16 1440
9b31ab58 1441 ret = prepare_namespaces(ctx, options);
b7873c95 1442 if (ret) {
52ed870e 1443 put_attach_context(ctx);
74ce42b5 1444 return log_error(-1, "Failed to get namespace file descriptors");
9c4693b8
CS
1445 }
1446
9e84479f 1447 if (options->attach_flags & LXC_ATTACH_TERMINAL) {
f797f05e 1448 ret = lxc_attach_terminal(name, lxcpath, conf, &terminal);
ba2be1a8 1449 if (ret < 0) {
dd53c8af 1450 put_attach_context(ctx);
74ce42b5 1451 return log_error(-1, "Failed to setup new terminal");
ba2be1a8
CB
1452 }
1453
9e84479f 1454 terminal.log_fd = options->log_fd;
c948657b 1455 } else {
9e84479f 1456 lxc_terminal_init(&terminal);
ba2be1a8
CB
1457 }
1458
8ce83369
CB
1459 /* Create a socket pair for IPC communication; set SOCK_CLOEXEC in order
1460 * to make sure we don't irritate other threads that want to fork+exec
1461 * away
9c4693b8
CS
1462 *
1463 * IMPORTANT: if the initial process is multithreaded and another call
1464 * just fork()s away without exec'ing directly after, the socket fd will
1465 * exist in the forked process from the other thread and any close() in
8ce83369 1466 * our own child process will not really cause the socket to close
4f6c7312 1467 * properly, potentially causing the parent to get stuck.
9c4693b8
CS
1468 *
1469 * For this reason, while IPC is still active, we have to use shutdown()
8ce83369
CB
1470 * if the child exits prematurely in order to signal that the socket is
1471 * closed and cannot assume that the child exiting will automatically do
1472 * that.
9c4693b8
CS
1473 *
1474 * IPC mechanism: (X is receiver)
bd6a2355 1475 * initial process transient process attached process
9c4693b8
CS
1476 * X <--- send pid of
1477 * attached proc,
1478 * then exit
1479 * send 0 ------------------------------------> X
1480 * [do initialization]
1481 * X <------------------------------------ send 1
1482 * [add to cgroup, ...]
1483 * send 2 ------------------------------------> X
81f466d0
CB
1484 * [set LXC_ATTACH_NO_NEW_PRIVS]
1485 * X <------------------------------------ send 3
1486 * [open LSM label fd]
1487 * send 4 ------------------------------------> X
1488 * [set LSM label]
9c4693b8
CS
1489 * close socket close socket
1490 * run program
1491 */
1492 ret = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
1493 if (ret < 0) {
dd53c8af 1494 put_attach_context(ctx);
74ce42b5 1495 return log_error_errno(-1, errno, "Could not set up required IPC mechanism for attaching");
9c4693b8
CS
1496 }
1497
bd6a2355 1498 /* Create transient process, two reasons:
e3f0e436 1499 * 1. We can't setns() in the child itself, since we want to make
8ce83369 1500 * sure we are properly attached to the pidns.
e3f0e436 1501 * 2. Also, the initial thread has to put the attached process
8ce83369
CB
1502 * into the cgroup, which we can only do if we didn't already
1503 * setns() (otherwise, user namespaces will hate us).
9c4693b8
CS
1504 */
1505 pid = fork();
9c4693b8 1506 if (pid < 0) {
dd53c8af 1507 put_attach_context(ctx);
74ce42b5 1508 return log_error_errno(-1, errno, "Failed to create first subprocess");
9c4693b8
CS
1509 }
1510
4f25e72f 1511 if (pid == 0) {
26abd7ea 1512 char *cwd, *new_cwd;
a588a482 1513
ba2be1a8 1514 /* close unneeded file descriptors */
4f25e72f 1515 close_prot_errno_disarm(ipc_sockets[0]);
2202afc9 1516
4f25e72f
CB
1517 if (options->attach_flags & LXC_ATTACH_TERMINAL) {
1518 lxc_attach_terminal_close_ptx(&terminal);
1519 lxc_attach_terminal_close_peer(&terminal);
1520 lxc_attach_terminal_close_log(&terminal);
f4364484
SG
1521 }
1522
4f25e72f 1523 /* Wait for the parent to have setup cgroups. */
6e48e7c5 1524 if (!sync_wait(ipc_sockets[1], ATTACH_SYNC_CGROUP)) {
4f25e72f 1525 shutdown(ipc_sockets[1], SHUT_RDWR);
dd53c8af 1526 put_attach_context(ctx);
4f25e72f 1527 _exit(EXIT_FAILURE);
bb2ada6f
CB
1528 }
1529
c538837d
CB
1530 if (!attach_context_security_barrier(ctx)) {
1531 shutdown(ipc_sockets[1], SHUT_RDWR);
1532 put_attach_context(ctx);
1533 _exit(EXIT_FAILURE);
1534 }
1535
a588a482
CB
1536 cwd = getcwd(NULL, 0);
1537
c538837d
CB
1538 /*
1539 * Attach now, create another subprocess later, since pid
1540 * namespaces only really affect the children of the current
1541 * process.
1542 *
1543 * Note that this is a crucial barrier. We're no moving into
1544 * the container's context so we need to make sure to not leak
1545 * anything sensitive. That especially means things such as
1546 * open file descriptors!
4f25e72f 1547 */
9b31ab58 1548 ret = attach_namespaces(ctx, options);
4f25e72f
CB
1549 if (ret < 0) {
1550 ERROR("Failed to enter namespaces");
1551 shutdown(ipc_sockets[1], SHUT_RDWR);
dd53c8af 1552 put_attach_context(ctx);
4f25e72f 1553 _exit(EXIT_FAILURE);
ba2be1a8
CB
1554 }
1555
4f25e72f
CB
1556 /* Attach succeeded, try to cwd. */
1557 if (options->initial_cwd)
1558 new_cwd = options->initial_cwd;
1559 else
1560 new_cwd = cwd;
1561 if (new_cwd) {
1562 ret = chdir(new_cwd);
1563 if (ret < 0)
1564 WARN("Could not change directory to \"%s\"", new_cwd);
ba2be1a8 1565 }
a588a482 1566 free_disarm(cwd);
c6d09e15 1567
4f25e72f 1568 /* Create attached process. */
4f25e72f
CB
1569 pid = lxc_raw_clone(CLONE_PARENT, NULL);
1570 if (pid < 0) {
1571 SYSERROR("Failed to clone attached process");
1572 shutdown(ipc_sockets[1], SHUT_RDWR);
dd53c8af 1573 put_attach_context(ctx);
4f25e72f
CB
1574 _exit(EXIT_FAILURE);
1575 }
f4364484 1576
4f25e72f 1577 if (pid == 0) {
338b230f 1578 struct attach_payload ap = {
a64902ab
CB
1579 .ipc_socket = ipc_sockets[1],
1580 .options = options,
1581 .ctx = ctx,
1582 .terminal_pts_fd = terminal.pty,
1583 .exec_function = exec_function,
1584 .exec_payload = exec_payload,
1585 };
1586
4f25e72f
CB
1587 if (options->attach_flags & LXC_ATTACH_TERMINAL) {
1588 ret = lxc_terminal_signal_sigmask_safe_blocked(&terminal);
1589 if (ret < 0) {
1590 SYSERROR("Failed to reset signal mask");
1591 _exit(EXIT_FAILURE);
1592 }
1593 }
ea918412 1594
a64902ab 1595 /* Does not return. */
338b230f 1596 do_attach(&ap);
62183f1a 1597 }
bd6a2355 1598 TRACE("Attached process %d started initializing", pid);
2eef2bda 1599
4f25e72f
CB
1600 if (options->attach_flags & LXC_ATTACH_TERMINAL)
1601 lxc_attach_terminal_close_pts(&terminal);
ea918412 1602
4f25e72f 1603 /* Tell grandparent the pid of the pid of the newly created child. */
8723f88e 1604 if (!sync_wake_pid(ipc_sockets[1], pid)) {
4f25e72f
CB
1605 /* If this really happens here, this is very unfortunate, since
1606 * the parent will not know the pid of the attached process and
1607 * will not be able to wait for it (and we won't either due to
1608 * CLONE_PARENT) so the parent won't be able to reap it and the
1609 * attached process will remain a zombie.
1610 */
1611 shutdown(ipc_sockets[1], SHUT_RDWR);
dd53c8af 1612 put_attach_context(ctx);
4f25e72f
CB
1613 _exit(EXIT_FAILURE);
1614 }
9c4693b8 1615
4f25e72f 1616 /* The rest is in the hands of the initial and the attached process. */
dd53c8af 1617 put_attach_context(ctx);
4f25e72f
CB
1618 _exit(EXIT_SUCCESS);
1619 }
bd6a2355 1620 TRACE("Transient process %d started initializing", pid);
6f4f1937 1621
4f25e72f 1622 to_cleanup_pid = pid;
ea918412 1623
4f25e72f 1624 /* close unneeded file descriptors */
cb2420df 1625 close_prot_errno_disarm(ipc_sockets[1]);
9b31ab58 1626 put_namespaces(ctx);
4f25e72f
CB
1627 if (options->attach_flags & LXC_ATTACH_TERMINAL)
1628 lxc_attach_terminal_close_pts(&terminal);
81f466d0 1629
4f25e72f
CB
1630 /* Attach to cgroup, if requested. */
1631 if (options->attach_flags & LXC_ATTACH_MOVE_TO_CGROUP) {
1632 /*
1633 * If this is the unified hierarchy cgroup_attach() is
1634 * enough.
1635 */
1636 ret = cgroup_attach(conf, name, lxcpath, pid);
9a57778b 1637 if (ret) {
4f25e72f 1638 call_cleaner(cgroup_exit) struct cgroup_ops *cgroup_ops = NULL;
f740bc63 1639 if (!ERRNO_IS_NOT_SUPPORTED(ret)) {
9a57778b
CB
1640 SYSERROR("Failed to attach cgroup");
1641 goto on_error;
1642 }
1643
4f25e72f
CB
1644 cgroup_ops = cgroup_init(conf);
1645 if (!cgroup_ops)
1646 goto on_error;
9044b79e 1647
4f25e72f
CB
1648 if (!cgroup_ops->attach(cgroup_ops, conf, name, lxcpath, pid))
1649 goto on_error;
81f466d0 1650 }
9a57778b 1651
bd6a2355 1652 TRACE("Moved transient process %d into container cgroup", pid);
4f25e72f 1653 }
81f466d0 1654
6ee13f5b
CB
1655 /*
1656 * Close sensitive file descriptors we don't need anymore. Even if
1657 * we're the parent.
1658 */
1659 if (!attach_context_security_barrier(ctx))
1660 goto on_error;
1661
4f25e72f
CB
1662 /* Setup /proc limits */
1663 if (!lxc_list_empty(&conf->procs)) {
1664 ret = setup_proc_filesystem(&conf->procs, pid);
1665 if (ret < 0)
1666 goto on_error;
4f3b6a85
CB
1667
1668 TRACE("Setup /proc/%d settings", pid);
4f25e72f 1669 }
cdb2a47f 1670
4f25e72f
CB
1671 /* Setup resource limits */
1672 if (!lxc_list_empty(&conf->limits)) {
1673 ret = setup_resource_limits(&conf->limits, pid);
1674 if (ret < 0)
1675 goto on_error;
4f3b6a85
CB
1676
1677 TRACE("Setup resource limits");
4f25e72f 1678 }
cdb2a47f 1679
4f25e72f
CB
1680 if (options->attach_flags & LXC_ATTACH_TERMINAL) {
1681 ret = lxc_attach_terminal_mainloop_init(&terminal, &descr);
1682 if (ret < 0)
1683 goto on_error;
9c4693b8 1684
4f25e72f
CB
1685 TRACE("Initialized terminal mainloop");
1686 }
9c4693b8 1687
4f25e72f 1688 /* Let the child process know to go ahead. */
6e48e7c5 1689 if (!sync_wake(ipc_sockets[0], ATTACH_SYNC_CGROUP))
4f25e72f 1690 goto close_mainloop;
ba2be1a8 1691
bd6a2355 1692 TRACE("Told transient process to start initializing");
ea918412 1693
bd6a2355 1694 /* Get pid of attached process from transient process. */
8723f88e 1695 if (!sync_wait_pid(ipc_sockets[0], &attached_pid))
4f25e72f 1696 goto close_mainloop;
ba2be1a8 1697
4f25e72f 1698 TRACE("Received pid %d of attached process in parent pid namespace", attached_pid);
ba2be1a8 1699
4f25e72f 1700 /* Ignore SIGKILL (CTRL-C) and SIGQUIT (CTRL-\) - issue #313. */
5d2b46fb 1701 if (options->stdin_fd == STDIN_FILENO) {
4f25e72f
CB
1702 signal(SIGINT, SIG_IGN);
1703 signal(SIGQUIT, SIG_IGN);
1704 }
ba2be1a8 1705
bd6a2355 1706 /* Reap transient process. */
4f25e72f
CB
1707 ret = wait_for_pid(pid);
1708 if (ret < 0)
1709 goto close_mainloop;
ba2be1a8 1710
bd6a2355 1711 TRACE("Transient process %d exited", pid);
ea918412 1712
4f25e72f
CB
1713 /* We will always have to reap the attached process now. */
1714 to_cleanup_pid = attached_pid;
9c4693b8 1715
4f25e72f 1716 /* Open LSM fd and send it to child. */
afc691a0 1717 if (attach_lsm(options) && ctx->lsm_label) {
8723f88e 1718 __do_close int fd_lsm = -EBADF;
4f25e72f 1719 bool on_exec;
ea918412 1720
4f25e72f 1721 on_exec = options->attach_flags & LXC_ATTACH_LSM_EXEC ? true : false;
8723f88e
CB
1722 fd_lsm = ctx->lsm_ops->process_label_fd_get(ctx->lsm_ops, attached_pid, on_exec);
1723 if (fd_lsm < 0)
4f25e72f 1724 goto close_mainloop;
9c4693b8 1725
8723f88e 1726 TRACE("Opened LSM label file descriptor %d", fd_lsm);
ea918412 1727
4f25e72f 1728 /* Send child fd of the LSM security module to write to. */
8723f88e 1729 if (!sync_wake_fd(ipc_sockets[0], fd_lsm)) {
6e36c297 1730 SYSERROR("Failed to send lsm label fd");
4f25e72f
CB
1731 goto close_mainloop;
1732 }
1733
8723f88e 1734 TRACE("Sent LSM label file descriptor %d to child", fd_lsm);
9c4693b8 1735 }
ea918412 1736
4f25e72f
CB
1737 if (conf->seccomp.seccomp) {
1738 ret = lxc_seccomp_recv_notifier_fd(&conf->seccomp, ipc_sockets[0]);
1739 if (ret < 0)
1740 goto close_mainloop;
9c4693b8 1741
4f25e72f 1742 ret = lxc_seccomp_add_notifier(name, lxcpath, &conf->seccomp);
d6d979bc 1743 if (ret < 0)
4f25e72f 1744 goto close_mainloop;
d6d979bc 1745 }
9c4693b8 1746
4f25e72f
CB
1747 /* We're done, the child process should now execute whatever it
1748 * is that the user requested. The parent can now track it with
1749 * waitpid() or similar.
1750 */
9c4693b8 1751
4f25e72f 1752 *attached_process = attached_pid;
a998454a 1753
4f25e72f
CB
1754 /* Now shut down communication with child, we're done. */
1755 shutdown(ipc_sockets[0], SHUT_RDWR);
cb2420df 1756 close_prot_errno_disarm(ipc_sockets[0]);
f157b056 1757
4f25e72f
CB
1758 ret_parent = 0;
1759 to_cleanup_pid = -1;
ea918412 1760
4f25e72f
CB
1761 if (options->attach_flags & LXC_ATTACH_TERMINAL) {
1762 ret = lxc_mainloop(&descr, -1);
1763 if (ret < 0) {
1764 ret_parent = -1;
1765 to_cleanup_pid = attached_pid;
1766 }
a998454a 1767 }
ea918412 1768
4f25e72f 1769close_mainloop:
9e84479f 1770 if (options->attach_flags & LXC_ATTACH_TERMINAL)
4f25e72f 1771 lxc_mainloop_close(&descr);
9c4693b8 1772
4f25e72f
CB
1773on_error:
1774 if (ipc_sockets[0] >= 0) {
1775 shutdown(ipc_sockets[0], SHUT_RDWR);
cb2420df 1776 close_prot_errno_disarm(ipc_sockets[0]);
9c4693b8 1777 }
ea918412 1778
4f25e72f
CB
1779 if (to_cleanup_pid > 0)
1780 (void)wait_for_pid(to_cleanup_pid);
1781
1782 if (options->attach_flags & LXC_ATTACH_TERMINAL) {
1783 lxc_terminal_delete(&terminal);
1784 lxc_terminal_conf_free(&terminal);
1785 }
9c4693b8 1786
dd53c8af 1787 put_attach_context(ctx);
4f25e72f 1788 return ret_parent;
9c4693b8
CS
1789}
1790
06346bb0 1791int lxc_attach_run_command(void *payload)
9c4693b8 1792{
06346bb0
CB
1793 int ret = -1;
1794 lxc_attach_command_t *cmd = payload;
9c4693b8 1795
06346bb0
CB
1796 ret = execvp(cmd->program, cmd->argv);
1797 if (ret < 0) {
1798 switch (errno) {
1799 case ENOEXEC:
1800 ret = 126;
cf0fd972 1801 break;
06346bb0
CB
1802 case ENOENT:
1803 ret = 127;
cf0fd972 1804 break;
06346bb0
CB
1805 }
1806 }
ea918412 1807
c2af3a15 1808 return log_error_errno(ret, errno, "Failed to exec \"%s\"", cmd->program);
9c4693b8
CS
1809}
1810
1811int lxc_attach_run_shell(void* payload)
1812{
cd8f5663 1813 __do_free char *buf = NULL;
9c4693b8 1814 uid_t uid;
cb7aa5e8
DJ
1815 struct passwd pwent;
1816 struct passwd *pwentp = NULL;
9c4693b8 1817 char *user_shell;
cb7aa5e8
DJ
1818 size_t bufsize;
1819 int ret;
9c4693b8 1820
8ce83369 1821 /* Ignore payload parameter. */
9c4693b8
CS
1822 (void)payload;
1823
1824 uid = getuid();
cb7aa5e8
DJ
1825
1826 bufsize = sysconf(_SC_GETPW_R_SIZE_MAX);
1827 if (bufsize == -1)
1828 bufsize = 1024;
1829
1830 buf = malloc(bufsize);
1831 if (buf) {
1832 ret = getpwuid_r(uid, &pwent, buf, bufsize, &pwentp);
1833 if (!pwentp) {
1834 if (ret == 0)
ea918412 1835 WARN("Could not find matched password record");
cb7aa5e8
DJ
1836
1837 WARN("Failed to get password record - %u", uid);
1838 }
1839 }
9c4693b8 1840
8ce83369
CB
1841 /* This probably happens because of incompatible nss implementations in
1842 * host and container (remember, this code is still using the host's
1843 * glibc but our mount namespace is in the container) we may try to get
1844 * the information by spawning a [getent passwd uid] process and parsing
1845 * the result.
9c4693b8 1846 */
cb7aa5e8 1847 if (!pwentp)
9c4693b8
CS
1848 user_shell = lxc_attach_getpwshell(uid);
1849 else
cb7aa5e8 1850 user_shell = pwent.pw_shell;
ea918412 1851
9c4693b8 1852 if (user_shell)
acf47e1b 1853 execlp(user_shell, user_shell, (char *)NULL);
9c4693b8 1854
8ce83369
CB
1855 /* Executed if either no passwd entry or execvp fails, we will fall back
1856 * on /bin/sh as a default shell.
9c4693b8 1857 */
acf47e1b 1858 execlp("/bin/sh", "/bin/sh", (char *)NULL);
ea918412 1859
edeb1836 1860 SYSERROR("Failed to execute shell");
cb7aa5e8 1861 if (!pwentp)
edeb1836 1862 free(user_shell);
ea918412 1863
9c4693b8
CS
1864 return -1;
1865}