]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/attach.c
conf: remove unused variables
[mirror_lxc.git] / src / lxc / attach.c
CommitLineData
cc73685d 1/* SPDX-License-Identifier: LGPL-2.1+ */
e0732705 2
d38dd64a
CB
3#ifndef _GNU_SOURCE
4#define _GNU_SOURCE 1
5#endif
e0732705
CS
6#include <errno.h>
7#include <fcntl.h>
c476bdce 8#include <grp.h>
604ca1c0 9#include <linux/unistd.h>
6f4f1937 10#include <pwd.h>
0bece477 11#include <pthread.h>
6f4f1937
CB
12#include <signal.h>
13#include <stdio.h>
14#include <stdlib.h>
15#include <string.h>
6f4f1937 16#include <sys/mount.h>
e0732705
CS
17#include <sys/param.h>
18#include <sys/prctl.h>
5ec27989 19#include <sys/socket.h>
1ba0013f 20#include <sys/syscall.h>
905022f7 21#include <sys/wait.h>
604ca1c0
CB
22#include <termios.h>
23#include <unistd.h>
6f4f1937
CB
24
25#include <lxc/lxccontainer.h>
e0732705 26
81f466d0 27#include "af_unix.h"
e0732705
CS
28#include "attach.h"
29#include "caps.h"
c988c8b1
CB
30#include "cgroups/cgroup.h"
31#include "cgroups/cgroup_utils.h"
6f4f1937 32#include "commands.h"
2c4ea790 33#include "conf.h"
6f4f1937 34#include "config.h"
9b8e3c96 35#include "confile.h"
6f4f1937
CB
36#include "log.h"
37#include "lsm/lsm.h"
38#include "lxclock.h"
39#include "lxcseccomp.h"
604ca1c0 40#include "macro.h"
ba2be1a8 41#include "mainloop.h"
cd8f5663 42#include "memory_utils.h"
657256e0 43#include "mount_utils.h"
6f4f1937 44#include "namespace.h"
f40988c7 45#include "process_utils.h"
a9f0cecf 46#include "sync.h"
59524108 47#include "syscall_wrappers.h"
0ed9b1bc 48#include "terminal.h"
6f4f1937 49#include "utils.h"
9c4693b8 50
ac2cecc4 51lxc_log_define(attach, lxc);
e0732705 52
ef05d368
CB
53/* Define default options if no options are supplied by the user. */
54static lxc_attach_options_t attach_static_default_options = LXC_ATTACH_OPTIONS_DEFAULT;
55
20718e39
CB
56/*
57 * The context used to attach to the container.
3ac4480a
CB
58 * @attach_flags : the attach flags specified in lxc_attach_options_t
59 * @init_pid : the PID of the container's init process
60 * @dfd_init_pid : file descriptor to /proc/@init_pid
61 * __Must be closed in attach_context_security_barrier()__!
62 * @dfd_self_pid : file descriptor to /proc/self
63 * __Must be closed in attach_context_security_barrier()__!
64 * @setup_ns_uid : if CLONE_NEWUSER is specified will contain the uid used
65 * during attach setup.
66 * @setup_ns_gid : if CLONE_NEWUSER is specified will contain the gid used
67 * during attach setup.
68 * @target_ns_uid : if CLONE_NEWUSER is specified the uid that the final
69 * program will be run with.
70 * @target_ns_gid : if CLONE_NEWUSER is specified the gid that the final
71 * program will be run with.
72 * @target_host_uid : if CLONE_NEWUSER is specified the uid that the final
73 * program will be run with on the host.
74 * @target_host_gid : if CLONE_NEWUSER is specified the gid that the final
75 * program will be run with on the host.
76 * @lsm_label : LSM label to be used for the attaching process
77 * @container : the container we're attaching o
78 * @personality : the personality to use for the final program
79 * @capability : the capability mask of the @init_pid
80 * @ns_inherited : flags of namespaces that the final program will inherit
81 * from @init_pid
82 * @ns_fd : file descriptors to @init_pid's namespaces
20718e39 83 */
ab919e5f 84struct attach_context {
afc691a0 85 unsigned int attach_flags;
500ed813 86 int init_pid;
9b31ab58 87 int init_pidfd;
25c659d5
CB
88 int dfd_init_pid;
89 int dfd_self_pid;
3ac4480a
CB
90 uid_t setup_ns_uid;
91 gid_t setup_ns_gid;
92 uid_t target_ns_uid;
93 gid_t target_ns_gid;
94 uid_t target_host_uid;
95 uid_t target_host_gid;
0e304baa
CB
96 char *lsm_label;
97 struct lxc_container *container;
64a04c84 98 personality_t personality;
0e304baa
CB
99 unsigned long long capability_mask;
100 int ns_inherited;
101 int ns_fd[LXC_NS_MAX];
102 struct lsm_ops *lsm_ops;
103};
104
6f0c2cea 105static pid_t pidfd_get_pid(int dfd_init_pid, int pidfd)
d8764025
CB
106{
107 __do_free char *line = NULL;
108 __do_fclose FILE *f = NULL;
109 size_t len = 0;
6f0c2cea 110 char path[STRLITERALLEN("fdinfo/") + INTTYPE_TO_STRLEN(int) + 1 ] = "fdinfo/";
d8764025
CB
111 int ret;
112
6f0c2cea
CB
113 if (dfd_init_pid < 0 || pidfd < 0)
114 return ret_errno(EBADF);
d8764025 115
f51c7eb4
CB
116 ret = strnprintf(path + STRLITERALLEN("fdinfo/"), INTTYPE_TO_STRLEN(int), "%d", pidfd);
117 if (ret < 0)
d8764025
CB
118 return ret_errno(EIO);
119
6f0c2cea 120 f = fdopen_at(dfd_init_pid, path, "re", PROTECT_OPEN, PROTECT_LOOKUP_BENEATH);
d8764025
CB
121 if (!f)
122 return -errno;
123
124 while (getline(&line, &len, f) != -1) {
125 const char *prefix = "Pid:\t";
126 const size_t prefix_len = STRLITERALLEN("Pid:\t");
127 int pid = -ESRCH;
128 char *slider = line;
129
6a6c7030 130 if (!strnequal(slider, prefix, prefix_len))
d8764025
CB
131 continue;
132
133 slider += prefix_len;
134 slider = lxc_trim_whitespace_in_place(slider);
135
136 ret = lxc_safe_int(slider, &pid);
137 if (ret)
138 return -ret;
139
140 return pid;
141 }
142
143 return ret_errno(ENOENT);
144}
145
6e36c297
CB
/* Write @pid to @fd; succeeds only when the complete pid_t went out. */
static inline bool sync_wake_pid(int fd, pid_t pid)
{
	if (lxc_write_nointr(fd, &pid, sizeof(pid)) != sizeof(pid))
		return false;

	return true;
}
150
/* Read a pid_t from @fd into @pid; succeeds only on a complete read. */
static inline bool sync_wait_pid(int fd, pid_t *pid)
{
	if (lxc_read_nointr(fd, pid, sizeof(*pid)) != sizeof(*pid))
		return false;

	return true;
}
155
/* Pass the single descriptor @fd_send over the unix socket @fd. */
static inline bool sync_wake_fd(int fd, int fd_send)
{
	if (lxc_abstract_unix_send_fds(fd, &fd_send, 1, NULL, 0) <= 0)
		return false;

	return true;
}
160
/* Receive one descriptor from the unix socket @fd and store it in @fd_recv. */
static inline bool sync_wait_fd(int fd, int *fd_recv)
{
	if (lxc_abstract_unix_recv_one_fd(fd, fd_recv, NULL, 0) <= 0)
		return false;

	return true;
}
165
afc691a0
CB
166static bool attach_lsm(lxc_attach_options_t *options)
167{
b445fcb1 168 return (options->attach_flags & (LXC_ATTACH_LSM | LXC_ATTACH_LSM_LABEL));
afc691a0
CB
169}
170
9745eb8a
CB
171static struct attach_context *alloc_attach_context(void)
172{
581b849a
CB
173 struct attach_context *ctx;
174
175 ctx = zalloc(sizeof(struct attach_context));
176 if (!ctx)
177 return ret_set_errno(NULL, ENOMEM);
178
f620ed44
CB
179 ctx->init_pid = -ESRCH;
180
9b31ab58
CB
181 ctx->dfd_self_pid = -EBADF;
182 ctx->dfd_init_pid = -EBADF;
183 ctx->init_pidfd = -EBADF;
f620ed44 184
9b31ab58
CB
185 ctx->setup_ns_uid = LXC_INVALID_UID;
186 ctx->setup_ns_gid = LXC_INVALID_GID;
187 ctx->target_ns_uid = LXC_INVALID_UID;
188 ctx->target_ns_gid = LXC_INVALID_GID;
189 ctx->target_host_uid = LXC_INVALID_UID;
190 ctx->target_host_gid = LXC_INVALID_GID;
581b849a 191
2533995e 192 for (lxc_namespace_t i = 0; i < LXC_NS_MAX; i++)
581b849a
CB
193 ctx->ns_fd[i] = -EBADF;
194
195 return ctx;
9745eb8a
CB
196}
197
ee142207 198static int get_personality(const char *name, const char *lxcpath,
64a04c84 199 personality_t *personality)
d92c8e40
CB
200{
201 __do_free char *p = NULL;
7c43fa56 202 int ret;
ee142207 203 signed long per;
d92c8e40
CB
204
205 p = lxc_cmd_get_config_item(name, "lxc.arch", lxcpath);
ee142207
CB
206 if (!p) {
207 *personality = LXC_ARCH_UNCHANGED;
208 return 0;
209 }
d92c8e40 210
7c43fa56
CB
211 ret = lxc_config_parse_arch(p, &per);
212 if (ret < 0)
213 return syserror("Failed to parse personality");
ee142207
CB
214
215 *personality = per;
216 return 0;
d92c8e40
CB
217}
218
4475fabb 219static int userns_setup_ids(struct attach_context *ctx,
3ac4480a 220 lxc_attach_options_t *options)
4475fabb
CB
221{
222 __do_free char *line = NULL;
223 __do_fclose FILE *f_gidmap = NULL, *f_uidmap = NULL;
224 size_t len = 0;
225 uid_t init_ns_uid = LXC_INVALID_UID;
226 gid_t init_ns_gid = LXC_INVALID_GID;
227 uid_t nsuid, hostuid, range_uid;
228 gid_t nsgid, hostgid, range_gid;
229
230 if (!(options->namespaces & CLONE_NEWUSER))
231 return 0;
232
72a19d2f 233 f_uidmap = fdopen_at(ctx->dfd_init_pid, "uid_map", "re", PROTECT_OPEN, PROTECT_LOOKUP_BENEATH);
4475fabb
CB
234 if (!f_uidmap)
235 return log_error_errno(-errno, errno, "Failed to open uid_map");
236
237 while (getline(&line, &len, f_uidmap) != -1) {
238 if (sscanf(line, "%u %u %u", &nsuid, &hostuid, &range_uid) != 3)
239 continue;
240
241 if (0 >= nsuid && 0 < nsuid + range_uid) {
3ac4480a 242 ctx->setup_ns_uid = 0;
4475fabb
CB
243 TRACE("Container has mapping for uid 0");
244 break;
245 }
246
3ac4480a
CB
247 if (ctx->target_host_uid >= hostuid && ctx->target_host_uid < hostuid + range_uid) {
248 init_ns_uid = (ctx->target_host_uid - hostuid) + nsuid;
4475fabb
CB
249 TRACE("Container runs with uid %d", init_ns_uid);
250 }
251 }
252
72a19d2f 253 f_gidmap = fdopen_at(ctx->dfd_init_pid, "gid_map", "re", PROTECT_OPEN, PROTECT_LOOKUP_BENEATH);
4475fabb
CB
254 if (!f_gidmap)
255 return log_error_errno(-errno, errno, "Failed to open gid_map");
256
257 while (getline(&line, &len, f_gidmap) != -1) {
258 if (sscanf(line, "%u %u %u", &nsgid, &hostgid, &range_gid) != 3)
259 continue;
260
261 if (0 >= nsgid && 0 < nsgid + range_gid) {
3ac4480a 262 ctx->setup_ns_gid = 0;
4475fabb
CB
263 TRACE("Container has mapping for gid 0");
264 break;
265 }
266
3ac4480a
CB
267 if (ctx->target_host_gid >= hostgid && ctx->target_host_gid < hostgid + range_gid) {
268 init_ns_gid = (ctx->target_host_gid - hostgid) + nsgid;
4475fabb
CB
269 TRACE("Container runs with gid %d", init_ns_gid);
270 }
271 }
272
3ac4480a
CB
273 if (ctx->setup_ns_uid == LXC_INVALID_UID)
274 ctx->setup_ns_uid = init_ns_uid;
4475fabb 275
3ac4480a
CB
276 if (ctx->setup_ns_gid == LXC_INVALID_UID)
277 ctx->setup_ns_gid = init_ns_gid;
4475fabb 278
4475fabb
CB
279 return 0;
280}
281
282static void userns_target_ids(struct attach_context *ctx, lxc_attach_options_t *options)
283{
284 if (options->uid != LXC_INVALID_UID)
3ac4480a 285 ctx->target_ns_uid = options->uid;
4475fabb 286 else if (options->namespaces & CLONE_NEWUSER)
3ac4480a 287 ctx->target_ns_uid = ctx->setup_ns_uid;
4475fabb 288 else
3ac4480a 289 ctx->target_ns_uid = 0;
4475fabb 290
3ac4480a 291 if (ctx->target_ns_uid == LXC_INVALID_UID)
4475fabb
CB
292 WARN("Invalid uid specified");
293
294 if (options->gid != LXC_INVALID_GID)
3ac4480a 295 ctx->target_ns_gid = options->gid;
4475fabb 296 else if (options->namespaces & CLONE_NEWUSER)
3ac4480a 297 ctx->target_ns_gid = ctx->setup_ns_gid;
4475fabb 298 else
3ac4480a 299 ctx->target_ns_gid = 0;
4475fabb 300
3ac4480a 301 if (ctx->target_ns_gid == LXC_INVALID_GID)
4475fabb
CB
302 WARN("Invalid gid specified");
303}
304
9680e7b0
CB
305static int parse_init_status(struct attach_context *ctx, lxc_attach_options_t *options)
306{
307 __do_free char *line = NULL;
308 __do_fclose FILE *f = NULL;
309 size_t len = 0;
310 bool caps_found = false;
4475fabb 311 int ret;
9680e7b0 312
72a19d2f 313 f = fdopen_at(ctx->dfd_init_pid, "status", "re", PROTECT_OPEN, PROTECT_LOOKUP_BENEATH);
9680e7b0 314 if (!f)
4475fabb 315 return log_error_errno(-errno, errno, "Failed to open status file");
9680e7b0
CB
316
317 while (getline(&line, &len, f) != -1) {
318 signed long value = -1;
9680e7b0 319
4475fabb
CB
320 /*
321 * Format is: real, effective, saved set user, fs we only care
322 * about real uid.
323 */
324 ret = sscanf(line, "Uid: %ld", &value);
325 if (ret != EOF && ret == 1) {
3ac4480a
CB
326 ctx->target_host_uid = (uid_t)value;
327 TRACE("Container's init process runs with hostuid %d", ctx->target_host_uid);
4475fabb
CB
328 goto next;
329 }
9680e7b0 330
4475fabb
CB
331 ret = sscanf(line, "Gid: %ld", &value);
332 if (ret != EOF && ret == 1) {
3ac4480a
CB
333 ctx->target_host_gid = (gid_t)value;
334 TRACE("Container's init process runs with hostgid %d", ctx->target_host_gid);
4475fabb 335 goto next;
9680e7b0
CB
336 }
337
338 ret = sscanf(line, "CapBnd: %llx", &ctx->capability_mask);
339 if (ret != EOF && ret == 1) {
340 caps_found = true;
341 goto next;
342 }
343
344 next:
3ac4480a
CB
345 if (ctx->target_host_uid != LXC_INVALID_UID &&
346 ctx->target_host_gid != LXC_INVALID_GID &&
4475fabb 347 caps_found)
9680e7b0
CB
348 break;
349
350 }
351
3ac4480a 352 ret = userns_setup_ids(ctx, options);
4475fabb
CB
353 if (ret)
354 return log_error_errno(ret, errno, "Failed to get setup ids");
355 userns_target_ids(ctx, options);
356
9680e7b0
CB
357 return 0;
358}
359
9b31ab58
CB
360static bool pidfd_setns_supported(struct attach_context *ctx)
361{
362 int ret;
363
364 /*
365 * The ability to attach to time namespaces came after the introduction
366 * of of using pidfds for attaching to namespaces. To avoid having to
367 * special-case both CLONE_NEWUSER and CLONE_NEWTIME handling, let's
368 * use CLONE_NEWTIME as gatekeeper.
369 */
370 if (ctx->init_pidfd >= 0)
371 ret = setns(ctx->init_pidfd, CLONE_NEWTIME);
372 else
373 ret = -EOPNOTSUPP;
374 TRACE("Attaching to namespaces via pidfds %s",
375 ret ? "unsupported" : "supported");
376 return ret == 0;
377}
378
500ed813 379static int get_attach_context(struct attach_context *ctx,
afc691a0
CB
380 struct lxc_container *container,
381 lxc_attach_options_t *options)
e0732705 382{
9680e7b0 383 __do_free char *lsm_label = NULL;
6f4f1937 384 int ret;
c538837d 385 char path[LXC_PROC_PID_LEN];
e0732705 386
500ed813 387 ctx->container = container;
afc691a0 388 ctx->attach_flags = options->attach_flags;
500ed813 389
6f0c2cea
CB
390 ctx->dfd_self_pid = open_at(-EBADF, "/proc/self",
391 PROTECT_OPATH_FILE & ~O_NOFOLLOW,
392 (PROTECT_LOOKUP_ABSOLUTE_WITH_SYMLINKS & ~RESOLVE_NO_XDEV), 0);
393 if (ctx->dfd_self_pid < 0)
394 return log_error_errno(-errno, errno, "Failed to open /proc/self");
395
9b31ab58
CB
396 ctx->init_pidfd = lxc_cmd_get_init_pidfd(container->name, container->config_path);
397 if (ctx->init_pidfd >= 0)
398 ctx->init_pid = pidfd_get_pid(ctx->dfd_self_pid, ctx->init_pidfd);
d8764025
CB
399 else
400 ctx->init_pid = lxc_cmd_get_init_pid(container->name, container->config_path);
500ed813
CB
401 if (ctx->init_pid < 0)
402 return log_error(-1, "Failed to get init pid");
403
f51c7eb4
CB
404 ret = strnprintf(path, sizeof(path), "/proc/%d", ctx->init_pid);
405 if (ret < 0)
c538837d 406 return ret_errno(EIO);
e0732705 407
5129b2d3
CB
408 ctx->dfd_init_pid = open_at(-EBADF, path,
409 PROTECT_OPATH_DIRECTORY,
410 (PROTECT_LOOKUP_ABSOLUTE & ~RESOLVE_NO_XDEV), 0);
9680e7b0 411 if (ctx->dfd_init_pid < 0)
4475fabb 412 return log_error_errno(-errno, errno, "Failed to open /proc/%d", ctx->init_pid);
c538837d 413
9b31ab58
CB
414 if (ctx->init_pidfd >= 0) {
415 ret = lxc_raw_pidfd_send_signal(ctx->init_pidfd, 0, NULL, 0);
d8764025
CB
416 if (ret)
417 return log_error_errno(-errno, errno, "Container process exited or PID has been recycled");
418 else
419 TRACE("Container process still running and PID was not recycled");
9b31ab58
CB
420
421 if (!pidfd_setns_supported(ctx)) {
422 /* We can't risk leaking file descriptors during attach. */
423 if (close(ctx->init_pidfd))
424 return log_error_errno(-errno, errno, "Failed to close pidfd");
425
426 ctx->init_pidfd = -EBADF;
427 TRACE("Attaching to namespaces via pidfds not supported");
428 }
d8764025
CB
429 }
430
4475fabb
CB
431 /* Determine which namespaces the container was created with. */
432 if (options->namespaces == -1) {
433 options->namespaces = lxc_cmd_get_clone_flags(container->name, container->config_path);
434 if (options->namespaces == -1)
435 return log_error_errno(-EINVAL, EINVAL, "Failed to automatically determine the namespaces which the container uses");
436
2533995e 437 for (lxc_namespace_t i = 0; i < LXC_NS_MAX; i++) {
4475fabb
CB
438 if (ns_info[i].clone_flag & CLONE_NEWCGROUP)
439 if (!(options->attach_flags & LXC_ATTACH_MOVE_TO_CGROUP) ||
440 !cgns_supported())
441 continue;
442
443 if (ns_info[i].clone_flag & options->namespaces)
444 continue;
445
446 ctx->ns_inherited |= ns_info[i].clone_flag;
447 }
448 }
449
9680e7b0
CB
450 ret = parse_init_status(ctx, options);
451 if (ret)
4475fabb 452 return log_error_errno(-errno, errno, "Failed to open parse file");
e0732705 453
4eb19ac0 454 ctx->lsm_ops = lsm_init_static();
d701d729 455
afc691a0
CB
456 if (attach_lsm(options)) {
457 if (ctx->attach_flags & LXC_ATTACH_LSM_LABEL)
458 lsm_label = options->lsm_label;
459 else
9680e7b0 460 lsm_label = ctx->lsm_ops->process_label_get_at(ctx->lsm_ops, ctx->dfd_init_pid);
afc691a0
CB
461 if (!lsm_label)
462 WARN("No security context received");
463 else
464 INFO("Retrieved security context %s", lsm_label);
465 }
e0732705 466
ee142207
CB
467 ret = get_personality(container->name, container->config_path, &ctx->personality);
468 if (ret)
469 return log_error_errno(ret, errno, "Failed to get personality of the container");
d92c8e40 470
1874ef74
CB
471 if (!ctx->container->lxc_conf) {
472 ctx->container->lxc_conf = lxc_conf_init();
473 if (!ctx->container->lxc_conf)
474 return log_error_errno(-ENOMEM, ENOMEM, "Failed to allocate new lxc config");
475 }
476
afc691a0 477 ctx->lsm_label = move_ptr(lsm_label);
9745eb8a 478 return 0;
e0732705
CS
479}
480
/*
 * Compare the file @ns_path below two directory descriptors.
 *
 * Returns 1 when both resolve to the same device and inode (i.e. the same
 * namespace), 0 when they differ, and -errno when either lookup fails.
 */
static int same_nsfd(int dfd_pid1, int dfd_pid2, const char *ns_path)
{
	struct stat first, second;

	if (fstatat(dfd_pid1, ns_path, &first, 0) != 0)
		return -errno;

	if (fstatat(dfd_pid2, ns_path, &second, 0) != 0)
		return -errno;

	if (first.st_dev != second.st_dev)
		return 0;

	if (first.st_ino != second.st_ino)
		return 0;

	/* Same device and inode: both processes share the namespace. */
	return 1;
}
501
502static int same_ns(int dfd_pid1, int dfd_pid2, const char *ns_path)
503{
504 __do_close int ns_fd2 = -EBADF;
505 int ret = -1;
506
507 ns_fd2 = open_at(dfd_pid2, ns_path, PROTECT_OPEN_WITH_TRAILING_SYMLINKS,
508 (PROTECT_LOOKUP_BENEATH_WITH_MAGICLINKS &
509 ~(RESOLVE_NO_XDEV | RESOLVE_BENEATH)), 0);
510 if (ns_fd2 < 0) {
9b31ab58 511 if (errno == ENOENT)
fb54db2c 512 return -ENOENT;
2d7b0895 513 return syserror("Failed to open %d(%s)", dfd_pid2, ns_path);
9b31ab58
CB
514 }
515
516 ret = same_nsfd(dfd_pid1, dfd_pid2, ns_path);
3a6678c7
CB
517 switch (ret) {
518 case -ENOENT:
519 __fallthrough;
520 case 1:
521 return ret_errno(ENOENT);
522 case 0:
523 /* processes are in different namespaces */
524 return move_fd(ns_fd2);
525 }
9b31ab58 526
3a6678c7 527 return ret;
299d1198
CB
528}
529
9b31ab58
CB
530static int __prepare_namespaces_pidfd(struct attach_context *ctx)
531{
2533995e 532 for (lxc_namespace_t i = 0; i < LXC_NS_MAX; i++) {
9b31ab58
CB
533 int ret;
534
9b31ab58
CB
535 ret = same_nsfd(ctx->dfd_self_pid,
536 ctx->dfd_init_pid,
537 ns_info[i].proc_path);
3a6678c7
CB
538 switch (ret) {
539 case -ENOENT:
540 __fallthrough;
541 case 1:
9b31ab58 542 ctx->ns_inherited &= ~ns_info[i].clone_flag;
0b8e876f
CB
543 TRACE("Shared %s namespace doesn't need attach", ns_info[i].proc_name);
544 continue;
3a6678c7 545 case 0:
0b8e876f
CB
546 TRACE("Different %s namespace needs attach", ns_info[i].proc_name);
547 continue;
3a6678c7
CB
548 }
549
2d7b0895 550 return syserror("Failed to determine whether %s namespace is shared",
3a6678c7 551 ns_info[i].proc_name);
9b31ab58
CB
552 }
553
554 return 0;
555}
556
557static int __prepare_namespaces_nsfd(struct attach_context *ctx,
558 lxc_attach_options_t *options)
b7873c95 559{
2533995e
CB
560 for (lxc_namespace_t i = 0; i < LXC_NS_MAX; i++) {
561 lxc_namespace_t j;
b7873c95
CB
562
563 if (options->namespaces & ns_info[i].clone_flag)
5129b2d3
CB
564 ctx->ns_fd[i] = open_at(ctx->dfd_init_pid,
565 ns_info[i].proc_path,
566 PROTECT_OPEN_WITH_TRAILING_SYMLINKS,
9b31ab58
CB
567 (PROTECT_LOOKUP_BENEATH_WITH_MAGICLINKS &
568 ~(RESOLVE_NO_XDEV | RESOLVE_BENEATH)),
5129b2d3 569 0);
b7873c95 570 else if (ctx->ns_inherited & ns_info[i].clone_flag)
5129b2d3
CB
571 ctx->ns_fd[i] = same_ns(ctx->dfd_self_pid,
572 ctx->dfd_init_pid,
573 ns_info[i].proc_path);
b7873c95
CB
574 else
575 continue;
576
577 if (ctx->ns_fd[i] >= 0)
578 continue;
579
fb54db2c 580 if (ctx->ns_fd[i] == -ENOENT) {
b7873c95
CB
581 ctx->ns_inherited &= ~ns_info[i].clone_flag;
582 continue;
583 }
584
585 /* We failed to preserve the namespace. */
9b31ab58
CB
586 SYSERROR("Failed to preserve %s namespace of %d",
587 ns_info[i].proc_name, ctx->init_pid);
b7873c95
CB
588
589 /* Close all already opened file descriptors before we return an
590 * error, so we don't leak them.
591 */
592 for (j = 0; j < i; j++)
593 close_prot_errno_disarm(ctx->ns_fd[j]);
594
595 return -1;
596 }
597
598 return 0;
599}
600
9b31ab58
CB
601static int prepare_namespaces(struct attach_context *ctx,
602 lxc_attach_options_t *options)
b7873c95 603{
9b31ab58
CB
604 if (ctx->init_pidfd < 0)
605 return __prepare_namespaces_nsfd(ctx, options);
606
607 return __prepare_namespaces_pidfd(ctx);
b7873c95
CB
608}
609
9b31ab58 610static inline void put_namespaces(struct attach_context *ctx)
b7873c95 611{
9b31ab58
CB
612 if (ctx->init_pidfd < 0) {
613 for (int i = 0; i < LXC_NS_MAX; i++)
614 close_prot_errno_disarm(ctx->ns_fd[i]);
615 }
616}
b7873c95 617
9b31ab58
CB
618static int __attach_namespaces_pidfd(struct attach_context *ctx,
619 lxc_attach_options_t *options)
620{
621 unsigned int ns_flags = options->namespaces | ctx->ns_inherited;
622 int ret;
b7873c95 623
9b31ab58
CB
624 /* The common case is to attach to all namespaces. */
625 ret = setns(ctx->init_pidfd, ns_flags);
626 if (ret)
627 return log_error_errno(-errno, errno,
628 "Failed to attach to namespaces via pidfd");
629
630 /* We can't risk leaking file descriptors into the container. */
631 if (close(ctx->init_pidfd))
632 return log_error_errno(-errno, errno, "Failed to close pidfd");
633 ctx->init_pidfd = -EBADF;
634
635 return log_trace(0, "Attached to container namespaces via pidfd");
b7873c95
CB
636}
637
9b31ab58
CB
638static int __attach_namespaces_nsfd(struct attach_context *ctx,
639 lxc_attach_options_t *options)
99d50954 640{
92466fe3
CB
641 int fret = 0;
642
2533995e 643 for (lxc_namespace_t i = 0; i < LXC_NS_MAX; i++) {
ffeeed8b 644 int ret;
99d50954 645
877f3a04 646 if (ctx->ns_fd[i] < 0)
26818618
CB
647 continue;
648
21d0acc2 649 ret = setns(ctx->ns_fd[i], ns_info[i].clone_flag);
92466fe3 650 if (ret)
9b31ab58
CB
651 return log_error_errno(-errno, errno,
652 "Failed to attach to %s namespace of %d",
653 ns_info[i].proc_name,
654 ctx->init_pid);
92466fe3
CB
655
656 if (close(ctx->ns_fd[i])) {
657 fret = -errno;
9b31ab58
CB
658 SYSERROR("Failed to close file descriptor for %s namespace",
659 ns_info[i].proc_name);
92466fe3
CB
660 }
661 ctx->ns_fd[i] = -EBADF;
99d50954
CS
662 }
663
92466fe3 664 return fret;
99d50954
CS
665}
666
9b31ab58
CB
667static int attach_namespaces(struct attach_context *ctx,
668 lxc_attach_options_t *options)
669{
670 if (lxc_log_trace()) {
2533995e 671 for (lxc_namespace_t i = 0; i < LXC_NS_MAX; i++) {
9b31ab58
CB
672 if (ns_info[i].clone_flag & options->namespaces) {
673 TRACE("Attaching to %s namespace", ns_info[i].proc_name);
674 continue;
675 }
676 if (ns_info[i].clone_flag & ctx->ns_inherited) {
677 TRACE("Sharing %s namespace", ns_info[i].proc_name);
678 continue;
679 }
680 TRACE("Inheriting %s namespace", ns_info[i].proc_name);
681 }
682 }
683
684 if (ctx->init_pidfd < 0)
685 return __attach_namespaces_nsfd(ctx, options);
686
687 return __attach_namespaces_pidfd(ctx, options);
688}
689
690static void put_attach_context(struct attach_context *ctx)
691{
692 if (ctx) {
693 if (!(ctx->attach_flags & LXC_ATTACH_LSM_LABEL))
694 free_disarm(ctx->lsm_label);
695 close_prot_errno_disarm(ctx->dfd_init_pid);
696
697 if (ctx->container) {
698 lxc_container_put(ctx->container);
699 ctx->container = NULL;
700 }
701
702 put_namespaces(ctx);
703 free(ctx);
704 }
705}
706
c538837d
CB
707/*
708 * Place anything in here that needs to be get rid of before we move into the
709 * container's context and fail hard if we can't.
710 */
711static bool attach_context_security_barrier(struct attach_context *ctx)
712{
713 if (ctx) {
25c659d5
CB
714 if (close(ctx->dfd_self_pid))
715 return false;
716 ctx->dfd_self_pid = -EBADF;
717
718 if (close(ctx->dfd_init_pid))
c538837d 719 return false;
25c659d5 720 ctx->dfd_init_pid = -EBADF;
c538837d
CB
721 }
722
723 return true;
724}
725
e4103cf6 726int lxc_attach_remount_sys_proc(void)
7a0b0b56
CS
727{
728 int ret;
729
730 ret = unshare(CLONE_NEWNS);
ffeeed8b
CB
731 if (ret < 0)
732 return log_error_errno(-1, errno, "Failed to unshare mount namespace");
7a0b0b56 733
9e61fb1f
CB
734 if (detect_shared_rootfs() && mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL))
735 SYSERROR("Failed to recursively turn root mount tree into dependent mount. Continuing...");
2c6f3fc9 736
8ce83369 737 /* Assume /proc is always mounted, so remount it. */
7a0b0b56 738 ret = umount2("/proc", MNT_DETACH);
ffeeed8b
CB
739 if (ret < 0)
740 return log_error_errno(-1, errno, "Failed to unmount /proc");
7a0b0b56 741
0d50f288 742 ret = mount("none", "/proc", "proc", 0, NULL);
ffeeed8b
CB
743 if (ret < 0)
744 return log_error_errno(-1, errno, "Failed to remount /proc");
7a0b0b56 745
ffeeed8b
CB
746 /*
747 * Try to umount /sys. If it's not a mount point, we'll get EINVAL, then
8ce83369 748 * we ignore it because it may not have been mounted in the first place.
7a0b0b56
CS
749 */
750 ret = umount2("/sys", MNT_DETACH);
ffeeed8b
CB
751 if (ret < 0 && errno != EINVAL)
752 return log_error_errno(-1, errno, "Failed to unmount /sys");
753
754 /* Remount it. */
0d50f288 755 if (ret == 0 && mount("none", "/sys", "sysfs", 0, NULL))
ffeeed8b 756 return log_error_errno(-1, errno, "Failed to remount /sys");
7a0b0b56
CS
757
758 return 0;
759}
760
677e1d27 761static int drop_capabilities(struct attach_context *ctx)
e0732705 762{
ffeeed8b 763 int last_cap;
e0732705 764
6f4f1937 765 last_cap = lxc_caps_last_cap();
ffeeed8b 766 for (int cap = 0; cap <= last_cap; cap++) {
e0732705
CS
767 if (ctx->capability_mask & (1LL << cap))
768 continue;
769
b81689a1 770 if (prctl(PR_CAPBSET_DROP, prctl_arg(cap), prctl_arg(0),
ffeeed8b
CB
771 prctl_arg(0), prctl_arg(0)))
772 return log_error_errno(-1, errno, "Failed to drop capability %d", cap);
ea918412 773
94ac256f 774 TRACE("Dropped capability %d", cap);
e0732705
CS
775 }
776
777 return 0;
778}
905022f7 779
ab919e5f 780static int lxc_attach_set_environment(struct attach_context *ctx,
7385273f 781 enum lxc_attach_env_policy_t policy,
6f4f1937 782 char **extra_env, char **extra_keep)
b3a39ba6 783{
3d55242a 784 int ret;
7385273f 785 struct lxc_list *iterator;
786
799f96fd 787 if (policy == LXC_ATTACH_CLEAR_ENV) {
3d5e9f48 788 int path_kept = 0;
6f4f1937 789 char **extra_keep_store = NULL;
3d5e9f48
CS
790
791 if (extra_keep) {
792 size_t count, i;
793
3d55242a
CB
794 for (count = 0; extra_keep[count]; count++)
795 ;
3d5e9f48 796
89b7bfe3 797 extra_keep_store = zalloc(count * sizeof(char *));
3d55242a 798 if (!extra_keep_store)
3d5e9f48 799 return -1;
3d55242a 800
3d5e9f48
CS
801 for (i = 0; i < count; i++) {
802 char *v = getenv(extra_keep[i]);
803 if (v) {
804 extra_keep_store[i] = strdup(v);
805 if (!extra_keep_store[i]) {
3d5e9f48
CS
806 while (i > 0)
807 free(extra_keep_store[--i]);
ea918412 808
3d5e9f48
CS
809 free(extra_keep_store);
810 return -1;
811 }
3d55242a 812
e8c43357 813 if (strequal(extra_keep[i], "PATH"))
3d5e9f48
CS
814 path_kept = 1;
815 }
3d5e9f48
CS
816 }
817 }
818
799f96fd 819 if (clearenv()) {
a9cab7e3 820 if (extra_keep_store) {
3d55242a
CB
821 char **p;
822
a9cab7e3
CS
823 for (p = extra_keep_store; *p; p++)
824 free(*p);
3d55242a 825
a9cab7e3
CS
826 free(extra_keep_store);
827 }
3d55242a 828
ffeeed8b 829 return log_error(-1, "Failed to clear environment");
3d5e9f48
CS
830 }
831
832 if (extra_keep_store) {
833 size_t i;
6f4f1937 834
3d5e9f48 835 for (i = 0; extra_keep[i]; i++) {
acd4922e 836 if (extra_keep_store[i]) {
3d55242a
CB
837 ret = setenv(extra_keep[i], extra_keep_store[i], 1);
838 if (ret < 0)
a24c5678 839 SYSWARN("Failed to set environment variable");
acd4922e 840 }
ea918412 841
3d5e9f48
CS
842 free(extra_keep_store[i]);
843 }
ea918412 844
3d5e9f48
CS
845 free(extra_keep_store);
846 }
847
8ce83369
CB
848 /* Always set a default path; shells and execlp tend to be fine
849 * without it, but there is a disturbing number of C programs
850 * out there that just assume that getenv("PATH") is never NULL
851 * and then die a painful segfault death.
852 */
3d55242a
CB
853 if (!path_kept) {
854 ret = setenv("PATH", "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", 1);
855 if (ret < 0)
a24c5678 856 SYSWARN("Failed to set environment variable");
3d55242a 857 }
b3a39ba6
DW
858 }
859
3d55242a 860 ret = putenv("container=lxc");
ffeeed8b 861 if (ret < 0)
818a57fc 862 return log_warn(-1, "Failed to set environment variable");
b3a39ba6 863
7385273f 864 /* Set container environment variables.*/
640952e5 865 if (ctx->container->lxc_conf) {
ab919e5f 866 lxc_list_for_each(iterator, &ctx->container->lxc_conf->environment) {
3d55242a
CB
867 char *env_tmp;
868
869 env_tmp = strdup((char *)iterator->elem);
870 if (!env_tmp)
7385273f 871 return -1;
7385273f 872
3d55242a 873 ret = putenv(env_tmp);
ffeeed8b
CB
874 if (ret < 0)
875 return log_error_errno(-1, errno, "Failed to set environment variable: %s", (char *)iterator->elem);
7385273f 876 }
877 }
878
8ce83369 879 /* Set extra environment variables. */
3d5e9f48
CS
880 if (extra_env) {
881 for (; *extra_env; extra_env++) {
3d55242a 882 char *p;
ea918412 883
8ce83369
CB
884 /* We just assume the user knows what they are doing, so
885 * we don't do any checks.
886 */
3d55242a
CB
887 p = strdup(*extra_env);
888 if (!p)
3d5e9f48 889 return -1;
3d55242a
CB
890
891 ret = putenv(p);
892 if (ret < 0)
a24c5678 893 SYSWARN("Failed to set environment variable");
3d5e9f48
CS
894 }
895 }
896
b3a39ba6
DW
897 return 0;
898}
899
74a3920a 900static char *lxc_attach_getpwshell(uid_t uid)
905022f7 901{
1b9c9f5b 902 __do_free char *line = NULL, *result = NULL;
cd8f5663 903 __do_fclose FILE *pipe_f = NULL;
6f4f1937 904 int fd, ret;
905022f7
CS
905 pid_t pid;
906 int pipes[2];
3fa23ac3
CB
907 bool found = false;
908 size_t line_bufsz = 0;
905022f7 909
8ce83369
CB
910 /* We need to fork off a process that runs the getent program, and we
911 * need to capture its output, so we use a pipe for that purpose.
905022f7 912 */
3fa23ac3 913 ret = pipe2(pipes, O_CLOEXEC);
905022f7
CS
914 if (ret < 0)
915 return NULL;
916
917 pid = fork();
918 if (pid < 0) {
919 close(pipes[0]);
920 close(pipes[1]);
921 return NULL;
922 }
923
3fa23ac3 924 if (!pid) {
905022f7
CS
925 char uid_buf[32];
926 char *arguments[] = {
927 "getent",
928 "passwd",
929 uid_buf,
930 NULL
931 };
932
933 close(pipes[0]);
934
8ce83369 935 /* We want to capture stdout. */
3fa23ac3 936 ret = dup2(pipes[1], STDOUT_FILENO);
905022f7 937 close(pipes[1]);
3fa23ac3 938 if (ret < 0)
ea918412 939 _exit(EXIT_FAILURE);
905022f7 940
8ce83369
CB
941 /* Get rid of stdin/stderr, so we try to associate it with
942 * /dev/null.
905022f7 943 */
3fa23ac3 944 fd = open_devnull();
905022f7 945 if (fd < 0) {
3fa23ac3
CB
946 close(STDIN_FILENO);
947 close(STDERR_FILENO);
905022f7 948 } else {
3fa23ac3 949 (void)dup3(fd, STDIN_FILENO, O_CLOEXEC);
59f0e209 950 (void)dup3(fd, STDERR_FILENO, O_CLOEXEC);
905022f7
CS
951 close(fd);
952 }
953
8ce83369 954 /* Finish argument list. */
f51c7eb4
CB
955 ret = strnprintf(uid_buf, sizeof(uid_buf), "%ld", (long)uid);
956 if (ret <= 0)
ea918412 957 _exit(EXIT_FAILURE);
905022f7 958
8ce83369 959 /* Try to run getent program. */
3fa23ac3 960 (void)execvp("getent", arguments);
ea918412 961 _exit(EXIT_FAILURE);
905022f7 962 }
3fa23ac3
CB
963
964 close(pipes[1]);
965
4110345b 966 pipe_f = fdopen(pipes[0], "re");
cf4026f1
CB
967 if (!pipe_f) {
968 close(pipes[0]);
969 goto reap_child;
970 }
971 /* Transfer ownership of pipes[0] to pipe_f. */
972 move_fd(pipes[0]);
973
3fa23ac3
CB
974 while (getline(&line, &line_bufsz, pipe_f) != -1) {
975 int i;
976 long value;
977 char *token;
978 char *endptr = NULL, *saveptr = NULL;
979
980 /* If we already found something, just continue to read
981 * until the pipe doesn't deliver any more data, but
982 * don't modify the existing data structure.
983 */
984 if (found)
985 continue;
986
18d4ffde 987 if (!line)
988 continue;
989
3fa23ac3
CB
990 /* Trim line on the right hand side. */
991 for (i = strlen(line); i > 0 && (line[i - 1] == '\n' || line[i - 1] == '\r'); --i)
992 line[i - 1] = '\0';
993
994 /* Split into tokens: first: user name. */
995 token = strtok_r(line, ":", &saveptr);
996 if (!token)
997 continue;
998
8de0119d 999 /* next: placeholder password field */
3fa23ac3
CB
1000 token = strtok_r(NULL, ":", &saveptr);
1001 if (!token)
1002 continue;
1003
1004 /* next: user id */
1005 token = strtok_r(NULL, ":", &saveptr);
1006 value = token ? strtol(token, &endptr, 10) : 0;
1007 if (!token || !endptr || *endptr || value == LONG_MIN ||
ea918412 1008 value == LONG_MAX)
3fa23ac3
CB
1009 continue;
1010
8de0119d 1011 /* placeholder conherence check: user id matches */
3fa23ac3
CB
1012 if ((uid_t)value != uid)
1013 continue;
1014
1015 /* skip fields: gid, gecos, dir, go to next field 'shell' */
1016 for (i = 0; i < 4; i++) {
1017 token = strtok_r(NULL, ":", &saveptr);
1018 if (!token)
1019 continue;
1020 }
ea918412 1021
3fa23ac3
CB
1022 if (!token)
1023 continue;
ea918412 1024
1b9c9f5b 1025 free_disarm(result);
3fa23ac3
CB
1026 result = strdup(token);
1027
1028 /* Sanity check that there are no fields after that. */
1029 token = strtok_r(NULL, ":", &saveptr);
1030 if (token)
1031 continue;
1032
1033 found = true;
1034 }
ea918412 1035
cf4026f1 1036reap_child:
3fa23ac3 1037 ret = wait_for_pid(pid);
1b9c9f5b 1038 if (ret < 0)
3fa23ac3 1039 return NULL;
3fa23ac3 1040
1b9c9f5b 1041 if (!found)
3fa23ac3 1042 return NULL;
3fa23ac3 1043
1b9c9f5b 1044 return move_ptr(result);
905022f7 1045}
cb3e61fa 1046
d4db3d14 1047static bool fetch_seccomp(struct lxc_container *c, lxc_attach_options_t *options)
2c4ea790 1048{
cd8f5663 1049 __do_free char *path = NULL;
d4db3d14
CB
1050 int ret;
1051 bool bret;
2eef2bda 1052
afc691a0 1053 if (!attach_lsm(options)) {
cca66e06 1054 free_disarm(c->lxc_conf->seccomp.seccomp);
2c4ea790 1055 return true;
bd4307f0 1056 }
bd7b4e28 1057
afc691a0 1058 /* Remove current setting. */
d4db3d14 1059 if (!c->set_config_item(c, "lxc.seccomp.profile", "") &&
ea918412 1060 !c->set_config_item(c, "lxc.seccomp", ""))
2c4ea790 1061 return false;
bd7b4e28 1062
8ce83369 1063 /* Fetch the current profile path over the cmd interface. */
0b427da0 1064 path = c->get_running_config_item(c, "lxc.seccomp.profile");
bd7b4e28 1065 if (!path) {
d4db3d14 1066 INFO("Failed to retrieve lxc.seccomp.profile");
ea918412 1067
0b427da0 1068 path = c->get_running_config_item(c, "lxc.seccomp");
cca66e06
CB
1069 if (!path)
1070 return log_info(true, "Failed to retrieve lxc.seccomp");
bd7b4e28
SG
1071 }
1072
8ce83369 1073 /* Copy the value into the new lxc_conf. */
d4db3d14 1074 bret = c->set_config_item(c, "lxc.seccomp.profile", path);
d4db3d14
CB
1075 if (!bret)
1076 return false;
bd7b4e28 1077
8ce83369 1078 /* Attempt to parse the resulting config. */
d4db3d14 1079 ret = lxc_read_seccomp_config(c->lxc_conf);
cca66e06
CB
1080 if (ret < 0)
1081 return log_error(false, "Failed to retrieve seccomp policy");
2c4ea790 1082
cca66e06 1083 return log_info(true, "Retrieved seccomp policy");
2e812c16
CB
1084}
1085
6f4f1937 1086static bool no_new_privs(struct lxc_container *c, lxc_attach_options_t *options)
2e812c16 1087{
cd8f5663 1088 __do_free char *val = NULL;
2e812c16 1089
2e812c16 1090 /* Remove current setting. */
02d3b72b
CB
1091 if (!c->set_config_item(c, "lxc.no_new_privs", ""))
1092 return log_info(false, "Failed to unset lxc.no_new_privs");
2e812c16
CB
1093
1094 /* Retrieve currently active setting. */
1095 val = c->get_running_config_item(c, "lxc.no_new_privs");
02d3b72b
CB
1096 if (!val)
1097 return log_info(false, "Failed to retrieve lxc.no_new_privs");
2e812c16
CB
1098
1099 /* Set currently active setting. */
cd8f5663 1100 return c->set_config_item(c, "lxc.no_new_privs", val);
2c4ea790
SH
1101}
1102
338b230f 1103struct attach_payload {
a998454a 1104 int ipc_socket;
cecf3e83 1105 int terminal_pts_fd;
a998454a 1106 lxc_attach_options_t *options;
ab919e5f 1107 struct attach_context *ctx;
a998454a
CB
1108 lxc_attach_exec_t exec_function;
1109 void *exec_payload;
1110};
1111
338b230f 1112static void put_attach_payload(struct attach_payload *p)
ba2be1a8 1113{
afc691a0
CB
1114 if (p) {
1115 close_prot_errno_disarm(p->ipc_socket);
1116 close_prot_errno_disarm(p->terminal_pts_fd);
dd53c8af 1117 put_attach_context(p->ctx);
ab919e5f 1118 p->ctx = NULL;
b21da190 1119 }
ba2be1a8
CB
1120}
1121
338b230f 1122__noreturn static void do_attach(struct attach_payload *ap)
a998454a 1123{
afc691a0
CB
1124 lxc_attach_exec_t attach_function = move_ptr(ap->exec_function);
1125 void *attach_function_args = move_ptr(ap->exec_payload);
8723f88e 1126 int fd_lsm, ret;
338b230f
CB
1127 lxc_attach_options_t* options = ap->options;
1128 struct attach_context *ctx = ap->ctx;
ab919e5f 1129 struct lxc_conf *conf = ctx->container->lxc_conf;
a998454a
CB
1130
1131 /* A description of the purpose of this functionality is provided in the
1132 * lxc-attach(1) manual page. We have to remount here and not in the
1133 * parent process, otherwise /proc may not properly reflect the new pid
1134 * namespace.
1135 */
1136 if (!(options->namespaces & CLONE_NEWNS) &&
1137 (options->attach_flags & LXC_ATTACH_REMOUNT_PROC_SYS)) {
1138 ret = lxc_attach_remount_sys_proc();
b75c344c
CB
1139 if (ret < 0)
1140 goto on_error;
ea918412 1141
b75c344c 1142 TRACE("Remounted \"/proc\" and \"/sys\"");
a998454a
CB
1143 }
1144
5b514ce3 1145 /* Now perform additional attachments. */
a998454a 1146 if (options->attach_flags & LXC_ATTACH_SET_PERSONALITY) {
b75c344c
CB
1147 long new_personality;
1148
3a881819 1149 if (options->personality == LXC_ATTACH_DETECT_PERSONALITY)
ab919e5f 1150 new_personality = ctx->personality;
b75c344c
CB
1151 else
1152 new_personality = options->personality;
ea918412 1153
ee142207 1154 if (new_personality != LXC_ARCH_UNCHANGED) {
64a04c84 1155 ret = lxc_personality(new_personality);
ee142207
CB
1156 if (ret < 0)
1157 goto on_error;
ea918412 1158
ee142207
CB
1159 TRACE("Set new personality");
1160 }
a998454a 1161 }
a998454a
CB
1162
1163 if (options->attach_flags & LXC_ATTACH_DROP_CAPABILITIES) {
677e1d27 1164 ret = drop_capabilities(ctx);
b75c344c
CB
1165 if (ret < 0)
1166 goto on_error;
ea918412 1167
b75c344c 1168 TRACE("Dropped capabilities");
a998454a
CB
1169 }
1170
1171 /* Always set the environment (specify (LXC_ATTACH_KEEP_ENV, NULL, NULL)
1172 * if you want this to be a no-op).
1173 */
ab919e5f 1174 ret = lxc_attach_set_environment(ctx,
7385273f 1175 options->env_policy,
a998454a
CB
1176 options->extra_env_vars,
1177 options->extra_keep_env);
b75c344c
CB
1178 if (ret < 0)
1179 goto on_error;
ea918412 1180
b75c344c 1181 TRACE("Set up environment");
a998454a 1182
afc691a0
CB
1183 /*
1184 * This remark only affects fully unprivileged containers:
57de839f
CB
1185 * Receive fd for LSM security module before we set{g,u}id(). The reason
1186 * is that on set{g,u}id() the kernel will a) make us undumpable and b)
1187 * we will change our effective uid. This means our effective uid will
1188 * be different from the effective uid of the process that created us
1189 * which means that this processs no longer has capabilities in our
1190 * namespace including CAP_SYS_PTRACE. This means we will not be able to
1191 * read and /proc/<pid> files for the process anymore when /proc is
1192 * mounted with hidepid={1,2}. So let's get the lsm label fd before the
1193 * set{g,u}id().
1194 */
afc691a0 1195 if (attach_lsm(options) && ctx->lsm_label) {
8723f88e 1196 if (!sync_wait_fd(ap->ipc_socket, &fd_lsm)) {
6e36c297 1197 SYSERROR("Failed to receive lsm label fd");
b75c344c 1198 goto on_error;
9044b79e 1199 }
1200
8723f88e 1201 TRACE("Received LSM label file descriptor %d from parent", fd_lsm);
57de839f
CB
1202 }
1203
08ea9270 1204 if (options->stdin_fd > 0 && isatty(options->stdin_fd)) {
cd0a2b2f 1205 ret = lxc_make_controlling_terminal(options->stdin_fd);
08ea9270
CB
1206 if (ret < 0)
1207 goto on_error;
1208 }
1209
9475d2b9
CB
1210 if ((options->attach_flags & LXC_ATTACH_SETGROUPS) &&
1211 options->groups.size > 0) {
8caac583
RJ
1212 if (!lxc_setgroups(options->groups.list, options->groups.size))
1213 goto on_error;
1214 } else {
1215 if (!lxc_drop_groups() && errno != EPERM)
1216 goto on_error;
1217 }
b58214ac 1218
4475fabb 1219 if (options->namespaces & CLONE_NEWUSER)
3ac4480a 1220 if (!lxc_switch_uid_gid(ctx->setup_ns_uid, ctx->setup_ns_gid))
b75c344c 1221 goto on_error;
936efc72 1222
afc691a0 1223 if (attach_lsm(options) && ctx->lsm_label) {
d3ba7c98 1224 bool on_exec;
a998454a
CB
1225
1226 /* Change into our new LSM profile. */
d3ba7c98 1227 on_exec = options->attach_flags & LXC_ATTACH_LSM_EXEC ? true : false;
8723f88e
CB
1228 ret = ctx->lsm_ops->process_label_set_at(ctx->lsm_ops, fd_lsm, ctx->lsm_label, on_exec);
1229 close_prot_errno_disarm(fd_lsm);
b75c344c
CB
1230 if (ret < 0)
1231 goto on_error;
ea918412 1232
ab919e5f 1233 TRACE("Set %s LSM label to \"%s\"", ctx->lsm_ops->name, ctx->lsm_label);
a998454a
CB
1234 }
1235
640952e5 1236 if (conf->no_new_privs || (options->attach_flags & LXC_ATTACH_NO_NEW_PRIVS)) {
6ce8e678
AL
1237 ret = prctl(PR_SET_NO_NEW_PRIVS, prctl_arg(1), prctl_arg(0),
1238 prctl_arg(0), prctl_arg(0));
1239 if (ret < 0)
1240 goto on_error;
1241
1242 TRACE("Set PR_SET_NO_NEW_PRIVS");
1243 }
1244
a998454a
CB
1245 /* The following is done after the communication socket is shut down.
1246 * That way, all errors that might (though unlikely) occur up until this
1247 * point will have their messages printed to the original stderr (if
1248 * logging is so configured) and not the fd the user supplied, if any.
1249 */
1250
1251 /* Fd handling for stdin, stdout and stderr; ignore errors here, user
1252 * may want to make sure the fds are closed, for example.
1253 */
08ea9270 1254 if (options->stdin_fd >= 0 && options->stdin_fd != STDIN_FILENO)
40301d48 1255 if (dup2(options->stdin_fd, STDIN_FILENO) < 0)
a7563434 1256 SYSDEBUG("Failed to replace stdin with %d", options->stdin_fd);
08ea9270
CB
1257
1258 if (options->stdout_fd >= 0 && options->stdout_fd != STDOUT_FILENO)
40301d48 1259 if (dup2(options->stdout_fd, STDOUT_FILENO) < 0)
93b9960a 1260 SYSDEBUG("Failed to replace stdout with %d", options->stdout_fd);
08ea9270
CB
1261
1262 if (options->stderr_fd >= 0 && options->stderr_fd != STDERR_FILENO)
40301d48 1263 if (dup2(options->stderr_fd, STDERR_FILENO) < 0)
93b9960a 1264 SYSDEBUG("Failed to replace stderr with %d", options->stderr_fd);
a998454a
CB
1265
1266 /* close the old fds */
08ea9270 1267 if (options->stdin_fd > STDERR_FILENO)
a998454a 1268 close(options->stdin_fd);
08ea9270
CB
1269
1270 if (options->stdout_fd > STDERR_FILENO)
a998454a 1271 close(options->stdout_fd);
08ea9270
CB
1272
1273 if (options->stderr_fd > STDERR_FILENO)
a998454a
CB
1274 close(options->stderr_fd);
1275
427a8067
CB
1276 /*
1277 * Try to remove FD_CLOEXEC flag from stdin/stdout/stderr, but also
a998454a
CB
1278 * here, ignore errors.
1279 */
427a8067 1280 for (int fd = STDIN_FILENO; fd <= STDERR_FILENO; fd++) {
3f62938a 1281 ret = fd_cloexec(fd, false);
b75c344c
CB
1282 if (ret < 0) {
1283 SYSERROR("Failed to clear FD_CLOEXEC from file descriptor %d", fd);
1284 goto on_error;
1285 }
a998454a
CB
1286 }
1287
9e84479f 1288 if (options->attach_flags & LXC_ATTACH_TERMINAL) {
338b230f 1289 ret = lxc_terminal_prepare_login(ap->terminal_pts_fd);
ba2be1a8 1290 if (ret < 0) {
338b230f 1291 SYSERROR("Failed to prepare terminal file descriptor %d", ap->terminal_pts_fd);
ba2be1a8
CB
1292 goto on_error;
1293 }
ea918412 1294
338b230f 1295 TRACE("Prepared terminal file descriptor %d", ap->terminal_pts_fd);
ba2be1a8
CB
1296 }
1297
4475fabb 1298 /* Avoid unnecessary syscalls. */
3ac4480a
CB
1299 if (ctx->setup_ns_uid == ctx->target_ns_uid)
1300 ctx->target_ns_uid = LXC_INVALID_UID;
1301
1302 if (ctx->setup_ns_gid == ctx->target_ns_gid)
1303 ctx->target_ns_gid = LXC_INVALID_GID;
4475fabb 1304
3ac4480a
CB
1305 /*
1306 * Make sure that the processes STDIO is correctly owned by the user
1307 * that we are switching to.
1308 */
1309 ret = fix_stdio_permissions(ctx->target_ns_uid);
1310 if (ret)
1311 INFO("Failed to adjust stdio permissions");
4475fabb 1312
e18aba7d
CB
1313 if (conf->seccomp.seccomp) {
1314 ret = lxc_seccomp_load(conf);
1315 if (ret < 0)
1316 goto on_error;
1317
1318 TRACE("Loaded seccomp profile");
1319
1320 ret = lxc_seccomp_send_notifier_fd(&conf->seccomp, ap->ipc_socket);
1321 if (ret < 0)
1322 goto on_error;
c5bac506 1323 lxc_seccomp_close_notifier_fd(&conf->seccomp);
e18aba7d
CB
1324 }
1325
3ac4480a 1326 if (!lxc_switch_uid_gid(ctx->target_ns_uid, ctx->target_ns_gid))
936efc72
CB
1327 goto on_error;
1328
cd5f35ec
CB
1329 put_attach_payload(ap);
1330
a998454a 1331 /* We're done, so we can now do whatever the user intended us to do. */
afc691a0 1332 _exit(attach_function(attach_function_args));
b75c344c
CB
1333
1334on_error:
dab02267 1335 ERROR("Failed to attach to container");
cd5f35ec 1336 put_attach_payload(ap);
c7ac2e1c 1337 _exit(EXIT_FAILURE);
a998454a
CB
1338}
1339
/*
 * Initialise @terminal and create a terminal for the attach operation.
 *
 * Returns 0 on success and -1 (after logging) if creation fails.
 */
static int lxc_attach_terminal(const char *name, const char *lxcpath, struct lxc_conf *conf,
			       struct lxc_terminal *terminal)
{
	lxc_terminal_init(terminal);

	if (lxc_terminal_create(name, lxcpath, conf, terminal) < 0)
		return log_error(-1, "Failed to create terminal");

	return 0;
}
1353
/*
 * Open the mainloop @descr and register @terminal's handlers with it.
 *
 * Returns 0 on success and -1 (after logging) on failure. If registering the
 * handlers fails, the mainloop that was just opened is closed again.
 */
static int lxc_attach_terminal_mainloop_init(struct lxc_terminal *terminal,
					     struct lxc_async_descr *descr)
{
	if (lxc_mainloop_open(descr) < 0)
		return log_error(-1, "Failed to create mainloop");

	if (lxc_terminal_mainloop_add(descr, terminal) >= 0)
		return 0;

	lxc_mainloop_close(descr);
	return log_error(-1, "Failed to add handlers to mainloop");
}
1371
36a94ce8 1372static inline void lxc_attach_terminal_close_ptx(struct lxc_terminal *terminal)
ba2be1a8 1373{
36a94ce8 1374 close_prot_errno_disarm(terminal->ptx);
ba2be1a8
CB
1375}
1376
cecf3e83 1377static inline void lxc_attach_terminal_close_pts(struct lxc_terminal *terminal)
ba2be1a8 1378{
41808e20 1379 close_prot_errno_disarm(terminal->pty);
ba2be1a8
CB
1380}
1381
9e84479f 1382static inline void lxc_attach_terminal_close_peer(struct lxc_terminal *terminal)
ba2be1a8 1383{
19a3e906 1384 close_prot_errno_disarm(terminal->peer);
ba2be1a8
CB
1385}
1386
9e84479f 1387static inline void lxc_attach_terminal_close_log(struct lxc_terminal *terminal)
ba2be1a8 1388{
19a3e906 1389 close_prot_errno_disarm(terminal->log_fd);
ba2be1a8
CB
1390}
1391
908fbc1a
CB
1392int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function,
1393 void *exec_payload, lxc_attach_options_t *options,
1394 pid_t *attached_process)
9c4693b8 1395{
6f9fe5d0 1396 int ret_parent = -1;
3298b37d 1397 struct lxc_async_descr descr = {};
a9f0cecf 1398 int ret;
26abd7ea 1399 char *name, *lxcpath;
9c4693b8 1400 int ipc_sockets[2];
500ed813 1401 pid_t attached_pid, pid, to_cleanup_pid;
ab919e5f 1402 struct attach_context *ctx;
9e84479f 1403 struct lxc_terminal terminal;
1cce35e6 1404 struct lxc_conf *conf;
9c4693b8 1405
908fbc1a 1406 if (!container)
540a2f70 1407 return ret_set_errno(-1, EINVAL);
908fbc1a
CB
1408
1409 if (!lxc_container_get(container))
540a2f70 1410 return ret_set_errno(-1, EINVAL);
908fbc1a
CB
1411
1412 name = container->name;
1413 lxcpath = container->config_path;
1414
afc691a0 1415 if (!options) {
9c4693b8 1416 options = &attach_static_default_options;
afc691a0
CB
1417 options->lsm_label = NULL;
1418 }
9c4693b8 1419
9745eb8a 1420 ctx = alloc_attach_context();
ab919e5f 1421 if (!ctx) {
9745eb8a
CB
1422 lxc_container_put(container);
1423 return log_error_errno(-ENOMEM, ENOMEM, "Failed to allocate attach context");
1424 }
1425
afc691a0 1426 ret = get_attach_context(ctx, container, options);
9745eb8a 1427 if (ret) {
7e995801 1428 put_attach_context(ctx);
74ce42b5 1429 return log_error(-1, "Failed to get attach context");
9c4693b8
CS
1430 }
1431
ab919e5f 1432 conf = ctx->container->lxc_conf;
ba773996 1433
ab919e5f 1434 if (!fetch_seccomp(ctx->container, options))
ae026f55 1435 WARN("Failed to get seccomp policy");
2c4ea790 1436
ab919e5f 1437 if (!no_new_privs(ctx->container, options))
ae026f55 1438 WARN("Could not determine whether PR_SET_NO_NEW_PRIVS is set");
2e812c16 1439
9b31ab58 1440 ret = prepare_namespaces(ctx, options);
b7873c95 1441 if (ret) {
52ed870e 1442 put_attach_context(ctx);
74ce42b5 1443 return log_error(-1, "Failed to get namespace file descriptors");
9c4693b8
CS
1444 }
1445
9e84479f 1446 if (options->attach_flags & LXC_ATTACH_TERMINAL) {
f797f05e 1447 ret = lxc_attach_terminal(name, lxcpath, conf, &terminal);
ba2be1a8 1448 if (ret < 0) {
dd53c8af 1449 put_attach_context(ctx);
74ce42b5 1450 return log_error(-1, "Failed to setup new terminal");
ba2be1a8
CB
1451 }
1452
9e84479f 1453 terminal.log_fd = options->log_fd;
c948657b 1454 } else {
9e84479f 1455 lxc_terminal_init(&terminal);
ba2be1a8
CB
1456 }
1457
8ce83369
CB
1458 /* Create a socket pair for IPC communication; set SOCK_CLOEXEC in order
1459 * to make sure we don't irritate other threads that want to fork+exec
1460 * away
9c4693b8
CS
1461 *
1462 * IMPORTANT: if the initial process is multithreaded and another call
1463 * just fork()s away without exec'ing directly after, the socket fd will
1464 * exist in the forked process from the other thread and any close() in
8ce83369 1465 * our own child process will not really cause the socket to close
4f6c7312 1466 * properly, potentially causing the parent to get stuck.
9c4693b8
CS
1467 *
1468 * For this reason, while IPC is still active, we have to use shutdown()
8ce83369
CB
1469 * if the child exits prematurely in order to signal that the socket is
1470 * closed and cannot assume that the child exiting will automatically do
1471 * that.
9c4693b8
CS
1472 *
1473 * IPC mechanism: (X is receiver)
bd6a2355 1474 * initial process transient process attached process
9c4693b8
CS
1475 * X <--- send pid of
1476 * attached proc,
1477 * then exit
1478 * send 0 ------------------------------------> X
1479 * [do initialization]
1480 * X <------------------------------------ send 1
1481 * [add to cgroup, ...]
1482 * send 2 ------------------------------------> X
81f466d0
CB
1483 * [set LXC_ATTACH_NO_NEW_PRIVS]
1484 * X <------------------------------------ send 3
1485 * [open LSM label fd]
1486 * send 4 ------------------------------------> X
1487 * [set LSM label]
9c4693b8
CS
1488 * close socket close socket
1489 * run program
1490 */
1491 ret = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
1492 if (ret < 0) {
dd53c8af 1493 put_attach_context(ctx);
74ce42b5 1494 return log_error_errno(-1, errno, "Could not set up required IPC mechanism for attaching");
9c4693b8
CS
1495 }
1496
bd6a2355 1497 /* Create transient process, two reasons:
e3f0e436 1498 * 1. We can't setns() in the child itself, since we want to make
8ce83369 1499 * sure we are properly attached to the pidns.
e3f0e436 1500 * 2. Also, the initial thread has to put the attached process
8ce83369
CB
1501 * into the cgroup, which we can only do if we didn't already
1502 * setns() (otherwise, user namespaces will hate us).
9c4693b8
CS
1503 */
1504 pid = fork();
9c4693b8 1505 if (pid < 0) {
dd53c8af 1506 put_attach_context(ctx);
74ce42b5 1507 return log_error_errno(-1, errno, "Failed to create first subprocess");
9c4693b8
CS
1508 }
1509
4f25e72f 1510 if (pid == 0) {
26abd7ea 1511 char *cwd, *new_cwd;
a588a482 1512
ba2be1a8 1513 /* close unneeded file descriptors */
4f25e72f 1514 close_prot_errno_disarm(ipc_sockets[0]);
2202afc9 1515
4f25e72f
CB
1516 if (options->attach_flags & LXC_ATTACH_TERMINAL) {
1517 lxc_attach_terminal_close_ptx(&terminal);
1518 lxc_attach_terminal_close_peer(&terminal);
1519 lxc_attach_terminal_close_log(&terminal);
f4364484
SG
1520 }
1521
4f25e72f 1522 /* Wait for the parent to have setup cgroups. */
6e48e7c5 1523 if (!sync_wait(ipc_sockets[1], ATTACH_SYNC_CGROUP)) {
4f25e72f 1524 shutdown(ipc_sockets[1], SHUT_RDWR);
dd53c8af 1525 put_attach_context(ctx);
4f25e72f 1526 _exit(EXIT_FAILURE);
bb2ada6f
CB
1527 }
1528
c538837d
CB
1529 if (!attach_context_security_barrier(ctx)) {
1530 shutdown(ipc_sockets[1], SHUT_RDWR);
1531 put_attach_context(ctx);
1532 _exit(EXIT_FAILURE);
1533 }
1534
a588a482
CB
1535 cwd = getcwd(NULL, 0);
1536
c538837d
CB
1537 /*
1538 * Attach now, create another subprocess later, since pid
1539 * namespaces only really affect the children of the current
1540 * process.
1541 *
1542 * Note that this is a crucial barrier. We're no moving into
1543 * the container's context so we need to make sure to not leak
1544 * anything sensitive. That especially means things such as
1545 * open file descriptors!
4f25e72f 1546 */
9b31ab58 1547 ret = attach_namespaces(ctx, options);
4f25e72f
CB
1548 if (ret < 0) {
1549 ERROR("Failed to enter namespaces");
1550 shutdown(ipc_sockets[1], SHUT_RDWR);
dd53c8af 1551 put_attach_context(ctx);
4f25e72f 1552 _exit(EXIT_FAILURE);
ba2be1a8
CB
1553 }
1554
4f25e72f
CB
1555 /* Attach succeeded, try to cwd. */
1556 if (options->initial_cwd)
1557 new_cwd = options->initial_cwd;
1558 else
1559 new_cwd = cwd;
1560 if (new_cwd) {
1561 ret = chdir(new_cwd);
1562 if (ret < 0)
1563 WARN("Could not change directory to \"%s\"", new_cwd);
ba2be1a8 1564 }
a588a482 1565 free_disarm(cwd);
c6d09e15 1566
4f25e72f 1567 /* Create attached process. */
4f25e72f
CB
1568 pid = lxc_raw_clone(CLONE_PARENT, NULL);
1569 if (pid < 0) {
1570 SYSERROR("Failed to clone attached process");
1571 shutdown(ipc_sockets[1], SHUT_RDWR);
dd53c8af 1572 put_attach_context(ctx);
4f25e72f
CB
1573 _exit(EXIT_FAILURE);
1574 }
f4364484 1575
4f25e72f 1576 if (pid == 0) {
338b230f 1577 struct attach_payload ap = {
a64902ab
CB
1578 .ipc_socket = ipc_sockets[1],
1579 .options = options,
1580 .ctx = ctx,
1581 .terminal_pts_fd = terminal.pty,
1582 .exec_function = exec_function,
1583 .exec_payload = exec_payload,
1584 };
1585
4f25e72f
CB
1586 if (options->attach_flags & LXC_ATTACH_TERMINAL) {
1587 ret = lxc_terminal_signal_sigmask_safe_blocked(&terminal);
1588 if (ret < 0) {
1589 SYSERROR("Failed to reset signal mask");
1590 _exit(EXIT_FAILURE);
1591 }
1592 }
ea918412 1593
a64902ab 1594 /* Does not return. */
338b230f 1595 do_attach(&ap);
62183f1a 1596 }
bd6a2355 1597 TRACE("Attached process %d started initializing", pid);
2eef2bda 1598
4f25e72f
CB
1599 if (options->attach_flags & LXC_ATTACH_TERMINAL)
1600 lxc_attach_terminal_close_pts(&terminal);
ea918412 1601
4f25e72f 1602 /* Tell grandparent the pid of the pid of the newly created child. */
8723f88e 1603 if (!sync_wake_pid(ipc_sockets[1], pid)) {
4f25e72f
CB
1604 /* If this really happens here, this is very unfortunate, since
1605 * the parent will not know the pid of the attached process and
1606 * will not be able to wait for it (and we won't either due to
1607 * CLONE_PARENT) so the parent won't be able to reap it and the
1608 * attached process will remain a zombie.
1609 */
1610 shutdown(ipc_sockets[1], SHUT_RDWR);
dd53c8af 1611 put_attach_context(ctx);
4f25e72f
CB
1612 _exit(EXIT_FAILURE);
1613 }
9c4693b8 1614
4f25e72f 1615 /* The rest is in the hands of the initial and the attached process. */
dd53c8af 1616 put_attach_context(ctx);
4f25e72f
CB
1617 _exit(EXIT_SUCCESS);
1618 }
bd6a2355 1619 TRACE("Transient process %d started initializing", pid);
6f4f1937 1620
4f25e72f 1621 to_cleanup_pid = pid;
ea918412 1622
4f25e72f 1623 /* close unneeded file descriptors */
cb2420df 1624 close_prot_errno_disarm(ipc_sockets[1]);
9b31ab58 1625 put_namespaces(ctx);
4f25e72f
CB
1626 if (options->attach_flags & LXC_ATTACH_TERMINAL)
1627 lxc_attach_terminal_close_pts(&terminal);
81f466d0 1628
4f25e72f
CB
1629 /* Attach to cgroup, if requested. */
1630 if (options->attach_flags & LXC_ATTACH_MOVE_TO_CGROUP) {
1631 /*
1632 * If this is the unified hierarchy cgroup_attach() is
1633 * enough.
1634 */
1635 ret = cgroup_attach(conf, name, lxcpath, pid);
9a57778b 1636 if (ret) {
4f25e72f 1637 call_cleaner(cgroup_exit) struct cgroup_ops *cgroup_ops = NULL;
f740bc63 1638 if (!ERRNO_IS_NOT_SUPPORTED(ret)) {
9a57778b
CB
1639 SYSERROR("Failed to attach cgroup");
1640 goto on_error;
1641 }
1642
4f25e72f
CB
1643 cgroup_ops = cgroup_init(conf);
1644 if (!cgroup_ops)
1645 goto on_error;
9044b79e 1646
4f25e72f
CB
1647 if (!cgroup_ops->attach(cgroup_ops, conf, name, lxcpath, pid))
1648 goto on_error;
81f466d0 1649 }
9a57778b 1650
bd6a2355 1651 TRACE("Moved transient process %d into container cgroup", pid);
4f25e72f 1652 }
81f466d0 1653
6ee13f5b
CB
1654 /*
1655 * Close sensitive file descriptors we don't need anymore. Even if
1656 * we're the parent.
1657 */
1658 if (!attach_context_security_barrier(ctx))
1659 goto on_error;
1660
4f25e72f 1661 /* Setup /proc limits */
91d04bf9
CB
1662 ret = setup_proc_filesystem(conf, pid);
1663 if (ret < 0)
1664 goto on_error;
cdb2a47f 1665
4f25e72f 1666 /* Setup resource limits */
223797c3
CB
1667 ret = setup_resource_limits(conf, pid);
1668 if (ret < 0)
1669 goto on_error;
cdb2a47f 1670
4f25e72f
CB
1671 if (options->attach_flags & LXC_ATTACH_TERMINAL) {
1672 ret = lxc_attach_terminal_mainloop_init(&terminal, &descr);
1673 if (ret < 0)
1674 goto on_error;
9c4693b8 1675
4f25e72f
CB
1676 TRACE("Initialized terminal mainloop");
1677 }
9c4693b8 1678
4f25e72f 1679 /* Let the child process know to go ahead. */
6e48e7c5 1680 if (!sync_wake(ipc_sockets[0], ATTACH_SYNC_CGROUP))
4f25e72f 1681 goto close_mainloop;
ba2be1a8 1682
bd6a2355 1683 TRACE("Told transient process to start initializing");
ea918412 1684
bd6a2355 1685 /* Get pid of attached process from transient process. */
8723f88e 1686 if (!sync_wait_pid(ipc_sockets[0], &attached_pid))
4f25e72f 1687 goto close_mainloop;
ba2be1a8 1688
4f25e72f 1689 TRACE("Received pid %d of attached process in parent pid namespace", attached_pid);
ba2be1a8 1690
4f25e72f 1691 /* Ignore SIGKILL (CTRL-C) and SIGQUIT (CTRL-\) - issue #313. */
5d2b46fb 1692 if (options->stdin_fd == STDIN_FILENO) {
4f25e72f
CB
1693 signal(SIGINT, SIG_IGN);
1694 signal(SIGQUIT, SIG_IGN);
1695 }
ba2be1a8 1696
bd6a2355 1697 /* Reap transient process. */
4f25e72f
CB
1698 ret = wait_for_pid(pid);
1699 if (ret < 0)
1700 goto close_mainloop;
ba2be1a8 1701
bd6a2355 1702 TRACE("Transient process %d exited", pid);
ea918412 1703
4f25e72f
CB
1704 /* We will always have to reap the attached process now. */
1705 to_cleanup_pid = attached_pid;
9c4693b8 1706
4f25e72f 1707 /* Open LSM fd and send it to child. */
afc691a0 1708 if (attach_lsm(options) && ctx->lsm_label) {
8723f88e 1709 __do_close int fd_lsm = -EBADF;
4f25e72f 1710 bool on_exec;
ea918412 1711
4f25e72f 1712 on_exec = options->attach_flags & LXC_ATTACH_LSM_EXEC ? true : false;
8723f88e
CB
1713 fd_lsm = ctx->lsm_ops->process_label_fd_get(ctx->lsm_ops, attached_pid, on_exec);
1714 if (fd_lsm < 0)
4f25e72f 1715 goto close_mainloop;
9c4693b8 1716
8723f88e 1717 TRACE("Opened LSM label file descriptor %d", fd_lsm);
ea918412 1718
4f25e72f 1719 /* Send child fd of the LSM security module to write to. */
8723f88e 1720 if (!sync_wake_fd(ipc_sockets[0], fd_lsm)) {
6e36c297 1721 SYSERROR("Failed to send lsm label fd");
4f25e72f
CB
1722 goto close_mainloop;
1723 }
1724
8723f88e 1725 TRACE("Sent LSM label file descriptor %d to child", fd_lsm);
9c4693b8 1726 }
ea918412 1727
4f25e72f
CB
1728 if (conf->seccomp.seccomp) {
1729 ret = lxc_seccomp_recv_notifier_fd(&conf->seccomp, ipc_sockets[0]);
1730 if (ret < 0)
1731 goto close_mainloop;
9c4693b8 1732
4f25e72f 1733 ret = lxc_seccomp_add_notifier(name, lxcpath, &conf->seccomp);
d6d979bc 1734 if (ret < 0)
4f25e72f 1735 goto close_mainloop;
d6d979bc 1736 }
9c4693b8 1737
4f25e72f
CB
1738 /* We're done, the child process should now execute whatever it
1739 * is that the user requested. The parent can now track it with
1740 * waitpid() or similar.
1741 */
9c4693b8 1742
4f25e72f 1743 *attached_process = attached_pid;
a998454a 1744
4f25e72f
CB
1745 /* Now shut down communication with child, we're done. */
1746 shutdown(ipc_sockets[0], SHUT_RDWR);
cb2420df 1747 close_prot_errno_disarm(ipc_sockets[0]);
f157b056 1748
4f25e72f
CB
1749 ret_parent = 0;
1750 to_cleanup_pid = -1;
ea918412 1751
4f25e72f
CB
1752 if (options->attach_flags & LXC_ATTACH_TERMINAL) {
1753 ret = lxc_mainloop(&descr, -1);
1754 if (ret < 0) {
1755 ret_parent = -1;
1756 to_cleanup_pid = attached_pid;
1757 }
a998454a 1758 }
ea918412 1759
4f25e72f 1760close_mainloop:
9e84479f 1761 if (options->attach_flags & LXC_ATTACH_TERMINAL)
4f25e72f 1762 lxc_mainloop_close(&descr);
9c4693b8 1763
4f25e72f
CB
1764on_error:
1765 if (ipc_sockets[0] >= 0) {
1766 shutdown(ipc_sockets[0], SHUT_RDWR);
cb2420df 1767 close_prot_errno_disarm(ipc_sockets[0]);
9c4693b8 1768 }
ea918412 1769
4f25e72f
CB
1770 if (to_cleanup_pid > 0)
1771 (void)wait_for_pid(to_cleanup_pid);
1772
1773 if (options->attach_flags & LXC_ATTACH_TERMINAL) {
1774 lxc_terminal_delete(&terminal);
1775 lxc_terminal_conf_free(&terminal);
1776 }
9c4693b8 1777
dd53c8af 1778 put_attach_context(ctx);
4f25e72f 1779 return ret_parent;
9c4693b8
CS
1780}
1781
06346bb0 1782int lxc_attach_run_command(void *payload)
9c4693b8 1783{
06346bb0
CB
1784 int ret = -1;
1785 lxc_attach_command_t *cmd = payload;
9c4693b8 1786
06346bb0
CB
1787 ret = execvp(cmd->program, cmd->argv);
1788 if (ret < 0) {
1789 switch (errno) {
1790 case ENOEXEC:
1791 ret = 126;
cf0fd972 1792 break;
06346bb0
CB
1793 case ENOENT:
1794 ret = 127;
cf0fd972 1795 break;
06346bb0
CB
1796 }
1797 }
ea918412 1798
c2af3a15 1799 return log_error_errno(ret, errno, "Failed to exec \"%s\"", cmd->program);
9c4693b8
CS
1800}
1801
1802int lxc_attach_run_shell(void* payload)
1803{
cd8f5663 1804 __do_free char *buf = NULL;
9c4693b8 1805 uid_t uid;
cb7aa5e8
DJ
1806 struct passwd pwent;
1807 struct passwd *pwentp = NULL;
9c4693b8 1808 char *user_shell;
cb7aa5e8
DJ
1809 size_t bufsize;
1810 int ret;
9c4693b8 1811
8ce83369 1812 /* Ignore payload parameter. */
9c4693b8
CS
1813 (void)payload;
1814
1815 uid = getuid();
cb7aa5e8
DJ
1816
1817 bufsize = sysconf(_SC_GETPW_R_SIZE_MAX);
1818 if (bufsize == -1)
1819 bufsize = 1024;
1820
1821 buf = malloc(bufsize);
1822 if (buf) {
1823 ret = getpwuid_r(uid, &pwent, buf, bufsize, &pwentp);
1824 if (!pwentp) {
1825 if (ret == 0)
ea918412 1826 WARN("Could not find matched password record");
cb7aa5e8
DJ
1827
1828 WARN("Failed to get password record - %u", uid);
1829 }
1830 }
9c4693b8 1831
8ce83369
CB
1832 /* This probably happens because of incompatible nss implementations in
1833 * host and container (remember, this code is still using the host's
1834 * glibc but our mount namespace is in the container) we may try to get
1835 * the information by spawning a [getent passwd uid] process and parsing
1836 * the result.
9c4693b8 1837 */
cb7aa5e8 1838 if (!pwentp)
9c4693b8
CS
1839 user_shell = lxc_attach_getpwshell(uid);
1840 else
cb7aa5e8 1841 user_shell = pwent.pw_shell;
ea918412 1842
9c4693b8 1843 if (user_shell)
acf47e1b 1844 execlp(user_shell, user_shell, (char *)NULL);
9c4693b8 1845
8ce83369
CB
1846 /* Executed if either no passwd entry or execvp fails, we will fall back
1847 * on /bin/sh as a default shell.
9c4693b8 1848 */
acf47e1b 1849 execlp("/bin/sh", "/bin/sh", (char *)NULL);
ea918412 1850
edeb1836 1851 SYSERROR("Failed to execute shell");
cb7aa5e8 1852 if (!pwentp)
edeb1836 1853 free(user_shell);
ea918412 1854
9c4693b8
CS
1855 return -1;
1856}