]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/attach.c
attach: move lxc_cmd_get_init_pid() int get_attach_context()
[mirror_lxc.git] / src / lxc / attach.c
CommitLineData
cc73685d 1/* SPDX-License-Identifier: LGPL-2.1+ */
e0732705 2
d38dd64a
CB
3#ifndef _GNU_SOURCE
4#define _GNU_SOURCE 1
5#endif
e0732705
CS
6#include <errno.h>
7#include <fcntl.h>
c476bdce 8#include <grp.h>
604ca1c0 9#include <linux/unistd.h>
6f4f1937 10#include <pwd.h>
0bece477 11#include <pthread.h>
6f4f1937
CB
12#include <signal.h>
13#include <stdio.h>
14#include <stdlib.h>
15#include <string.h>
6f4f1937 16#include <sys/mount.h>
e0732705
CS
17#include <sys/param.h>
18#include <sys/prctl.h>
5ec27989 19#include <sys/socket.h>
1ba0013f 20#include <sys/syscall.h>
905022f7 21#include <sys/wait.h>
604ca1c0
CB
22#include <termios.h>
23#include <unistd.h>
6f4f1937
CB
24
25#include <lxc/lxccontainer.h>
e0732705 26
81f466d0 27#include "af_unix.h"
e0732705
CS
28#include "attach.h"
29#include "caps.h"
9c4693b8 30#include "cgroup.h"
6f4f1937 31#include "commands.h"
2c4ea790 32#include "conf.h"
6f4f1937 33#include "config.h"
9b8e3c96 34#include "confile.h"
6f4f1937
CB
35#include "log.h"
36#include "lsm/lsm.h"
37#include "lxclock.h"
38#include "lxcseccomp.h"
604ca1c0 39#include "macro.h"
ba2be1a8 40#include "mainloop.h"
cd8f5663 41#include "memory_utils.h"
657256e0 42#include "mount_utils.h"
6f4f1937 43#include "namespace.h"
f40988c7 44#include "process_utils.h"
59524108 45#include "syscall_wrappers.h"
0ed9b1bc 46#include "terminal.h"
6f4f1937 47#include "utils.h"
9c4693b8
CS
48
49#if HAVE_SYS_PERSONALITY_H
50#include <sys/personality.h>
51#endif
e0732705 52
ac2cecc4 53lxc_log_define(attach, lxc);
e0732705 54
ef05d368
CB
55/* Define default options if no options are supplied by the user. */
56static lxc_attach_options_t attach_static_default_options = LXC_ATTACH_OPTIONS_DEFAULT;
57
ab919e5f 58struct attach_context {
500ed813 59 int init_pid;
0e304baa
CB
60 char *lsm_label;
61 struct lxc_container *container;
62 signed long personality;
63 unsigned long long capability_mask;
64 int ns_inherited;
65 int ns_fd[LXC_NS_MAX];
66 struct lsm_ops *lsm_ops;
67};
68
9745eb8a
CB
69static struct attach_context *alloc_attach_context(void)
70{
71 return zalloc(sizeof(struct attach_context));
72}
73
500ed813
CB
74static int get_attach_context(struct attach_context *ctx,
75 struct lxc_container *container)
e0732705 76{
cd8f5663
CB
77 __do_free char *line = NULL;
78 __do_fclose FILE *proc_file = NULL;
6f4f1937
CB
79 int ret;
80 bool found;
604ca1c0 81 char proc_fn[LXC_PROC_STATUS_LEN];
e0732705 82 size_t line_bufsz = 0;
e0732705 83
500ed813
CB
84 ctx->container = container;
85
86 ctx->init_pid = lxc_cmd_get_init_pid(container->name, container->config_path);
87 if (ctx->init_pid < 0)
88 return log_error(-1, "Failed to get init pid");
89
8ce83369 90 /* Read capabilities. */
500ed813 91 ret = snprintf(proc_fn, LXC_PROC_STATUS_LEN, "/proc/%d/status", ctx->init_pid);
604ca1c0 92 if (ret < 0 || ret >= LXC_PROC_STATUS_LEN)
9745eb8a 93 return -EIO;
e0732705 94
ffeeed8b
CB
95 proc_file = fopen(proc_fn, "re");
96 if (!proc_file)
9745eb8a 97 return log_error_errno(-errno, errno, "Failed to open %s", proc_fn);
8ce83369
CB
98
99 found = false;
ea918412 100
e0732705 101 while (getline(&line, &line_bufsz, proc_file) != -1) {
9745eb8a 102 ret = sscanf(line, "CapBnd: %llx", &ctx->capability_mask);
8ce83369
CB
103 if (ret != EOF && ret == 1) {
104 found = true;
e0732705
CS
105 break;
106 }
107 }
108
ffeeed8b 109 if (!found)
9745eb8a 110 return log_error_errno(-ENOENT, ENOENT, "Failed to read capability bounding set from %s", proc_fn);
e0732705 111
9745eb8a 112 ctx->lsm_ops = lsm_init();
d701d729 113
500ed813 114 ctx->lsm_label = ctx->lsm_ops->process_label_get(ctx->lsm_ops, ctx->init_pid);
9745eb8a 115 ctx->ns_inherited = 0;
b87ee312 116 for (int i = 0; i < LXC_NS_MAX; i++)
9745eb8a 117 ctx->ns_fd[i] = -EBADF;
e0732705 118
9745eb8a 119 return 0;
e0732705
CS
120}
121
ab919e5f 122static inline void lxc_proc_close_ns_fd(struct attach_context *ctx)
877f3a04 123{
81102768
CB
124 for (int i = 0; i < LXC_NS_MAX; i++)
125 close_prot_errno_disarm(ctx->ns_fd[i]);
877f3a04
CB
126}
127
dd53c8af 128static void put_attach_context(struct attach_context *ctx)
fe4de9a6 129{
7444657c 130 free_disarm(ctx->lsm_label);
08ea9270
CB
131
132 if (ctx->container) {
2c4ea790 133 lxc_container_put(ctx->container);
08ea9270
CB
134 ctx->container = NULL;
135 }
136
877f3a04 137 lxc_proc_close_ns_fd(ctx);
fe4de9a6
DE
138 free(ctx);
139}
140
299d1198
CB
141/**
142 * in_same_namespace - Check whether two processes are in the same namespace.
143 * @pid1 - PID of the first process.
144 * @pid2 - PID of the second process.
145 * @ns - Name of the namespace to check. Must correspond to one of the names
146 * for the namespaces as shown in /proc/<pid/ns/
147 *
148 * If the two processes are not in the same namespace returns an fd to the
149 * namespace of the second process identified by @pid2. If the two processes are
150 * in the same namespace returns -EINVAL, -1 if an error occurred.
151 */
152static int in_same_namespace(pid_t pid1, pid_t pid2, const char *ns)
153{
644e7393 154 __do_close int ns_fd1 = -EBADF, ns_fd2 = -EBADF;
3cc629fe 155 int ret = -1;
299d1198
CB
156 struct stat ns_st1, ns_st2;
157
158 ns_fd1 = lxc_preserve_ns(pid1, ns);
134284c3
CB
159 if (ns_fd1 < 0) {
160 /* The kernel does not support this namespace. This is not an
161 * error.
162 */
163 if (errno == ENOENT)
164 return -EINVAL;
165
3cc629fe 166 return -1;
134284c3 167 }
299d1198
CB
168
169 ns_fd2 = lxc_preserve_ns(pid2, ns);
21d0acc2 170 if (ns_fd2 < 0)
3cc629fe 171 return -1;
299d1198
CB
172
173 ret = fstat(ns_fd1, &ns_st1);
21d0acc2 174 if (ret < 0)
3cc629fe 175 return -1;
299d1198
CB
176
177 ret = fstat(ns_fd2, &ns_st2);
21d0acc2 178 if (ret < 0)
3cc629fe 179 return -1;
299d1198
CB
180
181 /* processes are in the same namespace */
3cc629fe
CB
182 if ((ns_st1.st_dev == ns_st2.st_dev) && (ns_st1.st_ino == ns_st2.st_ino))
183 return -EINVAL;
299d1198
CB
184
185 /* processes are in different namespaces */
3cc629fe 186 return move_fd(ns_fd2);
299d1198
CB
187}
188
ab919e5f 189static int lxc_attach_to_ns(pid_t pid, struct attach_context *ctx)
99d50954 190{
ffeeed8b
CB
191 for (int i = 0; i < LXC_NS_MAX; i++) {
192 int ret;
99d50954 193
877f3a04 194 if (ctx->ns_fd[i] < 0)
26818618
CB
195 continue;
196
21d0acc2 197 ret = setns(ctx->ns_fd[i], ns_info[i].clone_flag);
ffeeed8b
CB
198 if (ret < 0)
199 return log_error_errno(-1,
200 errno, "Failed to attach to %s namespace of %d",
201 ns_info[i].proc_name, pid);
99d50954 202
299d1198 203 DEBUG("Attached to %s namespace of %d", ns_info[i].proc_name, pid);
99d50954
CS
204 }
205
206 return 0;
207}
208
e4103cf6 209int lxc_attach_remount_sys_proc(void)
7a0b0b56
CS
210{
211 int ret;
212
213 ret = unshare(CLONE_NEWNS);
ffeeed8b
CB
214 if (ret < 0)
215 return log_error_errno(-1, errno, "Failed to unshare mount namespace");
7a0b0b56 216
9e61fb1f
CB
217 if (detect_shared_rootfs() && mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL))
218 SYSERROR("Failed to recursively turn root mount tree into dependent mount. Continuing...");
2c6f3fc9 219
8ce83369 220 /* Assume /proc is always mounted, so remount it. */
7a0b0b56 221 ret = umount2("/proc", MNT_DETACH);
ffeeed8b
CB
222 if (ret < 0)
223 return log_error_errno(-1, errno, "Failed to unmount /proc");
7a0b0b56 224
657256e0 225 ret = mount_filesystem("proc", "/proc", 0);
ffeeed8b
CB
226 if (ret < 0)
227 return log_error_errno(-1, errno, "Failed to remount /proc");
7a0b0b56 228
ffeeed8b
CB
229 /*
230 * Try to umount /sys. If it's not a mount point, we'll get EINVAL, then
8ce83369 231 * we ignore it because it may not have been mounted in the first place.
7a0b0b56
CS
232 */
233 ret = umount2("/sys", MNT_DETACH);
ffeeed8b
CB
234 if (ret < 0 && errno != EINVAL)
235 return log_error_errno(-1, errno, "Failed to unmount /sys");
236
237 /* Remount it. */
657256e0 238 if (ret == 0 && mount_filesystem("sysfs", "/sys", 0))
ffeeed8b 239 return log_error_errno(-1, errno, "Failed to remount /sys");
7a0b0b56
CS
240
241 return 0;
242}
243
ab919e5f 244static int lxc_attach_drop_privs(struct attach_context *ctx)
e0732705 245{
ffeeed8b 246 int last_cap;
e0732705 247
6f4f1937 248 last_cap = lxc_caps_last_cap();
ffeeed8b 249 for (int cap = 0; cap <= last_cap; cap++) {
e0732705
CS
250 if (ctx->capability_mask & (1LL << cap))
251 continue;
252
b81689a1 253 if (prctl(PR_CAPBSET_DROP, prctl_arg(cap), prctl_arg(0),
ffeeed8b
CB
254 prctl_arg(0), prctl_arg(0)))
255 return log_error_errno(-1, errno, "Failed to drop capability %d", cap);
ea918412 256
94ac256f 257 TRACE("Dropped capability %d", cap);
e0732705
CS
258 }
259
260 return 0;
261}
905022f7 262
ab919e5f 263static int lxc_attach_set_environment(struct attach_context *ctx,
7385273f 264 enum lxc_attach_env_policy_t policy,
6f4f1937 265 char **extra_env, char **extra_keep)
b3a39ba6 266{
3d55242a 267 int ret;
7385273f 268 struct lxc_list *iterator;
269
799f96fd 270 if (policy == LXC_ATTACH_CLEAR_ENV) {
3d5e9f48 271 int path_kept = 0;
6f4f1937 272 char **extra_keep_store = NULL;
3d5e9f48
CS
273
274 if (extra_keep) {
275 size_t count, i;
276
3d55242a
CB
277 for (count = 0; extra_keep[count]; count++)
278 ;
3d5e9f48 279
89b7bfe3 280 extra_keep_store = zalloc(count * sizeof(char *));
3d55242a 281 if (!extra_keep_store)
3d5e9f48 282 return -1;
3d55242a 283
3d5e9f48
CS
284 for (i = 0; i < count; i++) {
285 char *v = getenv(extra_keep[i]);
286 if (v) {
287 extra_keep_store[i] = strdup(v);
288 if (!extra_keep_store[i]) {
3d5e9f48
CS
289 while (i > 0)
290 free(extra_keep_store[--i]);
ea918412 291
3d5e9f48
CS
292 free(extra_keep_store);
293 return -1;
294 }
3d55242a 295
3d5e9f48
CS
296 if (strcmp(extra_keep[i], "PATH") == 0)
297 path_kept = 1;
298 }
3d5e9f48
CS
299 }
300 }
301
799f96fd 302 if (clearenv()) {
a9cab7e3 303 if (extra_keep_store) {
3d55242a
CB
304 char **p;
305
a9cab7e3
CS
306 for (p = extra_keep_store; *p; p++)
307 free(*p);
3d55242a 308
a9cab7e3
CS
309 free(extra_keep_store);
310 }
3d55242a 311
ffeeed8b 312 return log_error(-1, "Failed to clear environment");
3d5e9f48
CS
313 }
314
315 if (extra_keep_store) {
316 size_t i;
6f4f1937 317
3d5e9f48 318 for (i = 0; extra_keep[i]; i++) {
acd4922e 319 if (extra_keep_store[i]) {
3d55242a
CB
320 ret = setenv(extra_keep[i], extra_keep_store[i], 1);
321 if (ret < 0)
a24c5678 322 SYSWARN("Failed to set environment variable");
acd4922e 323 }
ea918412 324
3d5e9f48
CS
325 free(extra_keep_store[i]);
326 }
ea918412 327
3d5e9f48
CS
328 free(extra_keep_store);
329 }
330
8ce83369
CB
331 /* Always set a default path; shells and execlp tend to be fine
332 * without it, but there is a disturbing number of C programs
333 * out there that just assume that getenv("PATH") is never NULL
334 * and then die a painful segfault death.
335 */
3d55242a
CB
336 if (!path_kept) {
337 ret = setenv("PATH", "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", 1);
338 if (ret < 0)
a24c5678 339 SYSWARN("Failed to set environment variable");
3d55242a 340 }
b3a39ba6
DW
341 }
342
3d55242a 343 ret = putenv("container=lxc");
ffeeed8b 344 if (ret < 0)
818a57fc 345 return log_warn(-1, "Failed to set environment variable");
b3a39ba6 346
7385273f 347 /* Set container environment variables.*/
ab919e5f
CB
348 if (ctx && ctx->container && ctx->container->lxc_conf) {
349 lxc_list_for_each(iterator, &ctx->container->lxc_conf->environment) {
3d55242a
CB
350 char *env_tmp;
351
352 env_tmp = strdup((char *)iterator->elem);
353 if (!env_tmp)
7385273f 354 return -1;
7385273f 355
3d55242a 356 ret = putenv(env_tmp);
ffeeed8b
CB
357 if (ret < 0)
358 return log_error_errno(-1, errno, "Failed to set environment variable: %s", (char *)iterator->elem);
7385273f 359 }
360 }
361
8ce83369 362 /* Set extra environment variables. */
3d5e9f48
CS
363 if (extra_env) {
364 for (; *extra_env; extra_env++) {
3d55242a 365 char *p;
ea918412 366
8ce83369
CB
367 /* We just assume the user knows what they are doing, so
368 * we don't do any checks.
369 */
3d55242a
CB
370 p = strdup(*extra_env);
371 if (!p)
3d5e9f48 372 return -1;
3d55242a
CB
373
374 ret = putenv(p);
375 if (ret < 0)
a24c5678 376 SYSWARN("Failed to set environment variable");
3d5e9f48
CS
377 }
378 }
379
b3a39ba6
DW
380 return 0;
381}
382
74a3920a 383static char *lxc_attach_getpwshell(uid_t uid)
905022f7 384{
1b9c9f5b 385 __do_free char *line = NULL, *result = NULL;
cd8f5663 386 __do_fclose FILE *pipe_f = NULL;
6f4f1937 387 int fd, ret;
905022f7
CS
388 pid_t pid;
389 int pipes[2];
3fa23ac3
CB
390 bool found = false;
391 size_t line_bufsz = 0;
905022f7 392
8ce83369
CB
393 /* We need to fork off a process that runs the getent program, and we
394 * need to capture its output, so we use a pipe for that purpose.
905022f7 395 */
3fa23ac3 396 ret = pipe2(pipes, O_CLOEXEC);
905022f7
CS
397 if (ret < 0)
398 return NULL;
399
400 pid = fork();
401 if (pid < 0) {
402 close(pipes[0]);
403 close(pipes[1]);
404 return NULL;
405 }
406
3fa23ac3 407 if (!pid) {
905022f7
CS
408 char uid_buf[32];
409 char *arguments[] = {
410 "getent",
411 "passwd",
412 uid_buf,
413 NULL
414 };
415
416 close(pipes[0]);
417
8ce83369 418 /* We want to capture stdout. */
3fa23ac3 419 ret = dup2(pipes[1], STDOUT_FILENO);
905022f7 420 close(pipes[1]);
3fa23ac3 421 if (ret < 0)
ea918412 422 _exit(EXIT_FAILURE);
905022f7 423
8ce83369
CB
424 /* Get rid of stdin/stderr, so we try to associate it with
425 * /dev/null.
905022f7 426 */
3fa23ac3 427 fd = open_devnull();
905022f7 428 if (fd < 0) {
3fa23ac3
CB
429 close(STDIN_FILENO);
430 close(STDERR_FILENO);
905022f7 431 } else {
3fa23ac3 432 (void)dup3(fd, STDIN_FILENO, O_CLOEXEC);
59f0e209 433 (void)dup3(fd, STDERR_FILENO, O_CLOEXEC);
905022f7
CS
434 close(fd);
435 }
436
8ce83369 437 /* Finish argument list. */
3fa23ac3
CB
438 ret = snprintf(uid_buf, sizeof(uid_buf), "%ld", (long)uid);
439 if (ret <= 0 || ret >= sizeof(uid_buf))
ea918412 440 _exit(EXIT_FAILURE);
905022f7 441
8ce83369 442 /* Try to run getent program. */
3fa23ac3 443 (void)execvp("getent", arguments);
ea918412 444 _exit(EXIT_FAILURE);
905022f7 445 }
3fa23ac3
CB
446
447 close(pipes[1]);
448
4110345b 449 pipe_f = fdopen(pipes[0], "re");
cf4026f1
CB
450 if (!pipe_f) {
451 close(pipes[0]);
452 goto reap_child;
453 }
454 /* Transfer ownership of pipes[0] to pipe_f. */
455 move_fd(pipes[0]);
456
3fa23ac3
CB
457 while (getline(&line, &line_bufsz, pipe_f) != -1) {
458 int i;
459 long value;
460 char *token;
461 char *endptr = NULL, *saveptr = NULL;
462
463 /* If we already found something, just continue to read
464 * until the pipe doesn't deliver any more data, but
465 * don't modify the existing data structure.
466 */
467 if (found)
468 continue;
469
18d4ffde 470 if (!line)
471 continue;
472
3fa23ac3
CB
473 /* Trim line on the right hand side. */
474 for (i = strlen(line); i > 0 && (line[i - 1] == '\n' || line[i - 1] == '\r'); --i)
475 line[i - 1] = '\0';
476
477 /* Split into tokens: first: user name. */
478 token = strtok_r(line, ":", &saveptr);
479 if (!token)
480 continue;
481
482 /* next: dummy password field */
483 token = strtok_r(NULL, ":", &saveptr);
484 if (!token)
485 continue;
486
487 /* next: user id */
488 token = strtok_r(NULL, ":", &saveptr);
489 value = token ? strtol(token, &endptr, 10) : 0;
490 if (!token || !endptr || *endptr || value == LONG_MIN ||
ea918412 491 value == LONG_MAX)
3fa23ac3
CB
492 continue;
493
494 /* dummy sanity check: user id matches */
495 if ((uid_t)value != uid)
496 continue;
497
498 /* skip fields: gid, gecos, dir, go to next field 'shell' */
499 for (i = 0; i < 4; i++) {
500 token = strtok_r(NULL, ":", &saveptr);
501 if (!token)
502 continue;
503 }
ea918412 504
3fa23ac3
CB
505 if (!token)
506 continue;
ea918412 507
1b9c9f5b 508 free_disarm(result);
3fa23ac3
CB
509 result = strdup(token);
510
511 /* Sanity check that there are no fields after that. */
512 token = strtok_r(NULL, ":", &saveptr);
513 if (token)
514 continue;
515
516 found = true;
517 }
ea918412 518
cf4026f1 519reap_child:
3fa23ac3 520 ret = wait_for_pid(pid);
1b9c9f5b 521 if (ret < 0)
3fa23ac3 522 return NULL;
3fa23ac3 523
1b9c9f5b 524 if (!found)
3fa23ac3 525 return NULL;
3fa23ac3 526
1b9c9f5b 527 return move_ptr(result);
905022f7 528}
cb3e61fa 529
6f4f1937 530static void lxc_attach_get_init_uidgid(uid_t *init_uid, gid_t *init_gid)
cb3e61fa 531{
cd8f5663
CB
532 __do_free char *line = NULL;
533 __do_fclose FILE *proc_file = NULL;
604ca1c0 534 char proc_fn[LXC_PROC_STATUS_LEN];
8ce83369 535 int ret;
cb3e61fa 536 size_t line_bufsz = 0;
cb3e61fa 537 long value = -1;
a5d657d3
CB
538 uid_t uid = LXC_INVALID_UID;
539 gid_t gid = LXC_INVALID_GID;
cb3e61fa 540
604ca1c0
CB
541 ret = snprintf(proc_fn, LXC_PROC_STATUS_LEN, "/proc/%d/status", 1);
542 if (ret < 0 || ret >= LXC_PROC_STATUS_LEN)
7fb45c93 543 return;
cb3e61fa 544
4110345b 545 proc_file = fopen(proc_fn, "re");
cb3e61fa
CS
546 if (!proc_file)
547 return;
548
549 while (getline(&line, &line_bufsz, proc_file) != -1) {
8ce83369
CB
550 /* Format is: real, effective, saved set user, fs we only care
551 * about real uid.
cb3e61fa
CS
552 */
553 ret = sscanf(line, "Uid: %ld", &value);
8ce83369 554 if (ret != EOF && ret == 1) {
6f4f1937 555 uid = (uid_t)value;
cb3e61fa
CS
556 } else {
557 ret = sscanf(line, "Gid: %ld", &value);
8ce83369 558 if (ret != EOF && ret == 1)
6f4f1937 559 gid = (gid_t)value;
cb3e61fa 560 }
ea918412 561
a5d657d3 562 if (uid != LXC_INVALID_UID && gid != LXC_INVALID_GID)
cb3e61fa
CS
563 break;
564 }
565
8ce83369 566 /* Only override arguments if we found something. */
a5d657d3 567 if (uid != LXC_INVALID_UID)
cb3e61fa 568 *init_uid = uid;
ea918412 569
a5d657d3 570 if (gid != LXC_INVALID_GID)
cb3e61fa
CS
571 *init_gid = gid;
572
573 /* TODO: we should also parse supplementary groups and use
8ce83369
CB
574 * setgroups() to set them.
575 */
cb3e61fa 576}
9c4693b8 577
d4db3d14 578static bool fetch_seccomp(struct lxc_container *c, lxc_attach_options_t *options)
2c4ea790 579{
cd8f5663 580 __do_free char *path = NULL;
d4db3d14
CB
581 int ret;
582 bool bret;
2eef2bda 583
6f4f1937
CB
584 if (!(options->namespaces & CLONE_NEWNS) ||
585 !(options->attach_flags & LXC_ATTACH_LSM)) {
cca66e06 586 free_disarm(c->lxc_conf->seccomp.seccomp);
2c4ea790 587 return true;
bd4307f0 588 }
bd7b4e28 589
2e812c16 590 /* Remove current setting. */
d4db3d14 591 if (!c->set_config_item(c, "lxc.seccomp.profile", "") &&
ea918412 592 !c->set_config_item(c, "lxc.seccomp", ""))
2c4ea790 593 return false;
bd7b4e28 594
8ce83369 595 /* Fetch the current profile path over the cmd interface. */
0b427da0 596 path = c->get_running_config_item(c, "lxc.seccomp.profile");
bd7b4e28 597 if (!path) {
d4db3d14 598 INFO("Failed to retrieve lxc.seccomp.profile");
ea918412 599
0b427da0 600 path = c->get_running_config_item(c, "lxc.seccomp");
cca66e06
CB
601 if (!path)
602 return log_info(true, "Failed to retrieve lxc.seccomp");
bd7b4e28
SG
603 }
604
8ce83369 605 /* Copy the value into the new lxc_conf. */
d4db3d14 606 bret = c->set_config_item(c, "lxc.seccomp.profile", path);
d4db3d14
CB
607 if (!bret)
608 return false;
bd7b4e28 609
8ce83369 610 /* Attempt to parse the resulting config. */
d4db3d14 611 ret = lxc_read_seccomp_config(c->lxc_conf);
cca66e06
CB
612 if (ret < 0)
613 return log_error(false, "Failed to retrieve seccomp policy");
2c4ea790 614
cca66e06 615 return log_info(true, "Retrieved seccomp policy");
2e812c16
CB
616}
617
6f4f1937 618static bool no_new_privs(struct lxc_container *c, lxc_attach_options_t *options)
2e812c16 619{
cd8f5663 620 __do_free char *val = NULL;
2e812c16 621
2e812c16 622 /* Remove current setting. */
02d3b72b
CB
623 if (!c->set_config_item(c, "lxc.no_new_privs", ""))
624 return log_info(false, "Failed to unset lxc.no_new_privs");
2e812c16
CB
625
626 /* Retrieve currently active setting. */
627 val = c->get_running_config_item(c, "lxc.no_new_privs");
02d3b72b
CB
628 if (!val)
629 return log_info(false, "Failed to retrieve lxc.no_new_privs");
2e812c16
CB
630
631 /* Set currently active setting. */
cd8f5663 632 return c->set_config_item(c, "lxc.no_new_privs", val);
2c4ea790
SH
633}
634
9b8e3c96
SH
635static signed long get_personality(const char *name, const char *lxcpath)
636{
7c737378 637 __do_free char *p = NULL;
9b8e3c96 638
6f4f1937 639 p = lxc_cmd_get_config_item(name, "lxc.arch", lxcpath);
9b8e3c96
SH
640 if (!p)
641 return -1;
6f4f1937 642
cd8f5663 643 return lxc_config_parse_arch(p);
9b8e3c96
SH
644}
645
a998454a
CB
646struct attach_clone_payload {
647 int ipc_socket;
cecf3e83 648 int terminal_pts_fd;
a998454a 649 lxc_attach_options_t *options;
ab919e5f 650 struct attach_context *ctx;
a998454a
CB
651 lxc_attach_exec_t exec_function;
652 void *exec_payload;
653};
654
ba2be1a8
CB
655static void lxc_put_attach_clone_payload(struct attach_clone_payload *p)
656{
81102768 657 close_prot_errno_disarm(p->ipc_socket);
cecf3e83 658 close_prot_errno_disarm(p->terminal_pts_fd);
ab919e5f 659 if (p->ctx) {
dd53c8af 660 put_attach_context(p->ctx);
ab919e5f 661 p->ctx = NULL;
b21da190 662 }
ba2be1a8
CB
663}
664
dab02267 665__noreturn static void do_attach(struct attach_clone_payload *payload)
a998454a 666{
427a8067 667 int lsm_fd, ret;
a998454a
CB
668 uid_t new_uid;
669 gid_t new_gid;
936efc72
CB
670 uid_t ns_root_uid = 0;
671 gid_t ns_root_gid = 0;
a998454a 672 lxc_attach_options_t* options = payload->options;
ab919e5f
CB
673 struct attach_context *ctx = payload->ctx;
674 struct lxc_conf *conf = ctx->container->lxc_conf;
57de839f
CB
675 bool needs_lsm = (options->namespaces & CLONE_NEWNS) &&
676 (options->attach_flags & LXC_ATTACH_LSM) &&
ab919e5f 677 ctx->lsm_label;
8455e39e 678 char *lsm_label = NULL;
a998454a
CB
679
680 /* A description of the purpose of this functionality is provided in the
681 * lxc-attach(1) manual page. We have to remount here and not in the
682 * parent process, otherwise /proc may not properly reflect the new pid
683 * namespace.
684 */
685 if (!(options->namespaces & CLONE_NEWNS) &&
686 (options->attach_flags & LXC_ATTACH_REMOUNT_PROC_SYS)) {
687 ret = lxc_attach_remount_sys_proc();
b75c344c
CB
688 if (ret < 0)
689 goto on_error;
ea918412 690
b75c344c 691 TRACE("Remounted \"/proc\" and \"/sys\"");
a998454a
CB
692 }
693
5b514ce3 694 /* Now perform additional attachments. */
a998454a 695#if HAVE_SYS_PERSONALITY_H
a998454a 696 if (options->attach_flags & LXC_ATTACH_SET_PERSONALITY) {
b75c344c
CB
697 long new_personality;
698
699 if (options->personality < 0)
ab919e5f 700 new_personality = ctx->personality;
b75c344c
CB
701 else
702 new_personality = options->personality;
ea918412 703
a998454a 704 ret = personality(new_personality);
b75c344c
CB
705 if (ret < 0)
706 goto on_error;
ea918412 707
b75c344c 708 TRACE("Set new personality");
a998454a
CB
709 }
710#endif
711
712 if (options->attach_flags & LXC_ATTACH_DROP_CAPABILITIES) {
ab919e5f 713 ret = lxc_attach_drop_privs(ctx);
b75c344c
CB
714 if (ret < 0)
715 goto on_error;
ea918412 716
b75c344c 717 TRACE("Dropped capabilities");
a998454a
CB
718 }
719
720 /* Always set the environment (specify (LXC_ATTACH_KEEP_ENV, NULL, NULL)
721 * if you want this to be a no-op).
722 */
ab919e5f 723 ret = lxc_attach_set_environment(ctx,
7385273f 724 options->env_policy,
a998454a
CB
725 options->extra_env_vars,
726 options->extra_keep_env);
b75c344c
CB
727 if (ret < 0)
728 goto on_error;
ea918412 729
b75c344c 730 TRACE("Set up environment");
a998454a 731
57de839f
CB
732 /* This remark only affects fully unprivileged containers:
733 * Receive fd for LSM security module before we set{g,u}id(). The reason
734 * is that on set{g,u}id() the kernel will a) make us undumpable and b)
735 * we will change our effective uid. This means our effective uid will
736 * be different from the effective uid of the process that created us
737 * which means that this processs no longer has capabilities in our
738 * namespace including CAP_SYS_PTRACE. This means we will not be able to
739 * read and /proc/<pid> files for the process anymore when /proc is
740 * mounted with hidepid={1,2}. So let's get the lsm label fd before the
741 * set{g,u}id().
742 */
743 if (needs_lsm) {
b75c344c 744 ret = lxc_abstract_unix_recv_fds(payload->ipc_socket, &lsm_fd, 1, NULL, 0);
9044b79e 745 if (ret <= 0) {
746 if (ret < 0)
747 SYSERROR("Failed to receive lsm label fd");
748
b75c344c 749 goto on_error;
9044b79e 750 }
751
57de839f
CB
752 TRACE("Received LSM label file descriptor %d from parent", lsm_fd);
753 }
754
08ea9270 755 if (options->stdin_fd > 0 && isatty(options->stdin_fd)) {
cd0a2b2f 756 ret = lxc_make_controlling_terminal(options->stdin_fd);
08ea9270
CB
757 if (ret < 0)
758 goto on_error;
759 }
760
b58214ac
CB
761 if (!lxc_setgroups(0, NULL) && errno != EPERM)
762 goto on_error;
763
936efc72
CB
764 if (options->namespaces & CLONE_NEWUSER) {
765 /* Check whether nsuid 0 has a mapping. */
766 ns_root_uid = get_ns_uid(0);
ea918412 767
936efc72
CB
768 /* Check whether nsgid 0 has a mapping. */
769 ns_root_gid = get_ns_gid(0);
a998454a 770
936efc72
CB
771 /* If there's no mapping for nsuid 0 try to retrieve the nsuid
772 * init was started with.
773 */
774 if (ns_root_uid == LXC_INVALID_UID)
775 lxc_attach_get_init_uidgid(&ns_root_uid, &ns_root_gid);
ea918412 776
936efc72
CB
777 if (ns_root_uid == LXC_INVALID_UID)
778 goto on_error;
a998454a 779
464c4611 780 if (!lxc_switch_uid_gid(ns_root_uid, ns_root_gid))
b75c344c 781 goto on_error;
a998454a
CB
782 }
783
936efc72
CB
784 /* Set {u,g}id. */
785 if (options->uid != LXC_INVALID_UID)
786 new_uid = options->uid;
787 else
788 new_uid = ns_root_uid;
789
790 if (options->gid != LXC_INVALID_GID)
791 new_gid = options->gid;
792 else
793 new_gid = ns_root_gid;
794
57de839f 795 if (needs_lsm) {
d3ba7c98 796 bool on_exec;
a998454a
CB
797
798 /* Change into our new LSM profile. */
d3ba7c98 799 on_exec = options->attach_flags & LXC_ATTACH_LSM_EXEC ? true : false;
65129087
CB
800 if (options->attach_flags & LXC_ATTACH_LSM_LABEL)
801 lsm_label = options->lsm_label;
802 if (!lsm_label)
ab919e5f
CB
803 lsm_label = ctx->lsm_label;
804 ret = ctx->lsm_ops->process_label_set_at(ctx->lsm_ops, lsm_fd,
805 lsm_label, on_exec);
57de839f 806 close(lsm_fd);
b75c344c
CB
807 if (ret < 0)
808 goto on_error;
ea918412 809
ab919e5f 810 TRACE("Set %s LSM label to \"%s\"", ctx->lsm_ops->name, ctx->lsm_label);
a998454a
CB
811 }
812
ab919e5f 813 if ((ctx->container && conf && conf->no_new_privs) ||
6ce8e678
AL
814 (options->attach_flags & LXC_ATTACH_NO_NEW_PRIVS)) {
815 ret = prctl(PR_SET_NO_NEW_PRIVS, prctl_arg(1), prctl_arg(0),
816 prctl_arg(0), prctl_arg(0));
817 if (ret < 0)
818 goto on_error;
819
820 TRACE("Set PR_SET_NO_NEW_PRIVS");
821 }
822
ab919e5f 823 if (ctx->container && conf && conf->seccomp.seccomp) {
cdb2a47f 824 ret = lxc_seccomp_load(conf);
b75c344c
CB
825 if (ret < 0)
826 goto on_error;
ea918412 827
b75c344c 828 TRACE("Loaded seccomp profile");
cdb2a47f 829
c3e3c21a
CB
830 ret = lxc_seccomp_send_notifier_fd(&conf->seccomp, payload->ipc_socket);
831 if (ret < 0)
832 goto on_error;
a998454a 833 }
ea918412 834
578d4b62 835 close_prot_errno_disarm(payload->ipc_socket);
dd53c8af 836 put_attach_context(ctx);
ab919e5f 837 payload->ctx = NULL;
a998454a
CB
838
839 /* The following is done after the communication socket is shut down.
840 * That way, all errors that might (though unlikely) occur up until this
841 * point will have their messages printed to the original stderr (if
842 * logging is so configured) and not the fd the user supplied, if any.
843 */
844
845 /* Fd handling for stdin, stdout and stderr; ignore errors here, user
846 * may want to make sure the fds are closed, for example.
847 */
08ea9270 848 if (options->stdin_fd >= 0 && options->stdin_fd != STDIN_FILENO)
b4959848
CB
849 if (dup2(options->stdin_fd, STDIN_FILENO))
850 DEBUG("Failed to replace stdin with %d", options->stdin_fd);
08ea9270
CB
851
852 if (options->stdout_fd >= 0 && options->stdout_fd != STDOUT_FILENO)
b4959848
CB
853 if (dup2(options->stdout_fd, STDOUT_FILENO))
854 DEBUG("Failed to replace stdout with %d", options->stdin_fd);
08ea9270
CB
855
856 if (options->stderr_fd >= 0 && options->stderr_fd != STDERR_FILENO)
b4959848
CB
857 if (dup2(options->stderr_fd, STDERR_FILENO))
858 DEBUG("Failed to replace stderr with %d", options->stdin_fd);
a998454a
CB
859
860 /* close the old fds */
08ea9270 861 if (options->stdin_fd > STDERR_FILENO)
a998454a 862 close(options->stdin_fd);
08ea9270
CB
863
864 if (options->stdout_fd > STDERR_FILENO)
a998454a 865 close(options->stdout_fd);
08ea9270
CB
866
867 if (options->stderr_fd > STDERR_FILENO)
a998454a
CB
868 close(options->stderr_fd);
869
427a8067
CB
870 /*
871 * Try to remove FD_CLOEXEC flag from stdin/stdout/stderr, but also
a998454a
CB
872 * here, ignore errors.
873 */
427a8067 874 for (int fd = STDIN_FILENO; fd <= STDERR_FILENO; fd++) {
3f62938a 875 ret = fd_cloexec(fd, false);
b75c344c
CB
876 if (ret < 0) {
877 SYSERROR("Failed to clear FD_CLOEXEC from file descriptor %d", fd);
878 goto on_error;
879 }
a998454a
CB
880 }
881
9e84479f 882 if (options->attach_flags & LXC_ATTACH_TERMINAL) {
cecf3e83 883 ret = lxc_terminal_prepare_login(payload->terminal_pts_fd);
ba2be1a8 884 if (ret < 0) {
cecf3e83 885 SYSERROR("Failed to prepare terminal file descriptor %d", payload->terminal_pts_fd);
ba2be1a8
CB
886 goto on_error;
887 }
ea918412 888
cecf3e83 889 TRACE("Prepared terminal file descriptor %d", payload->terminal_pts_fd);
ba2be1a8
CB
890 }
891
936efc72
CB
892 /* Avoid unnecessary syscalls. */
893 if (new_uid == ns_root_uid)
894 new_uid = LXC_INVALID_UID;
895
896 if (new_gid == ns_root_gid)
897 new_gid = LXC_INVALID_GID;
c353b0b9 898
6aff5157 899 /* Make sure that the processes STDIO is correctly owned by the user that we are switching to */
c353b0b9
CB
900 ret = fix_stdio_permissions(new_uid);
901 if (ret)
a2c26bef 902 INFO("Failed to adjust stdio permissions");
936efc72 903
464c4611 904 if (!lxc_switch_uid_gid(new_uid, new_gid))
936efc72
CB
905 goto on_error;
906
a998454a 907 /* We're done, so we can now do whatever the user intended us to do. */
c7ac2e1c 908 _exit(payload->exec_function(payload->exec_payload));
b75c344c
CB
909
910on_error:
ba2be1a8 911 lxc_put_attach_clone_payload(payload);
dab02267 912 ERROR("Failed to attach to container");
c7ac2e1c 913 _exit(EXIT_FAILURE);
a998454a
CB
914}
915
/*
 * lxc_attach_terminal - Initialize @terminal and create it for the attach.
 * @name     - Container name, forwarded to lxc_terminal_create().
 * @lxcpath  - Container path, forwarded to lxc_terminal_create().
 * @conf     - Container configuration, forwarded to lxc_terminal_create().
 * @terminal - Terminal object to set up.
 *
 * Returns 0 on success, -1 if the terminal could not be created.
 */
static int lxc_attach_terminal(const char *name, const char *lxcpath, struct lxc_conf *conf,
			       struct lxc_terminal *terminal)
{
	lxc_terminal_init(terminal);

	if (lxc_terminal_create(name, lxcpath, conf, terminal) < 0)
		return log_error(-1, "Failed to create terminal");

	return 0;
}
929
9e84479f
CB
/*
 * lxc_attach_terminal_mainloop_init - Open a mainloop and register the
 * terminal with it.
 * @terminal - Terminal whose handlers are added.
 * @descr    - Mainloop descriptor to open.
 *
 * If registering the terminal fails the mainloop is closed again.
 * Returns 0 on success, -1 on failure.
 */
static int lxc_attach_terminal_mainloop_init(struct lxc_terminal *terminal,
					     struct lxc_epoll_descr *descr)
{
	if (lxc_mainloop_open(descr) < 0)
		return log_error(-1, "Failed to create mainloop");

	if (lxc_terminal_mainloop_add(descr, terminal) < 0) {
		lxc_mainloop_close(descr);
		return log_error(-1, "Failed to add handlers to mainloop");
	}

	return 0;
}
947
36a94ce8 948static inline void lxc_attach_terminal_close_ptx(struct lxc_terminal *terminal)
ba2be1a8 949{
36a94ce8 950 close_prot_errno_disarm(terminal->ptx);
ba2be1a8
CB
951}
952
cecf3e83 953static inline void lxc_attach_terminal_close_pts(struct lxc_terminal *terminal)
ba2be1a8 954{
41808e20 955 close_prot_errno_disarm(terminal->pty);
ba2be1a8
CB
956}
957
9e84479f 958static inline void lxc_attach_terminal_close_peer(struct lxc_terminal *terminal)
ba2be1a8 959{
19a3e906 960 close_prot_errno_disarm(terminal->peer);
ba2be1a8
CB
961}
962
9e84479f 963static inline void lxc_attach_terminal_close_log(struct lxc_terminal *terminal)
ba2be1a8 964{
19a3e906 965 close_prot_errno_disarm(terminal->log_fd);
ba2be1a8
CB
966}
967
908fbc1a
CB
968int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function,
969 void *exec_payload, lxc_attach_options_t *options,
970 pid_t *attached_process)
9c4693b8 971{
e11f5b8c 972 __do_free char *cwd = NULL;
6f9fe5d0
CB
973 int ret_parent = -1;
974 struct attach_clone_payload payload = {};
975 struct lxc_epoll_descr descr = {};
877f3a04 976 int i, ret, status;
6f9fe5d0 977 char *name, *lxcpath, *new_cwd;
9c4693b8 978 int ipc_sockets[2];
9b8e3c96 979 signed long personality;
500ed813 980 pid_t attached_pid, pid, to_cleanup_pid;
ab919e5f 981 struct attach_context *ctx;
9e84479f 982 struct lxc_terminal terminal;
1cce35e6 983 struct lxc_conf *conf;
9c4693b8 984
877f3a04 985 ret = access("/proc/self/ns", X_OK);
c2af3a15
CB
986 if (ret)
987 return log_error_errno(-1, errno, "Does this kernel version support namespaces?");
877f3a04 988
908fbc1a 989 if (!container)
540a2f70 990 return ret_set_errno(-1, EINVAL);
908fbc1a
CB
991
992 if (!lxc_container_get(container))
540a2f70 993 return ret_set_errno(-1, EINVAL);
908fbc1a
CB
994
995 name = container->name;
996 lxcpath = container->config_path;
997
9c4693b8
CS
998 if (!options)
999 options = &attach_static_default_options;
1000
9745eb8a 1001 ctx = alloc_attach_context();
ab919e5f 1002 if (!ctx) {
9745eb8a
CB
1003 lxc_container_put(container);
1004 return log_error_errno(-ENOMEM, ENOMEM, "Failed to allocate attach context");
1005 }
1006
500ed813 1007 ret = get_attach_context(ctx, container);
9745eb8a 1008 if (ret) {
500ed813 1009 ERROR("Failed to get attach context");
908fbc1a 1010 lxc_container_put(container);
9c4693b8
CS
1011 return -1;
1012 }
1013
9b8e3c96 1014 personality = get_personality(name, lxcpath);
ab919e5f 1015 if (ctx->personality < 0) {
6f4f1937 1016 ERROR("Failed to get personality of the container");
dd53c8af 1017 put_attach_context(ctx);
9b8e3c96
SH
1018 return -1;
1019 }
ab919e5f 1020 ctx->personality = personality;
9b8e3c96 1021
ab919e5f
CB
1022 if (!ctx->container->lxc_conf) {
1023 ctx->container->lxc_conf = lxc_conf_init();
1024 if (!ctx->container->lxc_conf) {
dd53c8af 1025 put_attach_context(ctx);
ea918412 1026 return -1;
62de1db6 1027 }
ba773996 1028 }
ab919e5f 1029 conf = ctx->container->lxc_conf;
500ed813
CB
1030 if (!conf) {
1031 put_attach_context(ctx);
a9909116 1032 return log_error_errno(-EINVAL, EINVAL, "Missing container confifg");
500ed813 1033 }
ba773996 1034
ab919e5f 1035 if (!fetch_seccomp(ctx->container, options))
ae026f55 1036 WARN("Failed to get seccomp policy");
2c4ea790 1037
ab919e5f 1038 if (!no_new_privs(ctx->container, options))
ae026f55 1039 WARN("Could not determine whether PR_SET_NO_NEW_PRIVS is set");
2e812c16 1040
9c4693b8
CS
1041 cwd = getcwd(NULL, 0);
1042
8ce83369
CB
1043 /* Determine which namespaces the container was created with
1044 * by asking lxc-start, if necessary.
9c4693b8
CS
1045 */
1046 if (options->namespaces == -1) {
1047 options->namespaces = lxc_cmd_get_clone_flags(name, lxcpath);
1048 /* call failed */
1049 if (options->namespaces == -1) {
8ce83369 1050 ERROR("Failed to automatically determine the "
877f3a04 1051 "namespaces which the container uses");
dd53c8af 1052 put_attach_context(ctx);
9c4693b8
CS
1053 return -1;
1054 }
877f3a04
CB
1055
1056 for (i = 0; i < LXC_NS_MAX; i++) {
1057 if (ns_info[i].clone_flag & CLONE_NEWCGROUP)
1058 if (!(options->attach_flags & LXC_ATTACH_MOVE_TO_CGROUP) ||
1059 !cgns_supported())
1060 continue;
1061
1062 if (ns_info[i].clone_flag & options->namespaces)
1063 continue;
1064
ab919e5f 1065 ctx->ns_inherited |= ns_info[i].clone_flag;
877f3a04
CB
1066 }
1067 }
1068
0059379f 1069 pid = lxc_raw_getpid();
ea918412 1070
877f3a04 1071 for (i = 0; i < LXC_NS_MAX; i++) {
ea918412 1072 int j;
877f3a04
CB
1073
1074 if (options->namespaces & ns_info[i].clone_flag)
500ed813 1075 ctx->ns_fd[i] = lxc_preserve_ns(ctx->init_pid, ns_info[i].proc_name);
ab919e5f 1076 else if (ctx->ns_inherited & ns_info[i].clone_flag)
500ed813 1077 ctx->ns_fd[i] = in_same_namespace(pid, ctx->init_pid, ns_info[i].proc_name);
877f3a04
CB
1078 else
1079 continue;
ea918412 1080
ab919e5f 1081 if (ctx->ns_fd[i] >= 0)
877f3a04
CB
1082 continue;
1083
ab919e5f 1084 if (ctx->ns_fd[i] == -EINVAL) {
877f3a04
CB
1085 DEBUG("Inheriting %s namespace from %d",
1086 ns_info[i].proc_name, pid);
ab919e5f 1087 ctx->ns_inherited &= ~ns_info[i].clone_flag;
877f3a04
CB
1088 continue;
1089 }
1090
1091 /* We failed to preserve the namespace. */
ea918412 1092 SYSERROR("Failed to attach to %s namespace of %d",
1093 ns_info[i].proc_name, pid);
1094
877f3a04
CB
1095 /* Close all already opened file descriptors before we return an
1096 * error, so we don't leak them.
1097 */
1098 for (j = 0; j < i; j++)
ab919e5f 1099 close(ctx->ns_fd[j]);
877f3a04 1100
dd53c8af 1101 put_attach_context(ctx);
877f3a04 1102 return -1;
9c4693b8
CS
1103 }
1104
9e84479f 1105 if (options->attach_flags & LXC_ATTACH_TERMINAL) {
f797f05e 1106 ret = lxc_attach_terminal(name, lxcpath, conf, &terminal);
ba2be1a8 1107 if (ret < 0) {
9e84479f 1108 ERROR("Failed to setup new terminal");
dd53c8af 1109 put_attach_context(ctx);
ba2be1a8
CB
1110 return -1;
1111 }
1112
9e84479f 1113 terminal.log_fd = options->log_fd;
c948657b 1114 } else {
9e84479f 1115 lxc_terminal_init(&terminal);
ba2be1a8
CB
1116 }
1117
8ce83369
CB
1118 /* Create a socket pair for IPC communication; set SOCK_CLOEXEC in order
1119 * to make sure we don't irritate other threads that want to fork+exec
1120 * away
9c4693b8
CS
1121 *
1122 * IMPORTANT: if the initial process is multithreaded and another call
1123 * just fork()s away without exec'ing directly after, the socket fd will
1124 * exist in the forked process from the other thread and any close() in
8ce83369 1125 * our own child process will not really cause the socket to close
1d801260 1126 * properly, potentially causing the parent to hang.
9c4693b8
CS
1127 *
1128 * For this reason, while IPC is still active, we have to use shutdown()
8ce83369
CB
1129 * if the child exits prematurely in order to signal that the socket is
1130 * closed and cannot assume that the child exiting will automatically do
1131 * that.
9c4693b8
CS
1132 *
1133 * IPC mechanism: (X is receiver)
1134 * initial process intermediate attached
1135 * X <--- send pid of
1136 * attached proc,
1137 * then exit
1138 * send 0 ------------------------------------> X
1139 * [do initialization]
1140 * X <------------------------------------ send 1
1141 * [add to cgroup, ...]
1142 * send 2 ------------------------------------> X
81f466d0
CB
1143 * [set LXC_ATTACH_NO_NEW_PRIVS]
1144 * X <------------------------------------ send 3
1145 * [open LSM label fd]
1146 * send 4 ------------------------------------> X
1147 * [set LSM label]
9c4693b8
CS
1148 * close socket close socket
1149 * run program
1150 */
1151 ret = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
1152 if (ret < 0) {
ae026f55 1153 SYSERROR("Could not set up required IPC mechanism for attaching");
dd53c8af 1154 put_attach_context(ctx);
9c4693b8
CS
1155 return -1;
1156 }
1157
e3f0e436
CB
1158 /* Create intermediate subprocess, two reasons:
1159 * 1. We can't setns() in the child itself, since we want to make
8ce83369 1160 * sure we are properly attached to the pidns.
e3f0e436 1161 * 2. Also, the initial thread has to put the attached process
8ce83369
CB
1162 * into the cgroup, which we can only do if we didn't already
1163 * setns() (otherwise, user namespaces will hate us).
9c4693b8
CS
1164 */
1165 pid = fork();
9c4693b8 1166 if (pid < 0) {
ae026f55 1167 SYSERROR("Failed to create first subprocess");
dd53c8af 1168 put_attach_context(ctx);
9c4693b8
CS
1169 return -1;
1170 }
1171
4f25e72f 1172 if (pid == 0) {
ba2be1a8 1173 /* close unneeded file descriptors */
4f25e72f 1174 close_prot_errno_disarm(ipc_sockets[0]);
2202afc9 1175
4f25e72f
CB
1176 if (options->attach_flags & LXC_ATTACH_TERMINAL) {
1177 lxc_attach_terminal_close_ptx(&terminal);
1178 lxc_attach_terminal_close_peer(&terminal);
1179 lxc_attach_terminal_close_log(&terminal);
f4364484
SG
1180 }
1181
4f25e72f
CB
1182 /* Wait for the parent to have setup cgroups. */
1183 ret = lxc_read_nointr(ipc_sockets[1], &status, sizeof(status));
1184 if (ret != sizeof(status)) {
1185 shutdown(ipc_sockets[1], SHUT_RDWR);
dd53c8af 1186 put_attach_context(ctx);
4f25e72f 1187 _exit(EXIT_FAILURE);
bb2ada6f
CB
1188 }
1189
4f25e72f
CB
1190 TRACE("Intermediate process starting to initialize");
1191
1192 /* Attach now, create another subprocess later, since pid namespaces
1193 * only really affect the children of the current process.
1194 */
500ed813 1195 ret = lxc_attach_to_ns(ctx->init_pid, ctx);
4f25e72f
CB
1196 if (ret < 0) {
1197 ERROR("Failed to enter namespaces");
1198 shutdown(ipc_sockets[1], SHUT_RDWR);
dd53c8af 1199 put_attach_context(ctx);
4f25e72f 1200 _exit(EXIT_FAILURE);
ba2be1a8
CB
1201 }
1202
4f25e72f 1203 /* close namespace file descriptors */
ab919e5f 1204 lxc_proc_close_ns_fd(ctx);
ea918412 1205
4f25e72f
CB
1206 /* Attach succeeded, try to cwd. */
1207 if (options->initial_cwd)
1208 new_cwd = options->initial_cwd;
1209 else
1210 new_cwd = cwd;
1211 if (new_cwd) {
1212 ret = chdir(new_cwd);
1213 if (ret < 0)
1214 WARN("Could not change directory to \"%s\"", new_cwd);
ba2be1a8 1215 }
c6d09e15 1216
4f25e72f 1217 /* Create attached process. */
76783714
CB
1218 payload.ipc_socket = ipc_sockets[1];
1219 payload.options = options;
ab919e5f 1220 payload.ctx = ctx;
4f25e72f 1221 payload.terminal_pts_fd = terminal.pty;
76783714
CB
1222 payload.exec_function = exec_function;
1223 payload.exec_payload = exec_payload;
4f25e72f
CB
1224
1225 pid = lxc_raw_clone(CLONE_PARENT, NULL);
1226 if (pid < 0) {
1227 SYSERROR("Failed to clone attached process");
1228 shutdown(ipc_sockets[1], SHUT_RDWR);
dd53c8af 1229 put_attach_context(ctx);
4f25e72f
CB
1230 _exit(EXIT_FAILURE);
1231 }
f4364484 1232
4f25e72f
CB
1233 if (pid == 0) {
1234 if (options->attach_flags & LXC_ATTACH_TERMINAL) {
1235 ret = lxc_terminal_signal_sigmask_safe_blocked(&terminal);
1236 if (ret < 0) {
1237 SYSERROR("Failed to reset signal mask");
1238 _exit(EXIT_FAILURE);
1239 }
1240 }
ea918412 1241
dab02267 1242 do_attach(&payload);
62183f1a 1243 }
2eef2bda 1244
4f25e72f
CB
1245 if (options->attach_flags & LXC_ATTACH_TERMINAL)
1246 lxc_attach_terminal_close_pts(&terminal);
ea918412 1247
4f25e72f
CB
1248 /* Tell grandparent the pid of the pid of the newly created child. */
1249 ret = lxc_write_nointr(ipc_sockets[1], &pid, sizeof(pid));
1250 if (ret != sizeof(pid)) {
1251 /* If this really happens here, this is very unfortunate, since
1252 * the parent will not know the pid of the attached process and
1253 * will not be able to wait for it (and we won't either due to
1254 * CLONE_PARENT) so the parent won't be able to reap it and the
1255 * attached process will remain a zombie.
1256 */
1257 shutdown(ipc_sockets[1], SHUT_RDWR);
dd53c8af 1258 put_attach_context(ctx);
4f25e72f
CB
1259 _exit(EXIT_FAILURE);
1260 }
9c4693b8 1261
4f25e72f 1262 TRACE("Sending pid %d of attached process", pid);
9c4693b8 1263
4f25e72f 1264 /* The rest is in the hands of the initial and the attached process. */
dd53c8af 1265 put_attach_context(ctx);
4f25e72f
CB
1266 _exit(EXIT_SUCCESS);
1267 }
6f4f1937 1268
4f25e72f 1269 to_cleanup_pid = pid;
ea918412 1270
4f25e72f
CB
1271 /* close unneeded file descriptors */
1272 close(ipc_sockets[1]);
e11f5b8c 1273 free_disarm(cwd);
ab919e5f 1274 lxc_proc_close_ns_fd(ctx);
4f25e72f
CB
1275 if (options->attach_flags & LXC_ATTACH_TERMINAL)
1276 lxc_attach_terminal_close_pts(&terminal);
81f466d0 1277
4f25e72f
CB
1278 /* Attach to cgroup, if requested. */
1279 if (options->attach_flags & LXC_ATTACH_MOVE_TO_CGROUP) {
1280 /*
1281 * If this is the unified hierarchy cgroup_attach() is
1282 * enough.
1283 */
1284 ret = cgroup_attach(conf, name, lxcpath, pid);
1285 if (ret) {
1286 call_cleaner(cgroup_exit) struct cgroup_ops *cgroup_ops = NULL;
9044b79e 1287
4f25e72f
CB
1288 cgroup_ops = cgroup_init(conf);
1289 if (!cgroup_ops)
1290 goto on_error;
9044b79e 1291
4f25e72f
CB
1292 if (!cgroup_ops->attach(cgroup_ops, conf, name, lxcpath, pid))
1293 goto on_error;
81f466d0 1294 }
4f25e72f
CB
1295 TRACE("Moved intermediate process %d into container's cgroups", pid);
1296 }
81f466d0 1297
4f25e72f
CB
1298 /* Setup /proc limits */
1299 if (!lxc_list_empty(&conf->procs)) {
1300 ret = setup_proc_filesystem(&conf->procs, pid);
1301 if (ret < 0)
1302 goto on_error;
4f3b6a85
CB
1303
1304 TRACE("Setup /proc/%d settings", pid);
4f25e72f 1305 }
cdb2a47f 1306
4f25e72f
CB
1307 /* Setup resource limits */
1308 if (!lxc_list_empty(&conf->limits)) {
1309 ret = setup_resource_limits(&conf->limits, pid);
1310 if (ret < 0)
1311 goto on_error;
4f3b6a85
CB
1312
1313 TRACE("Setup resource limits");
4f25e72f 1314 }
cdb2a47f 1315
4f25e72f
CB
1316 if (options->attach_flags & LXC_ATTACH_TERMINAL) {
1317 ret = lxc_attach_terminal_mainloop_init(&terminal, &descr);
1318 if (ret < 0)
1319 goto on_error;
9c4693b8 1320
4f25e72f
CB
1321 TRACE("Initialized terminal mainloop");
1322 }
9c4693b8 1323
4f25e72f
CB
1324 /* Let the child process know to go ahead. */
1325 status = 0;
1326 ret = lxc_write_nointr(ipc_sockets[0], &status, sizeof(status));
1327 if (ret != sizeof(status))
1328 goto close_mainloop;
ba2be1a8 1329
4f25e72f 1330 TRACE("Told intermediate process to start initializing");
ea918412 1331
4f25e72f
CB
1332 /* Get pid of attached process from intermediate process. */
1333 ret = lxc_read_nointr(ipc_sockets[0], &attached_pid, sizeof(attached_pid));
1334 if (ret != sizeof(attached_pid))
1335 goto close_mainloop;
ba2be1a8 1336
4f25e72f 1337 TRACE("Received pid %d of attached process in parent pid namespace", attached_pid);
ba2be1a8 1338
4f25e72f
CB
1339 /* Ignore SIGKILL (CTRL-C) and SIGQUIT (CTRL-\) - issue #313. */
1340 if (options->stdin_fd == 0) {
1341 signal(SIGINT, SIG_IGN);
1342 signal(SIGQUIT, SIG_IGN);
1343 }
ba2be1a8 1344
4f25e72f
CB
1345 /* Reap intermediate process. */
1346 ret = wait_for_pid(pid);
1347 if (ret < 0)
1348 goto close_mainloop;
ba2be1a8 1349
4f25e72f 1350 TRACE("Intermediate process %d exited", pid);
ea918412 1351
4f25e72f
CB
1352 /* We will always have to reap the attached process now. */
1353 to_cleanup_pid = attached_pid;
9c4693b8 1354
4f25e72f
CB
1355 /* Open LSM fd and send it to child. */
1356 if ((options->namespaces & CLONE_NEWNS) &&
ab919e5f 1357 (options->attach_flags & LXC_ATTACH_LSM) && ctx->lsm_label) {
ad001fb6 1358 __do_close int labelfd = -EBADF;
4f25e72f 1359 bool on_exec;
ea918412 1360
4f25e72f
CB
1361 ret = -1;
1362 on_exec = options->attach_flags & LXC_ATTACH_LSM_EXEC ? true : false;
ab919e5f
CB
1363 labelfd = ctx->lsm_ops->process_label_fd_get(ctx->lsm_ops,
1364 attached_pid, on_exec);
4f25e72f
CB
1365 if (labelfd < 0)
1366 goto close_mainloop;
9c4693b8 1367
4f25e72f 1368 TRACE("Opened LSM label file descriptor %d", labelfd);
ea918412 1369
4f25e72f
CB
1370 /* Send child fd of the LSM security module to write to. */
1371 ret = lxc_abstract_unix_send_fds(ipc_sockets[0], &labelfd, 1, NULL, 0);
1372 if (ret <= 0) {
1373 if (ret < 0)
1374 SYSERROR("Failed to send lsm label fd");
4f25e72f
CB
1375 goto close_mainloop;
1376 }
1377
4f25e72f 1378 TRACE("Sent LSM label file descriptor %d to child", labelfd);
9c4693b8 1379 }
ea918412 1380
4f25e72f
CB
1381 if (conf->seccomp.seccomp) {
1382 ret = lxc_seccomp_recv_notifier_fd(&conf->seccomp, ipc_sockets[0]);
1383 if (ret < 0)
1384 goto close_mainloop;
9c4693b8 1385
4f25e72f 1386 ret = lxc_seccomp_add_notifier(name, lxcpath, &conf->seccomp);
d6d979bc 1387 if (ret < 0)
4f25e72f 1388 goto close_mainloop;
d6d979bc 1389 }
9c4693b8 1390
4f25e72f
CB
1391 /* We're done, the child process should now execute whatever it
1392 * is that the user requested. The parent can now track it with
1393 * waitpid() or similar.
1394 */
9c4693b8 1395
4f25e72f 1396 *attached_process = attached_pid;
a998454a 1397
4f25e72f
CB
1398 /* Now shut down communication with child, we're done. */
1399 shutdown(ipc_sockets[0], SHUT_RDWR);
1400 close(ipc_sockets[0]);
1401 ipc_sockets[0] = -1;
f157b056 1402
4f25e72f
CB
1403 ret_parent = 0;
1404 to_cleanup_pid = -1;
ea918412 1405
4f25e72f
CB
1406 if (options->attach_flags & LXC_ATTACH_TERMINAL) {
1407 ret = lxc_mainloop(&descr, -1);
1408 if (ret < 0) {
1409 ret_parent = -1;
1410 to_cleanup_pid = attached_pid;
1411 }
a998454a 1412 }
ea918412 1413
4f25e72f 1414close_mainloop:
9e84479f 1415 if (options->attach_flags & LXC_ATTACH_TERMINAL)
4f25e72f 1416 lxc_mainloop_close(&descr);
9c4693b8 1417
4f25e72f
CB
1418on_error:
1419 if (ipc_sockets[0] >= 0) {
1420 shutdown(ipc_sockets[0], SHUT_RDWR);
1421 close(ipc_sockets[0]);
9c4693b8 1422 }
ea918412 1423
4f25e72f
CB
1424 if (to_cleanup_pid > 0)
1425 (void)wait_for_pid(to_cleanup_pid);
1426
1427 if (options->attach_flags & LXC_ATTACH_TERMINAL) {
1428 lxc_terminal_delete(&terminal);
1429 lxc_terminal_conf_free(&terminal);
1430 }
9c4693b8 1431
dd53c8af 1432 put_attach_context(ctx);
4f25e72f 1433 return ret_parent;
9c4693b8
CS
1434}
1435
06346bb0 1436int lxc_attach_run_command(void *payload)
9c4693b8 1437{
06346bb0
CB
1438 int ret = -1;
1439 lxc_attach_command_t *cmd = payload;
9c4693b8 1440
06346bb0
CB
1441 ret = execvp(cmd->program, cmd->argv);
1442 if (ret < 0) {
1443 switch (errno) {
1444 case ENOEXEC:
1445 ret = 126;
cf0fd972 1446 break;
06346bb0
CB
1447 case ENOENT:
1448 ret = 127;
cf0fd972 1449 break;
06346bb0
CB
1450 }
1451 }
ea918412 1452
c2af3a15 1453 return log_error_errno(ret, errno, "Failed to exec \"%s\"", cmd->program);
9c4693b8
CS
1454}
1455
1456int lxc_attach_run_shell(void* payload)
1457{
cd8f5663 1458 __do_free char *buf = NULL;
9c4693b8 1459 uid_t uid;
cb7aa5e8
DJ
1460 struct passwd pwent;
1461 struct passwd *pwentp = NULL;
9c4693b8 1462 char *user_shell;
cb7aa5e8
DJ
1463 size_t bufsize;
1464 int ret;
9c4693b8 1465
8ce83369 1466 /* Ignore payload parameter. */
9c4693b8
CS
1467 (void)payload;
1468
1469 uid = getuid();
cb7aa5e8
DJ
1470
1471 bufsize = sysconf(_SC_GETPW_R_SIZE_MAX);
1472 if (bufsize == -1)
1473 bufsize = 1024;
1474
1475 buf = malloc(bufsize);
1476 if (buf) {
1477 ret = getpwuid_r(uid, &pwent, buf, bufsize, &pwentp);
1478 if (!pwentp) {
1479 if (ret == 0)
ea918412 1480 WARN("Could not find matched password record");
cb7aa5e8
DJ
1481
1482 WARN("Failed to get password record - %u", uid);
1483 }
1484 }
9c4693b8 1485
8ce83369
CB
1486 /* This probably happens because of incompatible nss implementations in
1487 * host and container (remember, this code is still using the host's
1488 * glibc but our mount namespace is in the container) we may try to get
1489 * the information by spawning a [getent passwd uid] process and parsing
1490 * the result.
9c4693b8 1491 */
cb7aa5e8 1492 if (!pwentp)
9c4693b8
CS
1493 user_shell = lxc_attach_getpwshell(uid);
1494 else
cb7aa5e8 1495 user_shell = pwent.pw_shell;
ea918412 1496
9c4693b8 1497 if (user_shell)
acf47e1b 1498 execlp(user_shell, user_shell, (char *)NULL);
9c4693b8 1499
8ce83369
CB
1500 /* Executed if either no passwd entry or execvp fails, we will fall back
1501 * on /bin/sh as a default shell.
9c4693b8 1502 */
acf47e1b 1503 execlp("/bin/sh", "/bin/sh", (char *)NULL);
ea918412 1504
edeb1836 1505 SYSERROR("Failed to execute shell");
cb7aa5e8 1506 if (!pwentp)
edeb1836 1507 free(user_shell);
ea918412 1508
9c4693b8
CS
1509 return -1;
1510}