]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/attach.c
cgroup2_devices: fix logic error
[mirror_lxc.git] / src / lxc / attach.c
CommitLineData
cc73685d 1/* SPDX-License-Identifier: LGPL-2.1+ */
e0732705 2
d38dd64a
CB
3#ifndef _GNU_SOURCE
4#define _GNU_SOURCE 1
5#endif
e0732705
CS
6#include <errno.h>
7#include <fcntl.h>
c476bdce 8#include <grp.h>
604ca1c0 9#include <linux/unistd.h>
6f4f1937 10#include <pwd.h>
0bece477 11#include <pthread.h>
6f4f1937
CB
12#include <signal.h>
13#include <stdio.h>
14#include <stdlib.h>
15#include <string.h>
6f4f1937 16#include <sys/mount.h>
e0732705
CS
17#include <sys/param.h>
18#include <sys/prctl.h>
5ec27989 19#include <sys/socket.h>
1ba0013f 20#include <sys/syscall.h>
905022f7 21#include <sys/wait.h>
604ca1c0
CB
22#include <termios.h>
23#include <unistd.h>
6f4f1937
CB
24
25#include <lxc/lxccontainer.h>
e0732705 26
81f466d0 27#include "af_unix.h"
e0732705
CS
28#include "attach.h"
29#include "caps.h"
9c4693b8 30#include "cgroup.h"
6f4f1937 31#include "commands.h"
2c4ea790 32#include "conf.h"
6f4f1937 33#include "config.h"
9b8e3c96 34#include "confile.h"
6f4f1937
CB
35#include "log.h"
36#include "lsm/lsm.h"
37#include "lxclock.h"
38#include "lxcseccomp.h"
604ca1c0 39#include "macro.h"
ba2be1a8 40#include "mainloop.h"
cd8f5663 41#include "memory_utils.h"
6f4f1937 42#include "namespace.h"
38e5c2db 43#include "raw_syscalls.h"
59524108 44#include "syscall_wrappers.h"
0ed9b1bc 45#include "terminal.h"
6f4f1937 46#include "utils.h"
9c4693b8
CS
47
48#if HAVE_SYS_PERSONALITY_H
49#include <sys/personality.h>
50#endif
e0732705 51
ac2cecc4 52lxc_log_define(attach, lxc);
e0732705 53
ef05d368
CB
54/* Define default options if no options are supplied by the user. */
55static lxc_attach_options_t attach_static_default_options = LXC_ATTACH_OPTIONS_DEFAULT;
56
74a3920a 57static struct lxc_proc_context_info *lxc_proc_get_context_info(pid_t pid)
e0732705 58{
cd8f5663
CB
59 __do_free char *line = NULL;
60 __do_fclose FILE *proc_file = NULL;
6f4f1937
CB
61 int ret;
62 bool found;
604ca1c0 63 char proc_fn[LXC_PROC_STATUS_LEN];
cd8f5663 64 struct lxc_proc_context_info *info;
e0732705 65 size_t line_bufsz = 0;
e0732705 66
8ce83369 67 /* Read capabilities. */
604ca1c0
CB
68 ret = snprintf(proc_fn, LXC_PROC_STATUS_LEN, "/proc/%d/status", pid);
69 if (ret < 0 || ret >= LXC_PROC_STATUS_LEN)
cd8f5663 70 return NULL;
e0732705
CS
71
72 proc_file = fopen(proc_fn, "r");
73 if (!proc_file) {
cd8f5663
CB
74 SYSERROR("Failed to open %s", proc_fn);
75 return NULL;
e0732705
CS
76 }
77
8ce83369 78 info = calloc(1, sizeof(*info));
cd8f5663 79 if (!info)
8ce83369 80 return NULL;
8ce83369
CB
81
82 found = false;
ea918412 83
e0732705
CS
84 while (getline(&line, &line_bufsz, proc_file) != -1) {
85 ret = sscanf(line, "CapBnd: %llx", &info->capability_mask);
8ce83369
CB
86 if (ret != EOF && ret == 1) {
87 found = true;
e0732705
CS
88 break;
89 }
90 }
91
e0732705 92 if (!found) {
cd8f5663
CB
93 ERROR("Could not read capability bounding set from %s", proc_fn);
94 free(info);
95 return NULL;
e0732705
CS
96 }
97
fe4de9a6 98 info->lsm_label = lsm_process_label_get(pid);
877f3a04
CB
99 info->ns_inherited = 0;
100 memset(info->ns_fd, -1, sizeof(int) * LXC_NS_MAX);
e0732705 101
e0732705 102 return info;
e0732705
CS
103}
104
877f3a04
CB
105static inline void lxc_proc_close_ns_fd(struct lxc_proc_context_info *ctx)
106{
81102768
CB
107 for (int i = 0; i < LXC_NS_MAX; i++)
108 close_prot_errno_disarm(ctx->ns_fd[i]);
877f3a04
CB
109}
110
fe4de9a6
DE
111static void lxc_proc_put_context_info(struct lxc_proc_context_info *ctx)
112{
f10fad2f 113 free(ctx->lsm_label);
08ea9270
CB
114 ctx->lsm_label = NULL;
115
116 if (ctx->container) {
2c4ea790 117 lxc_container_put(ctx->container);
08ea9270
CB
118 ctx->container = NULL;
119 }
120
877f3a04 121 lxc_proc_close_ns_fd(ctx);
fe4de9a6
DE
122 free(ctx);
123}
124
299d1198
CB
125/**
126 * in_same_namespace - Check whether two processes are in the same namespace.
127 * @pid1 - PID of the first process.
128 * @pid2 - PID of the second process.
129 * @ns - Name of the namespace to check. Must correspond to one of the names
130 * for the namespaces as shown in /proc/<pid/ns/
131 *
132 * If the two processes are not in the same namespace returns an fd to the
133 * namespace of the second process identified by @pid2. If the two processes are
134 * in the same namespace returns -EINVAL, -1 if an error occurred.
135 */
136static int in_same_namespace(pid_t pid1, pid_t pid2, const char *ns)
137{
3cc629fe
CB
138 __do_close_prot_errno int ns_fd1 = -1, ns_fd2 = -1;
139 int ret = -1;
299d1198
CB
140 struct stat ns_st1, ns_st2;
141
142 ns_fd1 = lxc_preserve_ns(pid1, ns);
134284c3
CB
143 if (ns_fd1 < 0) {
144 /* The kernel does not support this namespace. This is not an
145 * error.
146 */
147 if (errno == ENOENT)
148 return -EINVAL;
149
3cc629fe 150 return -1;
134284c3 151 }
299d1198
CB
152
153 ns_fd2 = lxc_preserve_ns(pid2, ns);
21d0acc2 154 if (ns_fd2 < 0)
3cc629fe 155 return -1;
299d1198
CB
156
157 ret = fstat(ns_fd1, &ns_st1);
21d0acc2 158 if (ret < 0)
3cc629fe 159 return -1;
299d1198
CB
160
161 ret = fstat(ns_fd2, &ns_st2);
21d0acc2 162 if (ret < 0)
3cc629fe 163 return -1;
299d1198
CB
164
165 /* processes are in the same namespace */
3cc629fe
CB
166 if ((ns_st1.st_dev == ns_st2.st_dev) && (ns_st1.st_ino == ns_st2.st_ino))
167 return -EINVAL;
299d1198
CB
168
169 /* processes are in different namespaces */
3cc629fe 170 return move_fd(ns_fd2);
299d1198
CB
171}
172
877f3a04 173static int lxc_attach_to_ns(pid_t pid, struct lxc_proc_context_info *ctx)
99d50954 174{
21d0acc2 175 int i, ret;
99d50954 176
26818618 177 for (i = 0; i < LXC_NS_MAX; i++) {
877f3a04 178 if (ctx->ns_fd[i] < 0)
26818618
CB
179 continue;
180
21d0acc2 181 ret = setns(ctx->ns_fd[i], ns_info[i].clone_flag);
182 if (ret < 0) {
299d1198 183 SYSERROR("Failed to attach to %s namespace of %d",
ea918412 184 ns_info[i].proc_name, pid);
99d50954
CS
185 return -1;
186 }
187
299d1198 188 DEBUG("Attached to %s namespace of %d", ns_info[i].proc_name, pid);
99d50954
CS
189 }
190
191 return 0;
192}
193
e4103cf6 194int lxc_attach_remount_sys_proc(void)
7a0b0b56
CS
195{
196 int ret;
197
198 ret = unshare(CLONE_NEWNS);
199 if (ret < 0) {
ea918412 200 SYSERROR("Failed to unshare mount namespace");
7a0b0b56
CS
201 return -1;
202 }
203
2c6f3fc9 204 if (detect_shared_rootfs()) {
6f4f1937 205 if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL)) {
ea918412 206 SYSERROR("Failed to make / rslave");
2c6f3fc9
SH
207 ERROR("Continuing...");
208 }
209 }
210
8ce83369 211 /* Assume /proc is always mounted, so remount it. */
7a0b0b56
CS
212 ret = umount2("/proc", MNT_DETACH);
213 if (ret < 0) {
ea918412 214 SYSERROR("Failed to unmount /proc");
7a0b0b56
CS
215 return -1;
216 }
217
218 ret = mount("none", "/proc", "proc", 0, NULL);
219 if (ret < 0) {
ea918412 220 SYSERROR("Failed to remount /proc");
7a0b0b56
CS
221 return -1;
222 }
223
8ce83369
CB
224 /* Try to umount /sys. If it's not a mount point, we'll get EINVAL, then
225 * we ignore it because it may not have been mounted in the first place.
7a0b0b56
CS
226 */
227 ret = umount2("/sys", MNT_DETACH);
228 if (ret < 0 && errno != EINVAL) {
ea918412 229 SYSERROR("Failed to unmount /sys");
7a0b0b56
CS
230 return -1;
231 } else if (ret == 0) {
8ce83369 232 /* Remount it. */
7a0b0b56
CS
233 ret = mount("none", "/sys", "sysfs", 0, NULL);
234 if (ret < 0) {
ea918412 235 SYSERROR("Failed to remount /sys");
7a0b0b56
CS
236 return -1;
237 }
238 }
239
240 return 0;
241}
242
74a3920a 243static int lxc_attach_drop_privs(struct lxc_proc_context_info *ctx)
e0732705 244{
6f4f1937 245 int cap, last_cap;
e0732705 246
6f4f1937 247 last_cap = lxc_caps_last_cap();
e0732705
CS
248 for (cap = 0; cap <= last_cap; cap++) {
249 if (ctx->capability_mask & (1LL << cap))
250 continue;
251
b81689a1
CB
252 if (prctl(PR_CAPBSET_DROP, prctl_arg(cap), prctl_arg(0),
253 prctl_arg(0), prctl_arg(0))) {
94ac256f 254 SYSERROR("Failed to drop capability %d", cap);
e0732705
CS
255 return -1;
256 }
ea918412 257
94ac256f 258 TRACE("Dropped capability %d", cap);
e0732705
CS
259 }
260
261 return 0;
262}
905022f7 263
7385273f 264static int lxc_attach_set_environment(struct lxc_proc_context_info *init_ctx,
265 enum lxc_attach_env_policy_t policy,
6f4f1937 266 char **extra_env, char **extra_keep)
b3a39ba6 267{
3d55242a 268 int ret;
7385273f 269 struct lxc_list *iterator;
270
799f96fd 271 if (policy == LXC_ATTACH_CLEAR_ENV) {
3d5e9f48 272 int path_kept = 0;
6f4f1937 273 char **extra_keep_store = NULL;
3d5e9f48
CS
274
275 if (extra_keep) {
276 size_t count, i;
277
3d55242a
CB
278 for (count = 0; extra_keep[count]; count++)
279 ;
3d5e9f48
CS
280
281 extra_keep_store = calloc(count, sizeof(char *));
3d55242a 282 if (!extra_keep_store)
3d5e9f48 283 return -1;
3d55242a 284
3d5e9f48
CS
285 for (i = 0; i < count; i++) {
286 char *v = getenv(extra_keep[i]);
287 if (v) {
288 extra_keep_store[i] = strdup(v);
289 if (!extra_keep_store[i]) {
3d5e9f48
CS
290 while (i > 0)
291 free(extra_keep_store[--i]);
ea918412 292
3d5e9f48
CS
293 free(extra_keep_store);
294 return -1;
295 }
3d55242a 296
3d5e9f48
CS
297 if (strcmp(extra_keep[i], "PATH") == 0)
298 path_kept = 1;
299 }
3d5e9f48
CS
300 }
301 }
302
799f96fd 303 if (clearenv()) {
a9cab7e3 304 if (extra_keep_store) {
3d55242a
CB
305 char **p;
306
a9cab7e3
CS
307 for (p = extra_keep_store; *p; p++)
308 free(*p);
3d55242a 309
a9cab7e3
CS
310 free(extra_keep_store);
311 }
3d55242a 312
ea918412 313 ERROR("Failed to clear environment");
3d5e9f48
CS
314 return -1;
315 }
316
317 if (extra_keep_store) {
318 size_t i;
6f4f1937 319
3d5e9f48 320 for (i = 0; extra_keep[i]; i++) {
acd4922e 321 if (extra_keep_store[i]) {
3d55242a
CB
322 ret = setenv(extra_keep[i], extra_keep_store[i], 1);
323 if (ret < 0)
a24c5678 324 SYSWARN("Failed to set environment variable");
acd4922e 325 }
ea918412 326
3d5e9f48
CS
327 free(extra_keep_store[i]);
328 }
ea918412 329
3d5e9f48
CS
330 free(extra_keep_store);
331 }
332
8ce83369
CB
333 /* Always set a default path; shells and execlp tend to be fine
334 * without it, but there is a disturbing number of C programs
335 * out there that just assume that getenv("PATH") is never NULL
336 * and then die a painful segfault death.
337 */
3d55242a
CB
338 if (!path_kept) {
339 ret = setenv("PATH", "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", 1);
340 if (ret < 0)
a24c5678 341 SYSWARN("Failed to set environment variable");
3d55242a 342 }
b3a39ba6
DW
343 }
344
3d55242a
CB
345 ret = putenv("container=lxc");
346 if (ret < 0) {
a24c5678 347 SYSWARN("Failed to set environment variable");
b3a39ba6
DW
348 return -1;
349 }
350
7385273f 351 /* Set container environment variables.*/
352 if (init_ctx && init_ctx->container && init_ctx->container->lxc_conf) {
353 lxc_list_for_each(iterator, &init_ctx->container->lxc_conf->environment) {
3d55242a
CB
354 char *env_tmp;
355
356 env_tmp = strdup((char *)iterator->elem);
357 if (!env_tmp)
7385273f 358 return -1;
7385273f 359
3d55242a
CB
360 ret = putenv(env_tmp);
361 if (ret < 0) {
362 SYSERROR("Failed to set environment variable: %s", (char *)iterator->elem);
7385273f 363 return -1;
364 }
365 }
366 }
367
8ce83369 368 /* Set extra environment variables. */
3d5e9f48
CS
369 if (extra_env) {
370 for (; *extra_env; extra_env++) {
3d55242a 371 char *p;
ea918412 372
8ce83369
CB
373 /* We just assume the user knows what they are doing, so
374 * we don't do any checks.
375 */
3d55242a
CB
376 p = strdup(*extra_env);
377 if (!p)
3d5e9f48 378 return -1;
3d55242a
CB
379
380 ret = putenv(p);
381 if (ret < 0)
a24c5678 382 SYSWARN("Failed to set environment variable");
3d5e9f48
CS
383 }
384 }
385
b3a39ba6
DW
386 return 0;
387}
388
74a3920a 389static char *lxc_attach_getpwshell(uid_t uid)
905022f7 390{
cd8f5663
CB
391 __do_free char *line = NULL;
392 __do_fclose FILE *pipe_f = NULL;
6f4f1937 393 int fd, ret;
905022f7
CS
394 pid_t pid;
395 int pipes[2];
3fa23ac3
CB
396 bool found = false;
397 size_t line_bufsz = 0;
cd8f5663 398 char *result = NULL;
905022f7 399
8ce83369
CB
400 /* We need to fork off a process that runs the getent program, and we
401 * need to capture its output, so we use a pipe for that purpose.
905022f7 402 */
3fa23ac3 403 ret = pipe2(pipes, O_CLOEXEC);
905022f7
CS
404 if (ret < 0)
405 return NULL;
406
407 pid = fork();
408 if (pid < 0) {
409 close(pipes[0]);
410 close(pipes[1]);
411 return NULL;
412 }
413
3fa23ac3 414 if (!pid) {
905022f7
CS
415 char uid_buf[32];
416 char *arguments[] = {
417 "getent",
418 "passwd",
419 uid_buf,
420 NULL
421 };
422
423 close(pipes[0]);
424
8ce83369 425 /* We want to capture stdout. */
3fa23ac3 426 ret = dup2(pipes[1], STDOUT_FILENO);
905022f7 427 close(pipes[1]);
3fa23ac3 428 if (ret < 0)
ea918412 429 _exit(EXIT_FAILURE);
905022f7 430
8ce83369
CB
431 /* Get rid of stdin/stderr, so we try to associate it with
432 * /dev/null.
905022f7 433 */
3fa23ac3 434 fd = open_devnull();
905022f7 435 if (fd < 0) {
3fa23ac3
CB
436 close(STDIN_FILENO);
437 close(STDERR_FILENO);
905022f7 438 } else {
3fa23ac3 439 (void)dup3(fd, STDIN_FILENO, O_CLOEXEC);
59f0e209 440 (void)dup3(fd, STDERR_FILENO, O_CLOEXEC);
905022f7
CS
441 close(fd);
442 }
443
8ce83369 444 /* Finish argument list. */
3fa23ac3
CB
445 ret = snprintf(uid_buf, sizeof(uid_buf), "%ld", (long)uid);
446 if (ret <= 0 || ret >= sizeof(uid_buf))
ea918412 447 _exit(EXIT_FAILURE);
905022f7 448
8ce83369 449 /* Try to run getent program. */
3fa23ac3 450 (void)execvp("getent", arguments);
ea918412 451 _exit(EXIT_FAILURE);
905022f7 452 }
3fa23ac3
CB
453
454 close(pipes[1]);
455
456 pipe_f = fdopen(pipes[0], "r");
457 while (getline(&line, &line_bufsz, pipe_f) != -1) {
458 int i;
459 long value;
460 char *token;
461 char *endptr = NULL, *saveptr = NULL;
462
463 /* If we already found something, just continue to read
464 * until the pipe doesn't deliver any more data, but
465 * don't modify the existing data structure.
466 */
467 if (found)
468 continue;
469
18d4ffde 470 if (!line)
471 continue;
472
3fa23ac3
CB
473 /* Trim line on the right hand side. */
474 for (i = strlen(line); i > 0 && (line[i - 1] == '\n' || line[i - 1] == '\r'); --i)
475 line[i - 1] = '\0';
476
477 /* Split into tokens: first: user name. */
478 token = strtok_r(line, ":", &saveptr);
479 if (!token)
480 continue;
481
482 /* next: dummy password field */
483 token = strtok_r(NULL, ":", &saveptr);
484 if (!token)
485 continue;
486
487 /* next: user id */
488 token = strtok_r(NULL, ":", &saveptr);
489 value = token ? strtol(token, &endptr, 10) : 0;
490 if (!token || !endptr || *endptr || value == LONG_MIN ||
ea918412 491 value == LONG_MAX)
3fa23ac3
CB
492 continue;
493
494 /* dummy sanity check: user id matches */
495 if ((uid_t)value != uid)
496 continue;
497
498 /* skip fields: gid, gecos, dir, go to next field 'shell' */
499 for (i = 0; i < 4; i++) {
500 token = strtok_r(NULL, ":", &saveptr);
501 if (!token)
502 continue;
503 }
ea918412 504
3fa23ac3
CB
505 if (!token)
506 continue;
ea918412 507
3fa23ac3
CB
508 free(result);
509 result = strdup(token);
510
511 /* Sanity check that there are no fields after that. */
512 token = strtok_r(NULL, ":", &saveptr);
513 if (token)
514 continue;
515
516 found = true;
517 }
ea918412 518
3fa23ac3
CB
519 ret = wait_for_pid(pid);
520 if (ret < 0) {
521 free(result);
522 return NULL;
523 }
524
525 if (!found) {
526 free(result);
527 return NULL;
528 }
529
530 return result;
905022f7 531}
cb3e61fa 532
6f4f1937 533static void lxc_attach_get_init_uidgid(uid_t *init_uid, gid_t *init_gid)
cb3e61fa 534{
cd8f5663
CB
535 __do_free char *line = NULL;
536 __do_fclose FILE *proc_file = NULL;
604ca1c0 537 char proc_fn[LXC_PROC_STATUS_LEN];
8ce83369 538 int ret;
cb3e61fa 539 size_t line_bufsz = 0;
cb3e61fa
CS
540 long value = -1;
541 uid_t uid = (uid_t)-1;
542 gid_t gid = (gid_t)-1;
543
604ca1c0
CB
544 ret = snprintf(proc_fn, LXC_PROC_STATUS_LEN, "/proc/%d/status", 1);
545 if (ret < 0 || ret >= LXC_PROC_STATUS_LEN)
7fb45c93 546 return;
cb3e61fa
CS
547
548 proc_file = fopen(proc_fn, "r");
549 if (!proc_file)
550 return;
551
552 while (getline(&line, &line_bufsz, proc_file) != -1) {
8ce83369
CB
553 /* Format is: real, effective, saved set user, fs we only care
554 * about real uid.
cb3e61fa
CS
555 */
556 ret = sscanf(line, "Uid: %ld", &value);
8ce83369 557 if (ret != EOF && ret == 1) {
6f4f1937 558 uid = (uid_t)value;
cb3e61fa
CS
559 } else {
560 ret = sscanf(line, "Gid: %ld", &value);
8ce83369 561 if (ret != EOF && ret == 1)
6f4f1937 562 gid = (gid_t)value;
cb3e61fa 563 }
ea918412 564
cb3e61fa
CS
565 if (uid != (uid_t)-1 && gid != (gid_t)-1)
566 break;
567 }
568
8ce83369 569 /* Only override arguments if we found something. */
cb3e61fa
CS
570 if (uid != (uid_t)-1)
571 *init_uid = uid;
ea918412 572
cb3e61fa
CS
573 if (gid != (gid_t)-1)
574 *init_gid = gid;
575
576 /* TODO: we should also parse supplementary groups and use
8ce83369
CB
577 * setgroups() to set them.
578 */
cb3e61fa 579}
9c4693b8 580
d4db3d14 581static bool fetch_seccomp(struct lxc_container *c, lxc_attach_options_t *options)
2c4ea790 582{
cd8f5663 583 __do_free char *path = NULL;
d4db3d14
CB
584 int ret;
585 bool bret;
2eef2bda 586
6f4f1937
CB
587 if (!(options->namespaces & CLONE_NEWNS) ||
588 !(options->attach_flags & LXC_ATTACH_LSM)) {
c3e3c21a
CB
589 free(c->lxc_conf->seccomp.seccomp);
590 c->lxc_conf->seccomp.seccomp = NULL;
2c4ea790 591 return true;
bd4307f0 592 }
bd7b4e28 593
2e812c16 594 /* Remove current setting. */
d4db3d14 595 if (!c->set_config_item(c, "lxc.seccomp.profile", "") &&
ea918412 596 !c->set_config_item(c, "lxc.seccomp", ""))
2c4ea790 597 return false;
bd7b4e28 598
8ce83369 599 /* Fetch the current profile path over the cmd interface. */
0b427da0 600 path = c->get_running_config_item(c, "lxc.seccomp.profile");
bd7b4e28 601 if (!path) {
d4db3d14 602 INFO("Failed to retrieve lxc.seccomp.profile");
ea918412 603
0b427da0 604 path = c->get_running_config_item(c, "lxc.seccomp");
d4db3d14
CB
605 if (!path) {
606 INFO("Failed to retrieve lxc.seccomp");
607 return true;
608 }
bd7b4e28
SG
609 }
610
8ce83369 611 /* Copy the value into the new lxc_conf. */
d4db3d14 612 bret = c->set_config_item(c, "lxc.seccomp.profile", path);
d4db3d14
CB
613 if (!bret)
614 return false;
bd7b4e28 615
8ce83369 616 /* Attempt to parse the resulting config. */
d4db3d14
CB
617 ret = lxc_read_seccomp_config(c->lxc_conf);
618 if (ret < 0) {
619 ERROR("Failed to retrieve seccomp policy");
2c4ea790
SH
620 return false;
621 }
622
d4db3d14 623 INFO("Retrieved seccomp policy");
2e812c16
CB
624 return true;
625}
626
6f4f1937 627static bool no_new_privs(struct lxc_container *c, lxc_attach_options_t *options)
2e812c16 628{
cd8f5663 629 __do_free char *val = NULL;
2e812c16 630
2e812c16 631 /* Remove current setting. */
bcbef733
CB
632 if (!c->set_config_item(c, "lxc.no_new_privs", "")) {
633 INFO("Failed to unset lxc.no_new_privs");
2e812c16 634 return false;
bcbef733 635 }
2e812c16
CB
636
637 /* Retrieve currently active setting. */
638 val = c->get_running_config_item(c, "lxc.no_new_privs");
639 if (!val) {
bcbef733 640 INFO("Failed to retrieve lxc.no_new_privs");
2e812c16
CB
641 return false;
642 }
643
644 /* Set currently active setting. */
cd8f5663 645 return c->set_config_item(c, "lxc.no_new_privs", val);
2c4ea790
SH
646}
647
9b8e3c96
SH
648static signed long get_personality(const char *name, const char *lxcpath)
649{
7c737378 650 __do_free char *p = NULL;
9b8e3c96 651
6f4f1937 652 p = lxc_cmd_get_config_item(name, "lxc.arch", lxcpath);
9b8e3c96
SH
653 if (!p)
654 return -1;
6f4f1937 655
cd8f5663 656 return lxc_config_parse_arch(p);
9b8e3c96
SH
657}
658
a998454a
CB
659struct attach_clone_payload {
660 int ipc_socket;
9e84479f 661 int terminal_slave_fd;
a998454a
CB
662 lxc_attach_options_t *options;
663 struct lxc_proc_context_info *init_ctx;
664 lxc_attach_exec_t exec_function;
665 void *exec_payload;
666};
667
ba2be1a8
CB
668static void lxc_put_attach_clone_payload(struct attach_clone_payload *p)
669{
81102768
CB
670 close_prot_errno_disarm(p->ipc_socket);
671 close_prot_errno_disarm(p->terminal_slave_fd);
b21da190 672 if (p->init_ctx) {
ba2be1a8 673 lxc_proc_put_context_info(p->init_ctx);
b21da190
CB
674 p->init_ctx = NULL;
675 }
ba2be1a8
CB
676}
677
a998454a
CB
678static int attach_child_main(struct attach_clone_payload *payload)
679{
427a8067 680 int lsm_fd, ret;
a998454a
CB
681 uid_t new_uid;
682 gid_t new_gid;
936efc72
CB
683 uid_t ns_root_uid = 0;
684 gid_t ns_root_gid = 0;
a998454a
CB
685 lxc_attach_options_t* options = payload->options;
686 struct lxc_proc_context_info* init_ctx = payload->init_ctx;
57de839f
CB
687 bool needs_lsm = (options->namespaces & CLONE_NEWNS) &&
688 (options->attach_flags & LXC_ATTACH_LSM) &&
689 init_ctx->lsm_label;
a998454a
CB
690
691 /* A description of the purpose of this functionality is provided in the
692 * lxc-attach(1) manual page. We have to remount here and not in the
693 * parent process, otherwise /proc may not properly reflect the new pid
694 * namespace.
695 */
696 if (!(options->namespaces & CLONE_NEWNS) &&
697 (options->attach_flags & LXC_ATTACH_REMOUNT_PROC_SYS)) {
698 ret = lxc_attach_remount_sys_proc();
b75c344c
CB
699 if (ret < 0)
700 goto on_error;
ea918412 701
b75c344c 702 TRACE("Remounted \"/proc\" and \"/sys\"");
a998454a
CB
703 }
704
b75c344c 705/* Now perform additional attachments. */
a998454a 706#if HAVE_SYS_PERSONALITY_H
a998454a 707 if (options->attach_flags & LXC_ATTACH_SET_PERSONALITY) {
b75c344c
CB
708 long new_personality;
709
710 if (options->personality < 0)
711 new_personality = init_ctx->personality;
712 else
713 new_personality = options->personality;
ea918412 714
a998454a 715 ret = personality(new_personality);
b75c344c
CB
716 if (ret < 0)
717 goto on_error;
ea918412 718
b75c344c 719 TRACE("Set new personality");
a998454a
CB
720 }
721#endif
722
723 if (options->attach_flags & LXC_ATTACH_DROP_CAPABILITIES) {
724 ret = lxc_attach_drop_privs(init_ctx);
b75c344c
CB
725 if (ret < 0)
726 goto on_error;
ea918412 727
b75c344c 728 TRACE("Dropped capabilities");
a998454a
CB
729 }
730
731 /* Always set the environment (specify (LXC_ATTACH_KEEP_ENV, NULL, NULL)
732 * if you want this to be a no-op).
733 */
7385273f 734 ret = lxc_attach_set_environment(init_ctx,
735 options->env_policy,
a998454a
CB
736 options->extra_env_vars,
737 options->extra_keep_env);
b75c344c
CB
738 if (ret < 0)
739 goto on_error;
ea918412 740
b75c344c 741 TRACE("Set up environment");
a998454a 742
57de839f
CB
743 /* This remark only affects fully unprivileged containers:
744 * Receive fd for LSM security module before we set{g,u}id(). The reason
745 * is that on set{g,u}id() the kernel will a) make us undumpable and b)
746 * we will change our effective uid. This means our effective uid will
747 * be different from the effective uid of the process that created us
748 * which means that this processs no longer has capabilities in our
749 * namespace including CAP_SYS_PTRACE. This means we will not be able to
750 * read and /proc/<pid> files for the process anymore when /proc is
751 * mounted with hidepid={1,2}. So let's get the lsm label fd before the
752 * set{g,u}id().
753 */
754 if (needs_lsm) {
b75c344c 755 ret = lxc_abstract_unix_recv_fds(payload->ipc_socket, &lsm_fd, 1, NULL, 0);
9044b79e 756 if (ret <= 0) {
757 if (ret < 0)
758 SYSERROR("Failed to receive lsm label fd");
759
b75c344c 760 goto on_error;
9044b79e 761 }
762
57de839f
CB
763 TRACE("Received LSM label file descriptor %d from parent", lsm_fd);
764 }
765
08ea9270 766 if (options->stdin_fd > 0 && isatty(options->stdin_fd)) {
cd0a2b2f 767 ret = lxc_make_controlling_terminal(options->stdin_fd);
08ea9270
CB
768 if (ret < 0)
769 goto on_error;
770 }
771
b58214ac
CB
772 if (!lxc_setgroups(0, NULL) && errno != EPERM)
773 goto on_error;
774
936efc72
CB
775 if (options->namespaces & CLONE_NEWUSER) {
776 /* Check whether nsuid 0 has a mapping. */
777 ns_root_uid = get_ns_uid(0);
ea918412 778
936efc72
CB
779 /* Check whether nsgid 0 has a mapping. */
780 ns_root_gid = get_ns_gid(0);
a998454a 781
936efc72
CB
782 /* If there's no mapping for nsuid 0 try to retrieve the nsuid
783 * init was started with.
784 */
785 if (ns_root_uid == LXC_INVALID_UID)
786 lxc_attach_get_init_uidgid(&ns_root_uid, &ns_root_gid);
ea918412 787
936efc72
CB
788 if (ns_root_uid == LXC_INVALID_UID)
789 goto on_error;
a998454a 790
464c4611 791 if (!lxc_switch_uid_gid(ns_root_uid, ns_root_gid))
b75c344c 792 goto on_error;
a998454a
CB
793 }
794
936efc72
CB
795 /* Set {u,g}id. */
796 if (options->uid != LXC_INVALID_UID)
797 new_uid = options->uid;
798 else
799 new_uid = ns_root_uid;
800
801 if (options->gid != LXC_INVALID_GID)
802 new_gid = options->gid;
803 else
804 new_gid = ns_root_gid;
805
a998454a
CB
806 if ((init_ctx->container && init_ctx->container->lxc_conf &&
807 init_ctx->container->lxc_conf->no_new_privs) ||
808 (options->attach_flags & LXC_ATTACH_NO_NEW_PRIVS)) {
b81689a1
CB
809 ret = prctl(PR_SET_NO_NEW_PRIVS, prctl_arg(1), prctl_arg(0),
810 prctl_arg(0), prctl_arg(0));
b75c344c
CB
811 if (ret < 0)
812 goto on_error;
ea918412 813
b75c344c 814 TRACE("Set PR_SET_NO_NEW_PRIVS");
a998454a
CB
815 }
816
57de839f 817 if (needs_lsm) {
d3ba7c98 818 bool on_exec;
a998454a
CB
819
820 /* Change into our new LSM profile. */
d3ba7c98 821 on_exec = options->attach_flags & LXC_ATTACH_LSM_EXEC ? true : false;
ea918412 822
d3ba7c98 823 ret = lsm_process_label_set_at(lsm_fd, init_ctx->lsm_label, on_exec);
57de839f 824 close(lsm_fd);
b75c344c
CB
825 if (ret < 0)
826 goto on_error;
ea918412 827
d3ba7c98 828 TRACE("Set %s LSM label to \"%s\"", lsm_name(), init_ctx->lsm_label);
a998454a
CB
829 }
830
831 if (init_ctx->container && init_ctx->container->lxc_conf &&
c3e3c21a 832 init_ctx->container->lxc_conf->seccomp.seccomp) {
cdb2a47f
CB
833 struct lxc_conf *conf = init_ctx->container->lxc_conf;
834
835 ret = lxc_seccomp_load(conf);
b75c344c
CB
836 if (ret < 0)
837 goto on_error;
ea918412 838
b75c344c 839 TRACE("Loaded seccomp profile");
cdb2a47f 840
c3e3c21a
CB
841 ret = lxc_seccomp_send_notifier_fd(&conf->seccomp, payload->ipc_socket);
842 if (ret < 0)
843 goto on_error;
a998454a 844 }
ea918412 845
b75c344c 846 close(payload->ipc_socket);
ba2be1a8
CB
847 payload->ipc_socket = -EBADF;
848 lxc_proc_put_context_info(init_ctx);
d35b372a 849 payload->init_ctx = NULL;
a998454a
CB
850
851 /* The following is done after the communication socket is shut down.
852 * That way, all errors that might (though unlikely) occur up until this
853 * point will have their messages printed to the original stderr (if
854 * logging is so configured) and not the fd the user supplied, if any.
855 */
856
857 /* Fd handling for stdin, stdout and stderr; ignore errors here, user
858 * may want to make sure the fds are closed, for example.
859 */
08ea9270 860 if (options->stdin_fd >= 0 && options->stdin_fd != STDIN_FILENO)
00c72a93 861 (void)dup2(options->stdin_fd, STDIN_FILENO);
08ea9270
CB
862
863 if (options->stdout_fd >= 0 && options->stdout_fd != STDOUT_FILENO)
00c72a93 864 (void)dup2(options->stdout_fd, STDOUT_FILENO);
08ea9270
CB
865
866 if (options->stderr_fd >= 0 && options->stderr_fd != STDERR_FILENO)
00c72a93 867 (void)dup2(options->stderr_fd, STDERR_FILENO);
a998454a
CB
868
869 /* close the old fds */
08ea9270 870 if (options->stdin_fd > STDERR_FILENO)
a998454a 871 close(options->stdin_fd);
08ea9270
CB
872
873 if (options->stdout_fd > STDERR_FILENO)
a998454a 874 close(options->stdout_fd);
08ea9270
CB
875
876 if (options->stderr_fd > STDERR_FILENO)
a998454a
CB
877 close(options->stderr_fd);
878
427a8067
CB
879 /*
880 * Try to remove FD_CLOEXEC flag from stdin/stdout/stderr, but also
a998454a
CB
881 * here, ignore errors.
882 */
427a8067 883 for (int fd = STDIN_FILENO; fd <= STDERR_FILENO; fd++) {
3f62938a 884 ret = fd_cloexec(fd, false);
b75c344c
CB
885 if (ret < 0) {
886 SYSERROR("Failed to clear FD_CLOEXEC from file descriptor %d", fd);
887 goto on_error;
888 }
a998454a
CB
889 }
890
9e84479f
CB
891 if (options->attach_flags & LXC_ATTACH_TERMINAL) {
892 ret = lxc_terminal_prepare_login(payload->terminal_slave_fd);
ba2be1a8 893 if (ret < 0) {
9e84479f 894 SYSERROR("Failed to prepare terminal file descriptor %d", payload->terminal_slave_fd);
ba2be1a8
CB
895 goto on_error;
896 }
ea918412 897
9e84479f 898 TRACE("Prepared terminal file descriptor %d", payload->terminal_slave_fd);
ba2be1a8
CB
899 }
900
936efc72
CB
901 /* Avoid unnecessary syscalls. */
902 if (new_uid == ns_root_uid)
903 new_uid = LXC_INVALID_UID;
904
905 if (new_gid == ns_root_gid)
906 new_gid = LXC_INVALID_GID;
907
464c4611 908 if (!lxc_switch_uid_gid(new_uid, new_gid))
936efc72
CB
909 goto on_error;
910
a998454a 911 /* We're done, so we can now do whatever the user intended us to do. */
c7ac2e1c 912 _exit(payload->exec_function(payload->exec_payload));
b75c344c
CB
913
914on_error:
ba2be1a8 915 lxc_put_attach_clone_payload(payload);
c7ac2e1c 916 _exit(EXIT_FAILURE);
a998454a
CB
917}
918
9e84479f
CB
/**
 * lxc_attach_terminal - Create a terminal for the attached process.
 * @conf     - Container config passed to lxc_terminal_map_ids().
 * @terminal - Terminal structure to initialize and populate.
 *
 * Returns 0 on success. Returns -1 when the terminal cannot be created or its
 * ids cannot be mapped; in the latter case the terminal is deleted and its
 * configuration freed again.
 */
static int lxc_attach_terminal(struct lxc_conf *conf,
			       struct lxc_terminal *terminal)
{
	lxc_terminal_init(terminal);

	if (lxc_terminal_create(terminal) < 0) {
		ERROR("Failed to create terminal");
		return -1;
	}

	/* Shift ttys to container. */
	if (lxc_terminal_map_ids(conf, terminal) < 0) {
		ERROR("Failed to chown terminal");
		lxc_terminal_delete(terminal);
		lxc_terminal_conf_free(terminal);
		return -1;
	}

	return 0;
}
946
9e84479f
CB
/**
 * lxc_attach_terminal_mainloop_init - Set up a mainloop serving @terminal.
 * @terminal - Terminal whose handlers are registered.
 * @descr    - Mainloop descriptor to open.
 *
 * Returns 0 on success, -1 on failure. If registering the terminal fails the
 * freshly opened mainloop is closed again.
 */
static int lxc_attach_terminal_mainloop_init(struct lxc_terminal *terminal,
					     struct lxc_epoll_descr *descr)
{
	if (lxc_mainloop_open(descr) < 0) {
		ERROR("Failed to create mainloop");
		return -1;
	}

	if (lxc_terminal_mainloop_add(descr, terminal) < 0) {
		ERROR("Failed to add handlers to mainloop");
		lxc_mainloop_close(descr);
		return -1;
	}

	return 0;
}
967
9e84479f 968static inline void lxc_attach_terminal_close_master(struct lxc_terminal *terminal)
ba2be1a8 969{
19a3e906 970 close_prot_errno_disarm(terminal->master);
ba2be1a8
CB
971}
972
9e84479f 973static inline void lxc_attach_terminal_close_slave(struct lxc_terminal *terminal)
ba2be1a8 974{
19a3e906 975 close_prot_errno_disarm(terminal->slave);
ba2be1a8
CB
976}
977
9e84479f 978static inline void lxc_attach_terminal_close_peer(struct lxc_terminal *terminal)
ba2be1a8 979{
19a3e906 980 close_prot_errno_disarm(terminal->peer);
ba2be1a8
CB
981}
982
9e84479f 983static inline void lxc_attach_terminal_close_log(struct lxc_terminal *terminal)
ba2be1a8 984{
19a3e906 985 close_prot_errno_disarm(terminal->log_fd);
ba2be1a8
CB
986}
987
908fbc1a
CB
988int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function,
989 void *exec_payload, lxc_attach_options_t *options,
990 pid_t *attached_process)
9c4693b8 991{
877f3a04 992 int i, ret, status;
9c4693b8 993 int ipc_sockets[2];
6f4f1937 994 char *cwd, *new_cwd;
9b8e3c96 995 signed long personality;
ba2be1a8 996 pid_t attached_pid, init_pid, pid;
6f4f1937 997 struct lxc_proc_context_info *init_ctx;
9e84479f 998 struct lxc_terminal terminal;
1cce35e6 999 struct lxc_conf *conf;
908fbc1a 1000 char *name, *lxcpath;
a998454a 1001 struct attach_clone_payload payload = {0};
9c4693b8 1002
877f3a04
CB
1003 ret = access("/proc/self/ns", X_OK);
1004 if (ret) {
ea918412 1005 SYSERROR("Does this kernel version support namespaces?");
877f3a04
CB
1006 return -1;
1007 }
1008
908fbc1a 1009 if (!container)
540a2f70 1010 return ret_set_errno(-1, EINVAL);
908fbc1a
CB
1011
1012 if (!lxc_container_get(container))
540a2f70 1013 return ret_set_errno(-1, EINVAL);
908fbc1a
CB
1014
1015 name = container->name;
1016 lxcpath = container->config_path;
1017
9c4693b8
CS
1018 if (!options)
1019 options = &attach_static_default_options;
1020
1021 init_pid = lxc_cmd_get_init_pid(name, lxcpath);
1022 if (init_pid < 0) {
ae026f55 1023 ERROR("Failed to get init pid");
908fbc1a 1024 lxc_container_put(container);
9c4693b8
CS
1025 return -1;
1026 }
1027
1028 init_ctx = lxc_proc_get_context_info(init_pid);
1029 if (!init_ctx) {
6f4f1937 1030 ERROR("Failed to get context of init process: %ld", (long)init_pid);
908fbc1a 1031 lxc_container_put(container);
9c4693b8
CS
1032 return -1;
1033 }
1034
908fbc1a
CB
1035 init_ctx->container = container;
1036
9b8e3c96
SH
1037 personality = get_personality(name, lxcpath);
1038 if (init_ctx->personality < 0) {
6f4f1937 1039 ERROR("Failed to get personality of the container");
9b8e3c96
SH
1040 lxc_proc_put_context_info(init_ctx);
1041 return -1;
1042 }
1043 init_ctx->personality = personality;
1044
ba773996
CB
1045 if (!init_ctx->container->lxc_conf) {
1046 init_ctx->container->lxc_conf = lxc_conf_init();
62de1db6
CB
1047 if (!init_ctx->container->lxc_conf) {
1048 lxc_proc_put_context_info(init_ctx);
ea918412 1049 return -1;
62de1db6 1050 }
ba773996 1051 }
1cce35e6 1052 conf = init_ctx->container->lxc_conf;
ba773996 1053
bd4307f0 1054 if (!fetch_seccomp(init_ctx->container, options))
ae026f55 1055 WARN("Failed to get seccomp policy");
2c4ea790 1056
bd4307f0 1057 if (!no_new_privs(init_ctx->container, options))
ae026f55 1058 WARN("Could not determine whether PR_SET_NO_NEW_PRIVS is set");
2e812c16 1059
9c4693b8
CS
1060 cwd = getcwd(NULL, 0);
1061
8ce83369
CB
1062 /* Determine which namespaces the container was created with
1063 * by asking lxc-start, if necessary.
9c4693b8
CS
1064 */
1065 if (options->namespaces == -1) {
1066 options->namespaces = lxc_cmd_get_clone_flags(name, lxcpath);
1067 /* call failed */
1068 if (options->namespaces == -1) {
8ce83369 1069 ERROR("Failed to automatically determine the "
877f3a04 1070 "namespaces which the container uses");
9c4693b8 1071 free(cwd);
fe4de9a6 1072 lxc_proc_put_context_info(init_ctx);
9c4693b8
CS
1073 return -1;
1074 }
877f3a04
CB
1075
1076 for (i = 0; i < LXC_NS_MAX; i++) {
1077 if (ns_info[i].clone_flag & CLONE_NEWCGROUP)
1078 if (!(options->attach_flags & LXC_ATTACH_MOVE_TO_CGROUP) ||
1079 !cgns_supported())
1080 continue;
1081
1082 if (ns_info[i].clone_flag & options->namespaces)
1083 continue;
1084
1085 init_ctx->ns_inherited |= ns_info[i].clone_flag;
1086 }
1087 }
1088
0059379f 1089 pid = lxc_raw_getpid();
ea918412 1090
877f3a04 1091 for (i = 0; i < LXC_NS_MAX; i++) {
ea918412 1092 int j;
877f3a04
CB
1093
1094 if (options->namespaces & ns_info[i].clone_flag)
1095 init_ctx->ns_fd[i] = lxc_preserve_ns(init_pid, ns_info[i].proc_name);
1096 else if (init_ctx->ns_inherited & ns_info[i].clone_flag)
1097 init_ctx->ns_fd[i] = in_same_namespace(pid, init_pid, ns_info[i].proc_name);
1098 else
1099 continue;
ea918412 1100
877f3a04
CB
1101 if (init_ctx->ns_fd[i] >= 0)
1102 continue;
1103
1104 if (init_ctx->ns_fd[i] == -EINVAL) {
1105 DEBUG("Inheriting %s namespace from %d",
1106 ns_info[i].proc_name, pid);
1107 init_ctx->ns_inherited &= ~ns_info[i].clone_flag;
1108 continue;
1109 }
1110
1111 /* We failed to preserve the namespace. */
ea918412 1112 SYSERROR("Failed to attach to %s namespace of %d",
1113 ns_info[i].proc_name, pid);
1114
877f3a04
CB
1115 /* Close all already opened file descriptors before we return an
1116 * error, so we don't leak them.
1117 */
1118 for (j = 0; j < i; j++)
1119 close(init_ctx->ns_fd[j]);
1120
877f3a04
CB
1121 free(cwd);
1122 lxc_proc_put_context_info(init_ctx);
1123 return -1;
9c4693b8
CS
1124 }
1125
9e84479f
CB
1126 if (options->attach_flags & LXC_ATTACH_TERMINAL) {
1127 ret = lxc_attach_terminal(conf, &terminal);
ba2be1a8 1128 if (ret < 0) {
9e84479f 1129 ERROR("Failed to setup new terminal");
ba2be1a8
CB
1130 free(cwd);
1131 lxc_proc_put_context_info(init_ctx);
1132 return -1;
1133 }
1134
9e84479f 1135 terminal.log_fd = options->log_fd;
c948657b 1136 } else {
9e84479f 1137 lxc_terminal_init(&terminal);
ba2be1a8
CB
1138 }
1139
8ce83369
CB
1140 /* Create a socket pair for IPC communication; set SOCK_CLOEXEC in order
1141 * to make sure we don't irritate other threads that want to fork+exec
1142 * away
9c4693b8
CS
1143 *
1144 * IMPORTANT: if the initial process is multithreaded and another call
1145 * just fork()s away without exec'ing directly after, the socket fd will
1146 * exist in the forked process from the other thread and any close() in
8ce83369 1147 * our own child process will not really cause the socket to close
1d801260 1148 * properly, potentially causing the parent to hang.
9c4693b8
CS
1149 *
1150 * For this reason, while IPC is still active, we have to use shutdown()
8ce83369
CB
1151 * if the child exits prematurely in order to signal that the socket is
1152 * closed and cannot assume that the child exiting will automatically do
1153 * that.
9c4693b8
CS
1154 *
1155 * IPC mechanism: (X is receiver)
1156 * initial process intermediate attached
1157 * X <--- send pid of
1158 * attached proc,
1159 * then exit
1160 * send 0 ------------------------------------> X
1161 * [do initialization]
1162 * X <------------------------------------ send 1
1163 * [add to cgroup, ...]
1164 * send 2 ------------------------------------> X
81f466d0
CB
1165 * [set LXC_ATTACH_NO_NEW_PRIVS]
1166 * X <------------------------------------ send 3
1167 * [open LSM label fd]
1168 * send 4 ------------------------------------> X
1169 * [set LSM label]
9c4693b8
CS
1170 * close socket close socket
1171 * run program
1172 */
1173 ret = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
1174 if (ret < 0) {
ae026f55 1175 SYSERROR("Could not set up required IPC mechanism for attaching");
9c4693b8 1176 free(cwd);
fe4de9a6 1177 lxc_proc_put_context_info(init_ctx);
9c4693b8
CS
1178 return -1;
1179 }
1180
e3f0e436
CB
1181 /* Create intermediate subprocess, two reasons:
1182 * 1. We can't setns() in the child itself, since we want to make
8ce83369 1183 * sure we are properly attached to the pidns.
e3f0e436 1184 * 2. Also, the initial thread has to put the attached process
8ce83369
CB
1185 * into the cgroup, which we can only do if we didn't already
1186 * setns() (otherwise, user namespaces will hate us).
9c4693b8
CS
1187 */
1188 pid = fork();
9c4693b8 1189 if (pid < 0) {
ae026f55 1190 SYSERROR("Failed to create first subprocess");
9c4693b8 1191 free(cwd);
fe4de9a6 1192 lxc_proc_put_context_info(init_ctx);
9c4693b8
CS
1193 return -1;
1194 }
1195
1196 if (pid) {
ba2be1a8 1197 int ret_parent = -1;
9c4693b8 1198 pid_t to_cleanup_pid = pid;
ba2be1a8 1199 struct lxc_epoll_descr descr = {0};
9c4693b8 1200
ba2be1a8 1201 /* close unneeded file descriptors */
9c4693b8
CS
1202 close(ipc_sockets[1]);
1203 free(cwd);
ba2be1a8 1204 lxc_proc_close_ns_fd(init_ctx);
9e84479f
CB
1205 if (options->attach_flags & LXC_ATTACH_TERMINAL)
1206 lxc_attach_terminal_close_slave(&terminal);
9c4693b8 1207
8ce83369 1208 /* Attach to cgroup, if requested. */
f4364484 1209 if (options->attach_flags & LXC_ATTACH_MOVE_TO_CGROUP) {
900b6606
CB
1210 /*
1211 * If this is the unified hierarchy cgroup_attach() is
1212 * enough.
1213 */
1214 ret = cgroup_attach(name, lxcpath, pid);
1215 if (ret) {
1216 __do_cgroup_exit struct cgroup_ops *cgroup_ops = NULL;
2202afc9 1217
900b6606
CB
1218 cgroup_ops = cgroup_init(conf);
1219 if (!cgroup_ops)
1220 goto on_error;
2202afc9 1221
900b6606
CB
1222 if (!cgroup_ops->attach(cgroup_ops, name, lxcpath, pid))
1223 goto on_error;
1224 }
2202afc9 1225 TRACE("Moved intermediate process %d into container's cgroups", pid);
f4364484
SG
1226 }
1227
bb2ada6f 1228 /* Setup /proc limits */
1cce35e6
CB
1229 if (!lxc_list_empty(&conf->procs)) {
1230 ret = setup_proc_filesystem(&conf->procs, pid);
bb2ada6f
CB
1231 if (ret < 0)
1232 goto on_error;
1233 }
1234
c6d09e15 1235 /* Setup resource limits */
1cce35e6
CB
1236 if (!lxc_list_empty(&conf->limits)) {
1237 ret = setup_resource_limits(&conf->limits, pid);
ba2be1a8
CB
1238 if (ret < 0)
1239 goto on_error;
1240 }
1241
9e84479f
CB
1242 if (options->attach_flags & LXC_ATTACH_TERMINAL) {
1243 ret = lxc_attach_terminal_mainloop_init(&terminal, &descr);
ba2be1a8 1244 if (ret < 0)
6f4f1937 1245 goto on_error;
ea918412 1246
9e84479f 1247 TRACE("Initialized terminal mainloop");
ba2be1a8 1248 }
c6d09e15 1249
8ce83369 1250 /* Let the child process know to go ahead. */
f4364484
SG
1251 status = 0;
1252 ret = lxc_write_nointr(ipc_sockets[0], &status, sizeof(status));
94ac256f 1253 if (ret != sizeof(status))
ba2be1a8 1254 goto close_mainloop;
ea918412 1255
94ac256f 1256 TRACE("Told intermediate process to start initializing");
f4364484 1257
8ce83369 1258 /* Get pid of attached process from intermediate process. */
94ac256f
CB
1259 ret = lxc_read_nointr(ipc_sockets[0], &attached_pid, sizeof(attached_pid));
1260 if (ret != sizeof(attached_pid))
ba2be1a8 1261 goto close_mainloop;
ea918412 1262
94ac256f 1263 TRACE("Received pid %d of attached process in parent pid namespace", attached_pid);
9c4693b8 1264
8ce83369 1265 /* Ignore SIGKILL (CTRL-C) and SIGQUIT (CTRL-\) - issue #313. */
62183f1a
SH
1266 if (options->stdin_fd == 0) {
1267 signal(SIGINT, SIG_IGN);
1268 signal(SIGQUIT, SIG_IGN);
1269 }
2eef2bda 1270
8ce83369 1271 /* Reap intermediate process. */
9c4693b8
CS
1272 ret = wait_for_pid(pid);
1273 if (ret < 0)
ba2be1a8 1274 goto close_mainloop;
ea918412 1275
94ac256f 1276 TRACE("Intermediate process %d exited", pid);
9c4693b8 1277
8ce83369 1278 /* We will always have to reap the attached process now. */
9c4693b8
CS
1279 to_cleanup_pid = attached_pid;
1280
81f466d0 1281 /* Open LSM fd and send it to child. */
6f4f1937
CB
1282 if ((options->namespaces & CLONE_NEWNS) &&
1283 (options->attach_flags & LXC_ATTACH_LSM) &&
1284 init_ctx->lsm_label) {
47ce2cb7
CB
1285 int labelfd;
1286 bool on_exec;
6f4f1937 1287
a7547c5c 1288 ret = -1;
47ce2cb7
CB
1289 on_exec = options->attach_flags & LXC_ATTACH_LSM_EXEC ? true : false;
1290 labelfd = lsm_process_label_fd_get(attached_pid, on_exec);
81f466d0 1291 if (labelfd < 0)
ba2be1a8 1292 goto close_mainloop;
ea918412 1293
94ac256f 1294 TRACE("Opened LSM label file descriptor %d", labelfd);
81f466d0
CB
1295
1296 /* Send child fd of the LSM security module to write to. */
ae467c54 1297 ret = lxc_abstract_unix_send_fds(ipc_sockets[0], &labelfd, 1, NULL, 0);
81f466d0 1298 if (ret <= 0) {
9044b79e 1299 if (ret < 0)
1300 SYSERROR("Failed to send lsm label fd");
1301
1302 close(labelfd);
ba2be1a8 1303 goto close_mainloop;
81f466d0 1304 }
9044b79e 1305
1306 close(labelfd);
94ac256f 1307 TRACE("Sent LSM label file descriptor %d to child", labelfd);
81f466d0
CB
1308 }
1309
2ac0f627
CB
1310 if (conf && conf->seccomp.seccomp) {
1311 ret = lxc_seccomp_recv_notifier_fd(&conf->seccomp, ipc_sockets[0]);
1312 if (ret < 0)
1313 goto close_mainloop;
cdb2a47f 1314
2ac0f627
CB
1315 ret = lxc_seccomp_add_notifier(name, lxcpath, &conf->seccomp);
1316 if (ret < 0)
1317 goto close_mainloop;
1318 }
cdb2a47f 1319
8ce83369
CB
1320 /* We're done, the child process should now execute whatever it
1321 * is that the user requested. The parent can now track it with
1322 * waitpid() or similar.
9c4693b8
CS
1323 */
1324
1325 *attached_process = attached_pid;
9c4693b8 1326
ba2be1a8 1327 /* Now shut down communication with child, we're done. */
9c4693b8
CS
1328 shutdown(ipc_sockets[0], SHUT_RDWR);
1329 close(ipc_sockets[0]);
ba2be1a8
CB
1330 ipc_sockets[0] = -1;
1331
1332 ret_parent = 0;
1333 to_cleanup_pid = -1;
ea918412 1334
9e84479f 1335 if (options->attach_flags & LXC_ATTACH_TERMINAL) {
ba2be1a8
CB
1336 ret = lxc_mainloop(&descr, -1);
1337 if (ret < 0) {
1338 ret_parent = -1;
1339 to_cleanup_pid = attached_pid;
1340 }
1341 }
1342
1343 close_mainloop:
9e84479f 1344 if (options->attach_flags & LXC_ATTACH_TERMINAL)
ba2be1a8
CB
1345 lxc_mainloop_close(&descr);
1346
1347 on_error:
1348 if (ipc_sockets[0] >= 0) {
1349 shutdown(ipc_sockets[0], SHUT_RDWR);
1350 close(ipc_sockets[0]);
1351 }
1352
1353 if (to_cleanup_pid > 0)
6f4f1937 1354 (void)wait_for_pid(to_cleanup_pid);
ba2be1a8 1355
9e84479f
CB
1356 if (options->attach_flags & LXC_ATTACH_TERMINAL) {
1357 lxc_terminal_delete(&terminal);
1358 lxc_terminal_conf_free(&terminal);
ba2be1a8 1359 }
ea918412 1360
fe4de9a6 1361 lxc_proc_put_context_info(init_ctx);
ba2be1a8 1362 return ret_parent;
9c4693b8
CS
1363 }
1364
ba2be1a8 1365 /* close unneeded file descriptors */
9c4693b8 1366 close(ipc_sockets[0]);
ba2be1a8 1367 ipc_sockets[0] = -EBADF;
ea918412 1368
9e84479f
CB
1369 if (options->attach_flags & LXC_ATTACH_TERMINAL) {
1370 lxc_attach_terminal_close_master(&terminal);
1371 lxc_attach_terminal_close_peer(&terminal);
1372 lxc_attach_terminal_close_log(&terminal);
ba2be1a8 1373 }
9c4693b8 1374
8ce83369 1375 /* Wait for the parent to have setup cgroups. */
94ac256f 1376 ret = lxc_read_nointr(ipc_sockets[1], &status, sizeof(status));
ba2be1a8 1377 if (ret != sizeof(status)) {
f4364484 1378 shutdown(ipc_sockets[1], SHUT_RDWR);
62de1db6 1379 lxc_proc_put_context_info(init_ctx);
c7ac2e1c 1380 _exit(EXIT_FAILURE);
f4364484 1381 }
ea918412 1382
94ac256f 1383 TRACE("Intermediate process starting to initialize");
f4364484 1384
8ce83369
CB
1385 /* Attach now, create another subprocess later, since pid namespaces
1386 * only really affect the children of the current process.
9c4693b8 1387 */
877f3a04 1388 ret = lxc_attach_to_ns(init_pid, init_ctx);
9c4693b8 1389 if (ret < 0) {
94ac256f 1390 ERROR("Failed to enter namespaces");
9c4693b8 1391 shutdown(ipc_sockets[1], SHUT_RDWR);
62de1db6 1392 lxc_proc_put_context_info(init_ctx);
c7ac2e1c 1393 _exit(EXIT_FAILURE);
9c4693b8 1394 }
ea918412 1395
877f3a04
CB
1396 /* close namespace file descriptors */
1397 lxc_proc_close_ns_fd(init_ctx);
9c4693b8 1398
8ce83369 1399 /* Attach succeeded, try to cwd. */
9c4693b8
CS
1400 if (options->initial_cwd)
1401 new_cwd = options->initial_cwd;
1402 else
1403 new_cwd = cwd;
d6d979bc
CB
1404 if (new_cwd) {
1405 ret = chdir(new_cwd);
1406 if (ret < 0)
1407 WARN("Could not change directory to \"%s\"", new_cwd);
1408 }
9c4693b8
CS
1409 free(cwd);
1410
a998454a
CB
1411 /* Create attached process. */
1412 payload.ipc_socket = ipc_sockets[1];
1413 payload.options = options;
1414 payload.init_ctx = init_ctx;
9e84479f 1415 payload.terminal_slave_fd = terminal.slave;
a998454a
CB
1416 payload.exec_function = exec_function;
1417 payload.exec_payload = exec_payload;
9c4693b8 1418
a59440be 1419 pid = lxc_raw_clone(CLONE_PARENT, NULL);
a998454a 1420 if (pid < 0) {
94ac256f 1421 SYSERROR("Failed to clone attached process");
9c4693b8 1422 shutdown(ipc_sockets[1], SHUT_RDWR);
62de1db6 1423 lxc_proc_put_context_info(init_ctx);
c7ac2e1c 1424 _exit(EXIT_FAILURE);
9c4693b8 1425 }
a998454a
CB
1426
1427 if (pid == 0) {
f157b056
CB
1428 if (options->attach_flags & LXC_ATTACH_TERMINAL) {
1429 ret = pthread_sigmask(SIG_SETMASK,
1430 &terminal.tty_state->oldmask, NULL);
1431 if (ret < 0) {
1432 SYSERROR("Failed to reset signal mask");
1433 _exit(EXIT_FAILURE);
1434 }
1435 }
1436
a998454a
CB
1437 ret = attach_child_main(&payload);
1438 if (ret < 0)
1439 ERROR("Failed to exec");
ea918412 1440
a998454a
CB
1441 _exit(EXIT_FAILURE);
1442 }
ea918412 1443
9e84479f
CB
1444 if (options->attach_flags & LXC_ATTACH_TERMINAL)
1445 lxc_attach_terminal_close_slave(&terminal);
9c4693b8 1446
8ce83369 1447 /* Tell grandparent the pid of the pid of the newly created child. */
9c4693b8
CS
1448 ret = lxc_write_nointr(ipc_sockets[1], &pid, sizeof(pid));
1449 if (ret != sizeof(pid)) {
8ce83369
CB
1450 /* If this really happens here, this is very unfortunate, since
1451 * the parent will not know the pid of the attached process and
1452 * will not be able to wait for it (and we won't either due to
1453 * CLONE_PARENT) so the parent won't be able to reap it and the
1454 * attached process will remain a zombie.
9c4693b8 1455 */
9c4693b8 1456 shutdown(ipc_sockets[1], SHUT_RDWR);
62de1db6 1457 lxc_proc_put_context_info(init_ctx);
c7ac2e1c 1458 _exit(EXIT_FAILURE);
9c4693b8 1459 }
ea918412 1460
94ac256f 1461 TRACE("Sending pid %d of attached process", pid);
9c4693b8 1462
8ce83369 1463 /* The rest is in the hands of the initial and the attached process. */
62de1db6 1464 lxc_proc_put_context_info(init_ctx);
57017714 1465 _exit(EXIT_SUCCESS);
9c4693b8
CS
1466}
1467
06346bb0 1468int lxc_attach_run_command(void *payload)
9c4693b8 1469{
06346bb0
CB
1470 int ret = -1;
1471 lxc_attach_command_t *cmd = payload;
9c4693b8 1472
06346bb0
CB
1473 ret = execvp(cmd->program, cmd->argv);
1474 if (ret < 0) {
1475 switch (errno) {
1476 case ENOEXEC:
1477 ret = 126;
cf0fd972 1478 break;
06346bb0
CB
1479 case ENOENT:
1480 ret = 127;
cf0fd972 1481 break;
06346bb0
CB
1482 }
1483 }
ea918412 1484
1485 SYSERROR("Failed to exec \"%s\"", cmd->program);
06346bb0 1486 return ret;
9c4693b8
CS
1487}
1488
1489int lxc_attach_run_shell(void* payload)
1490{
cd8f5663 1491 __do_free char *buf = NULL;
9c4693b8 1492 uid_t uid;
cb7aa5e8
DJ
1493 struct passwd pwent;
1494 struct passwd *pwentp = NULL;
9c4693b8 1495 char *user_shell;
cb7aa5e8
DJ
1496 size_t bufsize;
1497 int ret;
9c4693b8 1498
8ce83369 1499 /* Ignore payload parameter. */
9c4693b8
CS
1500 (void)payload;
1501
1502 uid = getuid();
cb7aa5e8
DJ
1503
1504 bufsize = sysconf(_SC_GETPW_R_SIZE_MAX);
1505 if (bufsize == -1)
1506 bufsize = 1024;
1507
1508 buf = malloc(bufsize);
1509 if (buf) {
1510 ret = getpwuid_r(uid, &pwent, buf, bufsize, &pwentp);
1511 if (!pwentp) {
1512 if (ret == 0)
ea918412 1513 WARN("Could not find matched password record");
cb7aa5e8
DJ
1514
1515 WARN("Failed to get password record - %u", uid);
1516 }
1517 }
9c4693b8 1518
8ce83369
CB
1519 /* This probably happens because of incompatible nss implementations in
1520 * host and container (remember, this code is still using the host's
1521 * glibc but our mount namespace is in the container) we may try to get
1522 * the information by spawning a [getent passwd uid] process and parsing
1523 * the result.
9c4693b8 1524 */
cb7aa5e8 1525 if (!pwentp)
9c4693b8
CS
1526 user_shell = lxc_attach_getpwshell(uid);
1527 else
cb7aa5e8 1528 user_shell = pwent.pw_shell;
ea918412 1529
9c4693b8 1530 if (user_shell)
acf47e1b 1531 execlp(user_shell, user_shell, (char *)NULL);
9c4693b8 1532
8ce83369
CB
1533 /* Executed if either no passwd entry or execvp fails, we will fall back
1534 * on /bin/sh as a default shell.
9c4693b8 1535 */
acf47e1b 1536 execlp("/bin/sh", "/bin/sh", (char *)NULL);
ea918412 1537
edeb1836 1538 SYSERROR("Failed to execute shell");
cb7aa5e8 1539 if (!pwentp)
edeb1836 1540 free(user_shell);
ea918412 1541
9c4693b8
CS
1542 return -1;
1543}