]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/attach.c
attach: fix return value & cleanups
[mirror_lxc.git] / src / lxc / attach.c
CommitLineData
e0732705
CS
1/*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
9afe19d6 7 * Daniel Lezcano <daniel.lezcano at free.fr>
e0732705
CS
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
250b1eec 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
e0732705
CS
22 */
23
24#define _GNU_SOURCE
e0732705
CS
25#include <errno.h>
26#include <fcntl.h>
e5adb2b5 27#include <termios.h>
c476bdce 28#include <grp.h>
6f4f1937
CB
29#include <pwd.h>
30#include <signal.h>
31#include <stdio.h>
32#include <stdlib.h>
33#include <string.h>
34#include <unistd.h>
35#include <linux/unistd.h>
36#include <sys/mount.h>
e0732705
CS
37#include <sys/param.h>
38#include <sys/prctl.h>
5ec27989 39#include <sys/socket.h>
1ba0013f 40#include <sys/syscall.h>
905022f7 41#include <sys/wait.h>
6f4f1937
CB
42
43#include <lxc/lxccontainer.h>
e0732705 44
955e2a02 45#ifndef HAVE_DECL_PR_CAPBSET_DROP
e0732705
CS
46#define PR_CAPBSET_DROP 24
47#endif
48
955e2a02
CB
49#ifndef HAVE_DECL_PR_SET_NO_NEW_PRIVS
50#define PR_SET_NO_NEW_PRIVS 38
51#endif
52
53#ifndef HAVE_DECL_PR_GET_NO_NEW_PRIVS
54#define PR_GET_NO_NEW_PRIVS 39
55#endif
56
81f466d0 57#include "af_unix.h"
e0732705
CS
58#include "attach.h"
59#include "caps.h"
9c4693b8 60#include "cgroup.h"
6f4f1937 61#include "commands.h"
2c4ea790 62#include "conf.h"
6f4f1937 63#include "config.h"
9b8e3c96 64#include "confile.h"
6f4f1937
CB
65#include "log.h"
66#include "lsm/lsm.h"
67#include "lxclock.h"
68#include "lxcseccomp.h"
ba2be1a8 69#include "mainloop.h"
6f4f1937 70#include "namespace.h"
0ed9b1bc 71#include "terminal.h"
6f4f1937 72#include "utils.h"
9c4693b8
CS
73
74#if HAVE_SYS_PERSONALITY_H
75#include <sys/personality.h>
76#endif
e0732705 77
a3da2f3b 78#ifndef SOCK_CLOEXEC
6f4f1937 79#define SOCK_CLOEXEC 02000000
a3da2f3b
SG
80#endif
81
d6a3c917
SG
82#ifndef MS_REC
83#define MS_REC 16384
84#endif
85
86#ifndef MS_SLAVE
6f4f1937 87#define MS_SLAVE (1 << 19)
d6a3c917
SG
88#endif
89
ac2cecc4 90lxc_log_define(attach, lxc);
e0732705 91
8ce83369 92/* /proc/pid-to-str/status\0 = (5 + 21 + 7 + 1) */
eab15c1e 93#define __PROC_STATUS_LEN (5 + (LXC_NUMSTRLEN64) + 7 + 1)
74a3920a 94static struct lxc_proc_context_info *lxc_proc_get_context_info(pid_t pid)
e0732705 95{
6f4f1937
CB
96 int ret;
97 bool found;
e0732705 98 FILE *proc_file;
8ce83369 99 char proc_fn[__PROC_STATUS_LEN];
e0732705 100 size_t line_bufsz = 0;
6f4f1937 101 char *line = NULL;
8ce83369 102 struct lxc_proc_context_info *info = NULL;
e0732705 103
8ce83369
CB
104 /* Read capabilities. */
105 ret = snprintf(proc_fn, __PROC_STATUS_LEN, "/proc/%d/status", pid);
106 if (ret < 0 || ret >= __PROC_STATUS_LEN)
107 goto on_error;
e0732705
CS
108
109 proc_file = fopen(proc_fn, "r");
110 if (!proc_file) {
ea918412 111 SYSERROR("Could not open %s", proc_fn);
8ce83369 112 goto on_error;
e0732705
CS
113 }
114
8ce83369
CB
115 info = calloc(1, sizeof(*info));
116 if (!info) {
ea918412 117 SYSERROR("Could not allocate memory");
17ac5301 118 fclose(proc_file);
8ce83369
CB
119 return NULL;
120 }
121
122 found = false;
ea918412 123
e0732705
CS
124 while (getline(&line, &line_bufsz, proc_file) != -1) {
125 ret = sscanf(line, "CapBnd: %llx", &info->capability_mask);
8ce83369
CB
126 if (ret != EOF && ret == 1) {
127 found = true;
e0732705
CS
128 break;
129 }
130 }
131
f10fad2f 132 free(line);
e0732705
CS
133 fclose(proc_file);
134
135 if (!found) {
ea918412 136 ERROR("Could not read capability bounding set from %s",
137 proc_fn);
8ce83369 138 goto on_error;
e0732705
CS
139 }
140
fe4de9a6 141 info->lsm_label = lsm_process_label_get(pid);
877f3a04
CB
142 info->ns_inherited = 0;
143 memset(info->ns_fd, -1, sizeof(int) * LXC_NS_MAX);
e0732705 144
e0732705
CS
145 return info;
146
8ce83369 147on_error:
460a1cf0 148 free(info);
e0732705
CS
149 return NULL;
150}
151
877f3a04
CB
152static inline void lxc_proc_close_ns_fd(struct lxc_proc_context_info *ctx)
153{
154 int i;
155
156 for (i = 0; i < LXC_NS_MAX; i++) {
157 if (ctx->ns_fd[i] < 0)
158 continue;
ea918412 159
877f3a04
CB
160 close(ctx->ns_fd[i]);
161 ctx->ns_fd[i] = -EBADF;
162 }
163}
164
fe4de9a6
DE
165static void lxc_proc_put_context_info(struct lxc_proc_context_info *ctx)
166{
f10fad2f 167 free(ctx->lsm_label);
08ea9270
CB
168 ctx->lsm_label = NULL;
169
170 if (ctx->container) {
2c4ea790 171 lxc_container_put(ctx->container);
08ea9270
CB
172 ctx->container = NULL;
173 }
174
877f3a04 175 lxc_proc_close_ns_fd(ctx);
fe4de9a6
DE
176 free(ctx);
177}
178
299d1198
CB
/**
 * in_same_namespace - Check whether two processes share a namespace.
 * @pid1 - PID of the first process.
 * @pid2 - PID of the second process.
 * @ns   - Name of the namespace to check. Must correspond to one of the names
 *         for the namespaces as shown in /proc/<pid>/ns/
 *
 * If the two processes are in different namespaces an fd referring to the
 * namespace of @pid2 is returned; the caller owns that fd. If they are in the
 * same namespace, or the namespace of @pid1 cannot be opened because of
 * ENOENT (the kernel does not support it), -EINVAL is returned. Any other
 * failure returns -1.
 *
 * Except for the ENOENT case, errno on return is either the value it had on
 * entry or the error of the call that failed; it is not disturbed by the
 * cleanup performed here.
 */
static int in_same_namespace(pid_t pid1, pid_t pid2, const char *ns)
{
	struct stat st_first, st_second;
	int first = -1, second = -1;
	int result = -1;
	int err = errno;

	first = lxc_preserve_ns(pid1, ns);
	if (first < 0) {
		/* The kernel does not support this namespace. This is not an
		 * error.
		 */
		if (errno == ENOENT)
			return -EINVAL;

		err = errno;
		goto cleanup;
	}

	second = lxc_preserve_ns(pid2, ns);
	if (second < 0) {
		err = errno;
		goto cleanup;
	}

	if (fstat(first, &st_first) < 0 || fstat(second, &st_second) < 0) {
		err = errno;
		goto cleanup;
	}

	if (st_first.st_dev == st_second.st_dev &&
	    st_first.st_ino == st_second.st_ino) {
		/* Same device and inode: both processes share the namespace. */
		result = -EINVAL;
	} else {
		/* Different namespaces: hand the second fd to the caller. */
		result = second;
		second = -1;
	}

cleanup:
	if (first >= 0)
		close(first);

	if (second >= 0)
		close(second);

	errno = err;
	return result;
}
246
877f3a04 247static int lxc_attach_to_ns(pid_t pid, struct lxc_proc_context_info *ctx)
99d50954 248{
ea918412 249 int i;
99d50954 250
26818618 251 for (i = 0; i < LXC_NS_MAX; i++) {
877f3a04 252 if (ctx->ns_fd[i] < 0)
26818618
CB
253 continue;
254
ea918412 255 if (setns(ctx->ns_fd[i], ns_info[i].clone_flag) < 0) {
299d1198 256 SYSERROR("Failed to attach to %s namespace of %d",
ea918412 257 ns_info[i].proc_name, pid);
99d50954
CS
258 return -1;
259 }
260
299d1198 261 DEBUG("Attached to %s namespace of %d", ns_info[i].proc_name, pid);
99d50954
CS
262 }
263
264 return 0;
265}
266
/*
 * lxc_attach_remount_sys_proc - Give the caller fresh /proc and /sys mounts.
 *
 * Unshares the mount namespace, tries to turn "/" into a recursive slave mount
 * when the root filesystem is detected as shared (a failure there is logged
 * but not fatal), then lazily unmounts and remounts /proc and, if it was
 * mounted, /sys.
 *
 * Returns 0 on success, -1 on failure.
 */
static int lxc_attach_remount_sys_proc(void)
{
	if (unshare(CLONE_NEWNS) < 0) {
		SYSERROR("Failed to unshare mount namespace");
		return -1;
	}

	if (detect_shared_rootfs() &&
	    mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL)) {
		SYSERROR("Failed to make / rslave");
		ERROR("Continuing...");
	}

	/* Assume /proc is always mounted, so remount it. */
	if (umount2("/proc", MNT_DETACH) < 0) {
		SYSERROR("Failed to unmount /proc");
		return -1;
	}

	if (mount("none", "/proc", "proc", 0, NULL) < 0) {
		SYSERROR("Failed to remount /proc");
		return -1;
	}

	/* Try to umount /sys. EINVAL means it is not a mount point, i.e. it
	 * was never mounted in the first place, so there is nothing to remount.
	 */
	if (umount2("/sys", MNT_DETACH) < 0) {
		if (errno == EINVAL)
			return 0;

		SYSERROR("Failed to unmount /sys");
		return -1;
	}

	/* Remount it. */
	if (mount("none", "/sys", "sysfs", 0, NULL) < 0) {
		SYSERROR("Failed to remount /sys");
		return -1;
	}

	return 0;
}
315
74a3920a 316static int lxc_attach_drop_privs(struct lxc_proc_context_info *ctx)
e0732705 317{
6f4f1937 318 int cap, last_cap;
e0732705 319
6f4f1937 320 last_cap = lxc_caps_last_cap();
e0732705
CS
321 for (cap = 0; cap <= last_cap; cap++) {
322 if (ctx->capability_mask & (1LL << cap))
323 continue;
324
325 if (prctl(PR_CAPBSET_DROP, cap, 0, 0, 0)) {
94ac256f 326 SYSERROR("Failed to drop capability %d", cap);
e0732705
CS
327 return -1;
328 }
ea918412 329
94ac256f 330 TRACE("Dropped capability %d", cap);
e0732705
CS
331 }
332
333 return 0;
334}
905022f7 335
7385273f 336static int lxc_attach_set_environment(struct lxc_proc_context_info *init_ctx,
337 enum lxc_attach_env_policy_t policy,
6f4f1937 338 char **extra_env, char **extra_keep)
b3a39ba6 339{
3d55242a 340 int ret;
7385273f 341 struct lxc_list *iterator;
342
799f96fd 343 if (policy == LXC_ATTACH_CLEAR_ENV) {
3d5e9f48 344 int path_kept = 0;
6f4f1937 345 char **extra_keep_store = NULL;
3d5e9f48
CS
346
347 if (extra_keep) {
348 size_t count, i;
349
3d55242a
CB
350 for (count = 0; extra_keep[count]; count++)
351 ;
3d5e9f48
CS
352
353 extra_keep_store = calloc(count, sizeof(char *));
3d55242a 354 if (!extra_keep_store)
3d5e9f48 355 return -1;
3d55242a 356
3d5e9f48
CS
357 for (i = 0; i < count; i++) {
358 char *v = getenv(extra_keep[i]);
359 if (v) {
360 extra_keep_store[i] = strdup(v);
361 if (!extra_keep_store[i]) {
3d5e9f48
CS
362 while (i > 0)
363 free(extra_keep_store[--i]);
ea918412 364
3d5e9f48
CS
365 free(extra_keep_store);
366 return -1;
367 }
3d55242a 368
3d5e9f48
CS
369 if (strcmp(extra_keep[i], "PATH") == 0)
370 path_kept = 1;
371 }
3d5e9f48
CS
372 }
373 }
374
799f96fd 375 if (clearenv()) {
a9cab7e3 376 if (extra_keep_store) {
3d55242a
CB
377 char **p;
378
a9cab7e3
CS
379 for (p = extra_keep_store; *p; p++)
380 free(*p);
3d55242a 381
a9cab7e3
CS
382 free(extra_keep_store);
383 }
3d55242a 384
ea918412 385 ERROR("Failed to clear environment");
3d5e9f48
CS
386 return -1;
387 }
388
389 if (extra_keep_store) {
390 size_t i;
6f4f1937 391
3d5e9f48 392 for (i = 0; extra_keep[i]; i++) {
acd4922e 393 if (extra_keep_store[i]) {
3d55242a
CB
394 ret = setenv(extra_keep[i], extra_keep_store[i], 1);
395 if (ret < 0)
a24c5678 396 SYSWARN("Failed to set environment variable");
acd4922e 397 }
ea918412 398
3d5e9f48
CS
399 free(extra_keep_store[i]);
400 }
ea918412 401
3d5e9f48
CS
402 free(extra_keep_store);
403 }
404
8ce83369
CB
405 /* Always set a default path; shells and execlp tend to be fine
406 * without it, but there is a disturbing number of C programs
407 * out there that just assume that getenv("PATH") is never NULL
408 * and then die a painful segfault death.
409 */
3d55242a
CB
410 if (!path_kept) {
411 ret = setenv("PATH", "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", 1);
412 if (ret < 0)
a24c5678 413 SYSWARN("Failed to set environment variable");
3d55242a 414 }
b3a39ba6
DW
415 }
416
3d55242a
CB
417 ret = putenv("container=lxc");
418 if (ret < 0) {
a24c5678 419 SYSWARN("Failed to set environment variable");
b3a39ba6
DW
420 return -1;
421 }
422
7385273f 423 /* Set container environment variables.*/
424 if (init_ctx && init_ctx->container && init_ctx->container->lxc_conf) {
425 lxc_list_for_each(iterator, &init_ctx->container->lxc_conf->environment) {
3d55242a
CB
426 char *env_tmp;
427
428 env_tmp = strdup((char *)iterator->elem);
429 if (!env_tmp)
7385273f 430 return -1;
7385273f 431
3d55242a
CB
432 ret = putenv(env_tmp);
433 if (ret < 0) {
434 SYSERROR("Failed to set environment variable: %s", (char *)iterator->elem);
7385273f 435 return -1;
436 }
437 }
438 }
439
8ce83369 440 /* Set extra environment variables. */
3d5e9f48
CS
441 if (extra_env) {
442 for (; *extra_env; extra_env++) {
3d55242a 443 char *p;
ea918412 444
8ce83369
CB
445 /* We just assume the user knows what they are doing, so
446 * we don't do any checks.
447 */
3d55242a
CB
448 p = strdup(*extra_env);
449 if (!p)
3d5e9f48 450 return -1;
3d55242a
CB
451
452 ret = putenv(p);
453 if (ret < 0)
a24c5678 454 SYSWARN("Failed to set environment variable");
3d5e9f48
CS
455 }
456 }
457
b3a39ba6
DW
458 return 0;
459}
460
/*
 * lxc_attach_getpwshell - Look up the login shell of a user via getent(1).
 * @uid - User id to look up.
 *
 * Forks a child that runs "getent passwd <uid>" with its stdout connected to a
 * pipe and parses the passwd-formatted output in the parent.
 *
 * Returns a heap-allocated copy of the shell field, which the caller must
 * free(), or NULL if the lookup failed or no matching entry was found.
 */
static char *lxc_attach_getpwshell(uid_t uid)
{
	int fd, ret;
	pid_t pid;
	int pipes[2];
	FILE *pipe_f;
	bool found = false;
	size_t line_bufsz = 0;
	char *line = NULL, *result = NULL;

	/* We need to fork off a process that runs the getent program, and we
	 * need to capture its output, so we use a pipe for that purpose.
	 */
	ret = pipe2(pipes, O_CLOEXEC);
	if (ret < 0)
		return NULL;

	pid = fork();
	if (pid < 0) {
		close(pipes[0]);
		close(pipes[1]);
		return NULL;
	}

	if (!pid) {
		char uid_buf[32];
		char *arguments[] = {
			"getent",
			"passwd",
			uid_buf,
			NULL
		};

		close(pipes[0]);

		/* We want to capture stdout. */
		ret = dup2(pipes[1], STDOUT_FILENO);
		close(pipes[1]);
		if (ret < 0)
			_exit(EXIT_FAILURE);

		/* Get rid of stdin/stderr, so we try to associate it with
		 * /dev/null. stdout must be left alone: it is the pipe the
		 * parent reads from.
		 * NOTE(review): O_CLOEXEC makes these two descriptors vanish on
		 * exec instead of staying bound to /dev/null - confirm this is
		 * intended.
		 */
		fd = open_devnull();
		if (fd < 0) {
			close(STDIN_FILENO);
			close(STDERR_FILENO);
		} else {
			(void)dup3(fd, STDIN_FILENO, O_CLOEXEC);
			(void)dup3(fd, STDERR_FILENO, O_CLOEXEC);
			close(fd);
		}

		/* Finish argument list. */
		ret = snprintf(uid_buf, sizeof(uid_buf), "%ld", (long)uid);
		if (ret <= 0 || (size_t)ret >= sizeof(uid_buf))
			_exit(EXIT_FAILURE);

		/* Try to run getent program. */
		(void)execvp("getent", arguments);
		_exit(EXIT_FAILURE);
	}

	close(pipes[1]);

	pipe_f = fdopen(pipes[0], "r");
	if (!pipe_f) {
		/* Without a stream there is nothing to parse; drop the read
		 * end and reap the child so it does not linger as a zombie.
		 */
		close(pipes[0]);
		(void)wait_for_pid(pid);
		return NULL;
	}

	while (getline(&line, &line_bufsz, pipe_f) != -1) {
		int i;
		long value;
		char *token;
		char *endptr = NULL, *saveptr = NULL;

		/* If we already found something, just continue to read
		 * until the pipe doesn't deliver any more data, but
		 * don't modify the existing data structure.
		 */
		if (found)
			continue;

		/* Trim line on the right hand side. */
		for (i = strlen(line); i > 0 && (line[i - 1] == '\n' || line[i - 1] == '\r'); --i)
			line[i - 1] = '\0';

		/* Split into tokens: first: user name. */
		token = strtok_r(line, ":", &saveptr);
		if (!token)
			continue;

		/* next: dummy password field */
		token = strtok_r(NULL, ":", &saveptr);
		if (!token)
			continue;

		/* next: user id */
		token = strtok_r(NULL, ":", &saveptr);
		value = token ? strtol(token, &endptr, 10) : 0;
		if (!token || !endptr || *endptr || value == LONG_MIN ||
		    value == LONG_MAX)
			continue;

		/* dummy sanity check: user id matches */
		if ((uid_t)value != uid)
			continue;

		/* skip fields: gid, gecos, dir, go to next field 'shell' */
		for (i = 0; i < 4; i++) {
			token = strtok_r(NULL, ":", &saveptr);
			if (!token)
				break;
		}

		if (!token)
			continue;

		free(result);
		result = strdup(token);

		/* Sanity check that there are no fields after that. */
		token = strtok_r(NULL, ":", &saveptr);
		if (token)
			continue;

		found = true;
	}

	free(line);
	fclose(pipe_f);

	ret = wait_for_pid(pid);
	if (ret < 0 || !found) {
		free(result);
		return NULL;
	}

	return result;
}
cb3e61fa 603
6f4f1937 604static void lxc_attach_get_init_uidgid(uid_t *init_uid, gid_t *init_gid)
cb3e61fa
CS
605{
606 FILE *proc_file;
8ce83369
CB
607 char proc_fn[__PROC_STATUS_LEN];
608 int ret;
cb3e61fa
CS
609 char *line = NULL;
610 size_t line_bufsz = 0;
cb3e61fa
CS
611 long value = -1;
612 uid_t uid = (uid_t)-1;
613 gid_t gid = (gid_t)-1;
614
7fb45c93
CB
615 ret = snprintf(proc_fn, __PROC_STATUS_LEN, "/proc/%d/status", 1);
616 if (ret < 0 || ret >= __PROC_STATUS_LEN)
617 return;
cb3e61fa
CS
618
619 proc_file = fopen(proc_fn, "r");
620 if (!proc_file)
621 return;
622
623 while (getline(&line, &line_bufsz, proc_file) != -1) {
8ce83369
CB
624 /* Format is: real, effective, saved set user, fs we only care
625 * about real uid.
cb3e61fa
CS
626 */
627 ret = sscanf(line, "Uid: %ld", &value);
8ce83369 628 if (ret != EOF && ret == 1) {
6f4f1937 629 uid = (uid_t)value;
cb3e61fa
CS
630 } else {
631 ret = sscanf(line, "Gid: %ld", &value);
8ce83369 632 if (ret != EOF && ret == 1)
6f4f1937 633 gid = (gid_t)value;
cb3e61fa 634 }
ea918412 635
cb3e61fa
CS
636 if (uid != (uid_t)-1 && gid != (gid_t)-1)
637 break;
638 }
639
640 fclose(proc_file);
641 free(line);
642
8ce83369 643 /* Only override arguments if we found something. */
cb3e61fa
CS
644 if (uid != (uid_t)-1)
645 *init_uid = uid;
ea918412 646
cb3e61fa
CS
647 if (gid != (gid_t)-1)
648 *init_gid = gid;
649
650 /* TODO: we should also parse supplementary groups and use
8ce83369
CB
651 * setgroups() to set them.
652 */
cb3e61fa 653}
9c4693b8 654
8ce83369 655/* Help the optimizer along if it doesn't know that exit always exits. */
6f4f1937
CB
656#define rexit(c) \
657 do { \
658 int __c = (c); \
659 _exit(__c); \
660 return __c; \
661 } while (0)
9c4693b8 662
8ce83369 663/* Define default options if no options are supplied by the user. */
9c4693b8
CS
664static lxc_attach_options_t attach_static_default_options = LXC_ATTACH_OPTIONS_DEFAULT;
665
d4db3d14 666static bool fetch_seccomp(struct lxc_container *c, lxc_attach_options_t *options)
2c4ea790 667{
d4db3d14
CB
668 int ret;
669 bool bret;
bd7b4e28 670 char *path;
2eef2bda 671
6f4f1937
CB
672 if (!(options->namespaces & CLONE_NEWNS) ||
673 !(options->attach_flags & LXC_ATTACH_LSM)) {
bd4307f0
CB
674 free(c->lxc_conf->seccomp);
675 c->lxc_conf->seccomp = NULL;
2c4ea790 676 return true;
bd4307f0 677 }
bd7b4e28 678
2e812c16 679 /* Remove current setting. */
d4db3d14 680 if (!c->set_config_item(c, "lxc.seccomp.profile", "") &&
ea918412 681 !c->set_config_item(c, "lxc.seccomp", ""))
2c4ea790 682 return false;
bd7b4e28 683
8ce83369 684 /* Fetch the current profile path over the cmd interface. */
0b427da0 685 path = c->get_running_config_item(c, "lxc.seccomp.profile");
bd7b4e28 686 if (!path) {
d4db3d14 687 INFO("Failed to retrieve lxc.seccomp.profile");
ea918412 688
0b427da0 689 path = c->get_running_config_item(c, "lxc.seccomp");
d4db3d14
CB
690 if (!path) {
691 INFO("Failed to retrieve lxc.seccomp");
692 return true;
693 }
bd7b4e28
SG
694 }
695
8ce83369 696 /* Copy the value into the new lxc_conf. */
d4db3d14 697 bret = c->set_config_item(c, "lxc.seccomp.profile", path);
bd7b4e28 698 free(path);
d4db3d14
CB
699 if (!bret)
700 return false;
bd7b4e28 701
8ce83369 702 /* Attempt to parse the resulting config. */
d4db3d14
CB
703 ret = lxc_read_seccomp_config(c->lxc_conf);
704 if (ret < 0) {
705 ERROR("Failed to retrieve seccomp policy");
2c4ea790
SH
706 return false;
707 }
708
d4db3d14 709 INFO("Retrieved seccomp policy");
2e812c16
CB
710 return true;
711}
712
6f4f1937 713static bool no_new_privs(struct lxc_container *c, lxc_attach_options_t *options)
2e812c16 714{
bcbef733 715 bool bret;
2e812c16
CB
716 char *val;
717
2e812c16 718 /* Remove current setting. */
bcbef733
CB
719 if (!c->set_config_item(c, "lxc.no_new_privs", "")) {
720 INFO("Failed to unset lxc.no_new_privs");
2e812c16 721 return false;
bcbef733 722 }
2e812c16
CB
723
724 /* Retrieve currently active setting. */
725 val = c->get_running_config_item(c, "lxc.no_new_privs");
726 if (!val) {
bcbef733 727 INFO("Failed to retrieve lxc.no_new_privs");
2e812c16
CB
728 return false;
729 }
730
731 /* Set currently active setting. */
bcbef733 732 bret = c->set_config_item(c, "lxc.no_new_privs", val);
2e812c16 733 free(val);
bcbef733 734 return bret;
2c4ea790
SH
735}
736
9b8e3c96
SH
/*
 * get_personality - Query the personality configured for a container.
 * @name    - Name of the container.
 * @lxcpath - Path under which the container lives.
 *
 * Fetches "lxc.arch" over the command interface and converts it with
 * lxc_config_parse_arch().
 *
 * Returns the parsed value, or -1 if the config item could not be retrieved.
 */
static signed long get_personality(const char *name, const char *lxcpath)
{
	signed long persona;
	char *arch = lxc_cmd_get_config_item(name, "lxc.arch", lxcpath);

	if (!arch)
		return -1;

	persona = lxc_config_parse_arch(arch);
	free(arch);

	return persona;
}
751
a998454a
CB
752struct attach_clone_payload {
753 int ipc_socket;
9e84479f 754 int terminal_slave_fd;
a998454a
CB
755 lxc_attach_options_t *options;
756 struct lxc_proc_context_info *init_ctx;
757 lxc_attach_exec_t exec_function;
758 void *exec_payload;
759};
760
ba2be1a8
CB
761static void lxc_put_attach_clone_payload(struct attach_clone_payload *p)
762{
763 if (p->ipc_socket >= 0) {
764 shutdown(p->ipc_socket, SHUT_RDWR);
765 close(p->ipc_socket);
766 p->ipc_socket = -EBADF;
767 }
768
9e84479f
CB
769 if (p->terminal_slave_fd >= 0) {
770 close(p->terminal_slave_fd);
771 p->terminal_slave_fd = -EBADF;
ba2be1a8
CB
772 }
773
b21da190 774 if (p->init_ctx) {
ba2be1a8 775 lxc_proc_put_context_info(p->init_ctx);
b21da190
CB
776 p->init_ctx = NULL;
777 }
ba2be1a8
CB
778}
779
a998454a
CB
780static int attach_child_main(struct attach_clone_payload *payload)
781{
57de839f 782 int fd, lsm_fd, ret;
a998454a
CB
783 uid_t new_uid;
784 gid_t new_gid;
a998454a
CB
785 lxc_attach_options_t* options = payload->options;
786 struct lxc_proc_context_info* init_ctx = payload->init_ctx;
57de839f
CB
787 bool needs_lsm = (options->namespaces & CLONE_NEWNS) &&
788 (options->attach_flags & LXC_ATTACH_LSM) &&
789 init_ctx->lsm_label;
a998454a
CB
790
791 /* A description of the purpose of this functionality is provided in the
792 * lxc-attach(1) manual page. We have to remount here and not in the
793 * parent process, otherwise /proc may not properly reflect the new pid
794 * namespace.
795 */
796 if (!(options->namespaces & CLONE_NEWNS) &&
797 (options->attach_flags & LXC_ATTACH_REMOUNT_PROC_SYS)) {
798 ret = lxc_attach_remount_sys_proc();
b75c344c
CB
799 if (ret < 0)
800 goto on_error;
ea918412 801
b75c344c 802 TRACE("Remounted \"/proc\" and \"/sys\"");
a998454a
CB
803 }
804
b75c344c 805/* Now perform additional attachments. */
a998454a 806#if HAVE_SYS_PERSONALITY_H
a998454a 807 if (options->attach_flags & LXC_ATTACH_SET_PERSONALITY) {
b75c344c
CB
808 long new_personality;
809
810 if (options->personality < 0)
811 new_personality = init_ctx->personality;
812 else
813 new_personality = options->personality;
ea918412 814
a998454a 815 ret = personality(new_personality);
b75c344c
CB
816 if (ret < 0)
817 goto on_error;
ea918412 818
b75c344c 819 TRACE("Set new personality");
a998454a
CB
820 }
821#endif
822
823 if (options->attach_flags & LXC_ATTACH_DROP_CAPABILITIES) {
824 ret = lxc_attach_drop_privs(init_ctx);
b75c344c
CB
825 if (ret < 0)
826 goto on_error;
ea918412 827
b75c344c 828 TRACE("Dropped capabilities");
a998454a
CB
829 }
830
831 /* Always set the environment (specify (LXC_ATTACH_KEEP_ENV, NULL, NULL)
832 * if you want this to be a no-op).
833 */
7385273f 834 ret = lxc_attach_set_environment(init_ctx,
835 options->env_policy,
a998454a
CB
836 options->extra_env_vars,
837 options->extra_keep_env);
b75c344c
CB
838 if (ret < 0)
839 goto on_error;
ea918412 840
b75c344c 841 TRACE("Set up environment");
a998454a 842
57de839f
CB
843 /* This remark only affects fully unprivileged containers:
844 * Receive fd for LSM security module before we set{g,u}id(). The reason
845 * is that on set{g,u}id() the kernel will a) make us undumpable and b)
846 * we will change our effective uid. This means our effective uid will
847 * be different from the effective uid of the process that created us
848 * which means that this processs no longer has capabilities in our
849 * namespace including CAP_SYS_PTRACE. This means we will not be able to
850 * read and /proc/<pid> files for the process anymore when /proc is
851 * mounted with hidepid={1,2}. So let's get the lsm label fd before the
852 * set{g,u}id().
853 */
854 if (needs_lsm) {
b75c344c 855 ret = lxc_abstract_unix_recv_fds(payload->ipc_socket, &lsm_fd, 1, NULL, 0);
9044b79e 856 if (ret <= 0) {
857 if (ret < 0)
858 SYSERROR("Failed to receive lsm label fd");
859
b75c344c 860 goto on_error;
9044b79e 861 }
862
57de839f
CB
863 TRACE("Received LSM label file descriptor %d from parent", lsm_fd);
864 }
865
08ea9270 866 if (options->stdin_fd > 0 && isatty(options->stdin_fd)) {
cd0a2b2f 867 ret = lxc_make_controlling_terminal(options->stdin_fd);
08ea9270
CB
868 if (ret < 0)
869 goto on_error;
870 }
871
a998454a
CB
872 /* Set {u,g}id. */
873 new_uid = 0;
874 new_gid = 0;
ea918412 875
a998454a
CB
876 /* Ignore errors, we will fall back to root in that case (/proc was not
877 * mounted etc.).
878 */
879 if (options->namespaces & CLONE_NEWUSER)
880 lxc_attach_get_init_uidgid(&new_uid, &new_gid);
881
882 if (options->uid != (uid_t)-1)
883 new_uid = options->uid;
ea918412 884
a998454a
CB
885 if (options->gid != (gid_t)-1)
886 new_gid = options->gid;
887
a998454a 888 /* Try to set the {u,g}id combination. */
b75c344c
CB
889 if (new_uid != 0 || new_gid != 0 || options->namespaces & CLONE_NEWUSER) {
890 ret = lxc_switch_uid_gid(new_uid, new_gid);
891 if (ret < 0)
892 goto on_error;
a998454a
CB
893 }
894
24927339 895 ret = lxc_setgroups(0, NULL);
96ec54ac 896 if (ret < 0 && errno != EPERM)
24927339
CB
897 goto on_error;
898
a998454a
CB
899 if ((init_ctx->container && init_ctx->container->lxc_conf &&
900 init_ctx->container->lxc_conf->no_new_privs) ||
901 (options->attach_flags & LXC_ATTACH_NO_NEW_PRIVS)) {
b75c344c
CB
902 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
903 if (ret < 0)
904 goto on_error;
ea918412 905
b75c344c 906 TRACE("Set PR_SET_NO_NEW_PRIVS");
a998454a
CB
907 }
908
57de839f 909 if (needs_lsm) {
d3ba7c98 910 bool on_exec;
a998454a
CB
911
912 /* Change into our new LSM profile. */
d3ba7c98 913 on_exec = options->attach_flags & LXC_ATTACH_LSM_EXEC ? true : false;
ea918412 914
d3ba7c98 915 ret = lsm_process_label_set_at(lsm_fd, init_ctx->lsm_label, on_exec);
57de839f 916 close(lsm_fd);
b75c344c
CB
917 if (ret < 0)
918 goto on_error;
ea918412 919
d3ba7c98 920 TRACE("Set %s LSM label to \"%s\"", lsm_name(), init_ctx->lsm_label);
a998454a
CB
921 }
922
923 if (init_ctx->container && init_ctx->container->lxc_conf &&
b75c344c
CB
924 init_ctx->container->lxc_conf->seccomp) {
925 ret = lxc_seccomp_load(init_ctx->container->lxc_conf);
926 if (ret < 0)
927 goto on_error;
ea918412 928
b75c344c 929 TRACE("Loaded seccomp profile");
a998454a 930 }
ea918412 931
b75c344c
CB
932 shutdown(payload->ipc_socket, SHUT_RDWR);
933 close(payload->ipc_socket);
ba2be1a8
CB
934 payload->ipc_socket = -EBADF;
935 lxc_proc_put_context_info(init_ctx);
d35b372a 936 payload->init_ctx = NULL;
a998454a
CB
937
938 /* The following is done after the communication socket is shut down.
939 * That way, all errors that might (though unlikely) occur up until this
940 * point will have their messages printed to the original stderr (if
941 * logging is so configured) and not the fd the user supplied, if any.
942 */
943
944 /* Fd handling for stdin, stdout and stderr; ignore errors here, user
945 * may want to make sure the fds are closed, for example.
946 */
08ea9270 947 if (options->stdin_fd >= 0 && options->stdin_fd != STDIN_FILENO)
00c72a93 948 (void)dup2(options->stdin_fd, STDIN_FILENO);
08ea9270
CB
949
950 if (options->stdout_fd >= 0 && options->stdout_fd != STDOUT_FILENO)
00c72a93 951 (void)dup2(options->stdout_fd, STDOUT_FILENO);
08ea9270
CB
952
953 if (options->stderr_fd >= 0 && options->stderr_fd != STDERR_FILENO)
00c72a93 954 (void)dup2(options->stderr_fd, STDERR_FILENO);
a998454a
CB
955
956 /* close the old fds */
08ea9270 957 if (options->stdin_fd > STDERR_FILENO)
a998454a 958 close(options->stdin_fd);
08ea9270
CB
959
960 if (options->stdout_fd > STDERR_FILENO)
a998454a 961 close(options->stdout_fd);
08ea9270
CB
962
963 if (options->stderr_fd > STDERR_FILENO)
a998454a
CB
964 close(options->stderr_fd);
965
966 /* Try to remove FD_CLOEXEC flag from stdin/stdout/stderr, but also
967 * here, ignore errors.
968 */
b75c344c 969 for (fd = STDIN_FILENO; fd <= STDERR_FILENO; fd++) {
3f62938a 970 ret = fd_cloexec(fd, false);
b75c344c
CB
971 if (ret < 0) {
972 SYSERROR("Failed to clear FD_CLOEXEC from file descriptor %d", fd);
973 goto on_error;
974 }
a998454a
CB
975 }
976
9e84479f
CB
977 if (options->attach_flags & LXC_ATTACH_TERMINAL) {
978 ret = lxc_terminal_prepare_login(payload->terminal_slave_fd);
ba2be1a8 979 if (ret < 0) {
9e84479f 980 SYSERROR("Failed to prepare terminal file descriptor %d", payload->terminal_slave_fd);
ba2be1a8
CB
981 goto on_error;
982 }
ea918412 983
9e84479f 984 TRACE("Prepared terminal file descriptor %d", payload->terminal_slave_fd);
ba2be1a8
CB
985 }
986
a998454a
CB
987 /* We're done, so we can now do whatever the user intended us to do. */
988 rexit(payload->exec_function(payload->exec_payload));
b75c344c
CB
989
990on_error:
ba2be1a8 991 lxc_put_attach_clone_payload(payload);
b75c344c 992 rexit(EXIT_FAILURE);
a998454a
CB
993}
994
9e84479f
CB
/*
 * lxc_attach_terminal - Create the terminal used by the attached process.
 * @conf     - Container config, passed on to lxc_terminal_map_ids().
 * @terminal - Terminal to initialize and create.
 *
 * Returns 0 on success and -1 on failure. If mapping the ids fails the
 * freshly created terminal is deleted and its config freed again.
 */
static int lxc_attach_terminal(struct lxc_conf *conf,
			       struct lxc_terminal *terminal)
{
	lxc_terminal_init(terminal);

	if (lxc_terminal_create(terminal) < 0) {
		ERROR("Failed to create terminal");
		return -1;
	}

	/* Shift ttys to container. */
	if (lxc_terminal_map_ids(conf, terminal) < 0) {
		ERROR("Failed to chown terminal");
		lxc_terminal_delete(terminal);
		lxc_terminal_conf_free(terminal);
		return -1;
	}

	return 0;
}
1022
9e84479f
CB
/*
 * lxc_attach_terminal_mainloop_init - Open a mainloop serving @terminal.
 * @terminal - Terminal whose handlers are registered.
 * @descr    - Epoll descriptor to open.
 *
 * Returns 0 on success and -1 on failure. If the handlers cannot be added the
 * mainloop is closed again.
 */
static int lxc_attach_terminal_mainloop_init(struct lxc_terminal *terminal,
					     struct lxc_epoll_descr *descr)
{
	if (lxc_mainloop_open(descr) < 0) {
		ERROR("Failed to create mainloop");
		return -1;
	}

	if (lxc_terminal_mainloop_add(descr, terminal) < 0) {
		ERROR("Failed to add handlers to mainloop");
		lxc_mainloop_close(descr);
		return -1;
	}

	return 0;
}
1043
9e84479f 1044static inline void lxc_attach_terminal_close_master(struct lxc_terminal *terminal)
ba2be1a8 1045{
9e84479f 1046 if (terminal->master < 0)
ba2be1a8
CB
1047 return;
1048
9e84479f
CB
1049 close(terminal->master);
1050 terminal->master = -EBADF;
ba2be1a8
CB
1051}
1052
9e84479f 1053static inline void lxc_attach_terminal_close_slave(struct lxc_terminal *terminal)
ba2be1a8 1054{
9e84479f 1055 if (terminal->slave < 0)
ba2be1a8
CB
1056 return;
1057
9e84479f
CB
1058 close(terminal->slave);
1059 terminal->slave = -EBADF;
ba2be1a8
CB
1060}
1061
9e84479f 1062static inline void lxc_attach_terminal_close_peer(struct lxc_terminal *terminal)
ba2be1a8 1063{
9e84479f 1064 if (terminal->peer < 0)
ba2be1a8
CB
1065 return;
1066
9e84479f
CB
1067 close(terminal->peer);
1068 terminal->peer = -EBADF;
ba2be1a8
CB
1069}
1070
9e84479f 1071static inline void lxc_attach_terminal_close_log(struct lxc_terminal *terminal)
ba2be1a8 1072{
9e84479f 1073 if (terminal->log_fd < 0)
ba2be1a8
CB
1074 return;
1075
9e84479f
CB
1076 close(terminal->log_fd);
1077 terminal->log_fd = -EBADF;
ba2be1a8
CB
1078}
1079
6f4f1937
CB
1080int lxc_attach(const char *name, const char *lxcpath,
1081 lxc_attach_exec_t exec_function, void *exec_payload,
1082 lxc_attach_options_t *options, pid_t *attached_process)
9c4693b8 1083{
877f3a04 1084 int i, ret, status;
9c4693b8 1085 int ipc_sockets[2];
6f4f1937 1086 char *cwd, *new_cwd;
9b8e3c96 1087 signed long personality;
ba2be1a8 1088 pid_t attached_pid, init_pid, pid;
6f4f1937 1089 struct lxc_proc_context_info *init_ctx;
9e84479f 1090 struct lxc_terminal terminal;
1cce35e6 1091 struct lxc_conf *conf;
a998454a 1092 struct attach_clone_payload payload = {0};
9c4693b8 1093
877f3a04
CB
1094 ret = access("/proc/self/ns", X_OK);
1095 if (ret) {
ea918412 1096 SYSERROR("Does this kernel version support namespaces?");
877f3a04
CB
1097 return -1;
1098 }
1099
9c4693b8
CS
1100 if (!options)
1101 options = &attach_static_default_options;
1102
1103 init_pid = lxc_cmd_get_init_pid(name, lxcpath);
1104 if (init_pid < 0) {
ae026f55 1105 ERROR("Failed to get init pid");
9c4693b8
CS
1106 return -1;
1107 }
1108
1109 init_ctx = lxc_proc_get_context_info(init_pid);
1110 if (!init_ctx) {
6f4f1937 1111 ERROR("Failed to get context of init process: %ld", (long)init_pid);
9c4693b8
CS
1112 return -1;
1113 }
1114
9b8e3c96
SH
1115 personality = get_personality(name, lxcpath);
1116 if (init_ctx->personality < 0) {
6f4f1937 1117 ERROR("Failed to get personality of the container");
9b8e3c96
SH
1118 lxc_proc_put_context_info(init_ctx);
1119 return -1;
1120 }
1121 init_ctx->personality = personality;
1122
ff07d7bb 1123 init_ctx->container = lxc_container_new(name, lxcpath);
62de1db6
CB
1124 if (!init_ctx->container) {
1125 lxc_proc_put_context_info(init_ctx);
ff07d7bb 1126 return -1;
62de1db6 1127 }
ff07d7bb 1128
ba773996
CB
1129 if (!init_ctx->container->lxc_conf) {
1130 init_ctx->container->lxc_conf = lxc_conf_init();
62de1db6
CB
1131 if (!init_ctx->container->lxc_conf) {
1132 lxc_proc_put_context_info(init_ctx);
ea918412 1133 return -1;
62de1db6 1134 }
ba773996 1135 }
1cce35e6 1136 conf = init_ctx->container->lxc_conf;
ba773996 1137
bd4307f0 1138 if (!fetch_seccomp(init_ctx->container, options))
ae026f55 1139 WARN("Failed to get seccomp policy");
2c4ea790 1140
bd4307f0 1141 if (!no_new_privs(init_ctx->container, options))
ae026f55 1142 WARN("Could not determine whether PR_SET_NO_NEW_PRIVS is set");
2e812c16 1143
9c4693b8
CS
1144 cwd = getcwd(NULL, 0);
1145
8ce83369
CB
1146 /* Determine which namespaces the container was created with
1147 * by asking lxc-start, if necessary.
9c4693b8
CS
1148 */
1149 if (options->namespaces == -1) {
1150 options->namespaces = lxc_cmd_get_clone_flags(name, lxcpath);
1151 /* call failed */
1152 if (options->namespaces == -1) {
8ce83369 1153 ERROR("Failed to automatically determine the "
877f3a04 1154 "namespaces which the container uses");
9c4693b8 1155 free(cwd);
fe4de9a6 1156 lxc_proc_put_context_info(init_ctx);
9c4693b8
CS
1157 return -1;
1158 }
877f3a04
CB
1159
1160 for (i = 0; i < LXC_NS_MAX; i++) {
1161 if (ns_info[i].clone_flag & CLONE_NEWCGROUP)
1162 if (!(options->attach_flags & LXC_ATTACH_MOVE_TO_CGROUP) ||
1163 !cgns_supported())
1164 continue;
1165
1166 if (ns_info[i].clone_flag & options->namespaces)
1167 continue;
1168
1169 init_ctx->ns_inherited |= ns_info[i].clone_flag;
1170 }
1171 }
1172
0059379f 1173 pid = lxc_raw_getpid();
ea918412 1174
877f3a04 1175 for (i = 0; i < LXC_NS_MAX; i++) {
ea918412 1176 int j;
877f3a04
CB
1177
1178 if (options->namespaces & ns_info[i].clone_flag)
1179 init_ctx->ns_fd[i] = lxc_preserve_ns(init_pid, ns_info[i].proc_name);
1180 else if (init_ctx->ns_inherited & ns_info[i].clone_flag)
1181 init_ctx->ns_fd[i] = in_same_namespace(pid, init_pid, ns_info[i].proc_name);
1182 else
1183 continue;
ea918412 1184
877f3a04
CB
1185 if (init_ctx->ns_fd[i] >= 0)
1186 continue;
1187
1188 if (init_ctx->ns_fd[i] == -EINVAL) {
1189 DEBUG("Inheriting %s namespace from %d",
1190 ns_info[i].proc_name, pid);
1191 init_ctx->ns_inherited &= ~ns_info[i].clone_flag;
1192 continue;
1193 }
1194
1195 /* We failed to preserve the namespace. */
ea918412 1196 SYSERROR("Failed to attach to %s namespace of %d",
1197 ns_info[i].proc_name, pid);
1198
877f3a04
CB
1199 /* Close all already opened file descriptors before we return an
1200 * error, so we don't leak them.
1201 */
1202 for (j = 0; j < i; j++)
1203 close(init_ctx->ns_fd[j]);
1204
877f3a04
CB
1205 free(cwd);
1206 lxc_proc_put_context_info(init_ctx);
1207 return -1;
9c4693b8
CS
1208 }
1209
9e84479f
CB
1210 if (options->attach_flags & LXC_ATTACH_TERMINAL) {
1211 ret = lxc_attach_terminal(conf, &terminal);
ba2be1a8 1212 if (ret < 0) {
9e84479f 1213 ERROR("Failed to setup new terminal");
ba2be1a8
CB
1214 free(cwd);
1215 lxc_proc_put_context_info(init_ctx);
1216 return -1;
1217 }
1218
9e84479f 1219 terminal.log_fd = options->log_fd;
c948657b 1220 } else {
9e84479f 1221 lxc_terminal_init(&terminal);
ba2be1a8
CB
1222 }
1223
8ce83369
CB
1224 /* Create a socket pair for IPC communication; set SOCK_CLOEXEC in order
1225 * to make sure we don't irritate other threads that want to fork+exec
1226 * away
9c4693b8
CS
1227 *
1228 * IMPORTANT: if the initial process is multithreaded and another call
1229 * just fork()s away without exec'ing directly after, the socket fd will
1230 * exist in the forked process from the other thread and any close() in
8ce83369
CB
1231 * our own child process will not really cause the socket to close
1232 * properly, potentiall causing the parent to hang.
9c4693b8
CS
1233 *
1234 * For this reason, while IPC is still active, we have to use shutdown()
8ce83369
CB
1235 * if the child exits prematurely in order to signal that the socket is
1236 * closed and cannot assume that the child exiting will automatically do
1237 * that.
9c4693b8
CS
1238 *
1239 * IPC mechanism: (X is receiver)
1240 * initial process intermediate attached
1241 * X <--- send pid of
1242 * attached proc,
1243 * then exit
1244 * send 0 ------------------------------------> X
1245 * [do initialization]
1246 * X <------------------------------------ send 1
1247 * [add to cgroup, ...]
1248 * send 2 ------------------------------------> X
81f466d0
CB
1249 * [set LXC_ATTACH_NO_NEW_PRIVS]
1250 * X <------------------------------------ send 3
1251 * [open LSM label fd]
1252 * send 4 ------------------------------------> X
1253 * [set LSM label]
9c4693b8
CS
1254 * close socket close socket
1255 * run program
1256 */
1257 ret = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
1258 if (ret < 0) {
ae026f55 1259 SYSERROR("Could not set up required IPC mechanism for attaching");
9c4693b8 1260 free(cwd);
fe4de9a6 1261 lxc_proc_put_context_info(init_ctx);
9c4693b8
CS
1262 return -1;
1263 }
1264
e3f0e436
CB
1265 /* Create intermediate subprocess, two reasons:
1266 * 1. We can't setns() in the child itself, since we want to make
8ce83369 1267 * sure we are properly attached to the pidns.
e3f0e436 1268 * 2. Also, the initial thread has to put the attached process
8ce83369
CB
1269 * into the cgroup, which we can only do if we didn't already
1270 * setns() (otherwise, user namespaces will hate us).
9c4693b8
CS
1271 */
1272 pid = fork();
9c4693b8 1273 if (pid < 0) {
ae026f55 1274 SYSERROR("Failed to create first subprocess");
9c4693b8 1275 free(cwd);
fe4de9a6 1276 lxc_proc_put_context_info(init_ctx);
9c4693b8
CS
1277 return -1;
1278 }
1279
1280 if (pid) {
ba2be1a8 1281 int ret_parent = -1;
9c4693b8 1282 pid_t to_cleanup_pid = pid;
ba2be1a8 1283 struct lxc_epoll_descr descr = {0};
9c4693b8 1284
ba2be1a8 1285 /* close unneeded file descriptors */
9c4693b8
CS
1286 close(ipc_sockets[1]);
1287 free(cwd);
ba2be1a8 1288 lxc_proc_close_ns_fd(init_ctx);
9e84479f
CB
1289 if (options->attach_flags & LXC_ATTACH_TERMINAL)
1290 lxc_attach_terminal_close_slave(&terminal);
9c4693b8 1291
8ce83369 1292 /* Attach to cgroup, if requested. */
f4364484 1293 if (options->attach_flags & LXC_ATTACH_MOVE_TO_CGROUP) {
2202afc9
CB
1294 struct cgroup_ops *cgroup_ops;
1295
1296 cgroup_ops = cgroup_init(NULL);
1297 if (!cgroup_ops)
1298 goto on_error;
1299
1300 if (!cgroup_ops->attach(cgroup_ops, name, lxcpath, pid))
8ce83369 1301 goto on_error;
2202afc9
CB
1302
1303 cgroup_exit(cgroup_ops);
1304 TRACE("Moved intermediate process %d into container's cgroups", pid);
f4364484
SG
1305 }
1306
bb2ada6f 1307 /* Setup /proc limits */
1cce35e6
CB
1308 if (!lxc_list_empty(&conf->procs)) {
1309 ret = setup_proc_filesystem(&conf->procs, pid);
bb2ada6f
CB
1310 if (ret < 0)
1311 goto on_error;
1312 }
1313
c6d09e15 1314 /* Setup resource limits */
1cce35e6
CB
1315 if (!lxc_list_empty(&conf->limits)) {
1316 ret = setup_resource_limits(&conf->limits, pid);
ba2be1a8
CB
1317 if (ret < 0)
1318 goto on_error;
1319 }
1320
9e84479f
CB
1321 if (options->attach_flags & LXC_ATTACH_TERMINAL) {
1322 ret = lxc_attach_terminal_mainloop_init(&terminal, &descr);
ba2be1a8 1323 if (ret < 0)
6f4f1937 1324 goto on_error;
ea918412 1325
9e84479f 1326 TRACE("Initialized terminal mainloop");
ba2be1a8 1327 }
c6d09e15 1328
8ce83369 1329 /* Let the child process know to go ahead. */
f4364484
SG
1330 status = 0;
1331 ret = lxc_write_nointr(ipc_sockets[0], &status, sizeof(status));
94ac256f 1332 if (ret != sizeof(status))
ba2be1a8 1333 goto close_mainloop;
ea918412 1334
94ac256f 1335 TRACE("Told intermediate process to start initializing");
f4364484 1336
8ce83369 1337 /* Get pid of attached process from intermediate process. */
94ac256f
CB
1338 ret = lxc_read_nointr(ipc_sockets[0], &attached_pid, sizeof(attached_pid));
1339 if (ret != sizeof(attached_pid))
ba2be1a8 1340 goto close_mainloop;
ea918412 1341
94ac256f 1342 TRACE("Received pid %d of attached process in parent pid namespace", attached_pid);
9c4693b8 1343
8ce83369 1344 /* Ignore SIGKILL (CTRL-C) and SIGQUIT (CTRL-\) - issue #313. */
62183f1a
SH
1345 if (options->stdin_fd == 0) {
1346 signal(SIGINT, SIG_IGN);
1347 signal(SIGQUIT, SIG_IGN);
1348 }
2eef2bda 1349
8ce83369 1350 /* Reap intermediate process. */
9c4693b8
CS
1351 ret = wait_for_pid(pid);
1352 if (ret < 0)
ba2be1a8 1353 goto close_mainloop;
ea918412 1354
94ac256f 1355 TRACE("Intermediate process %d exited", pid);
9c4693b8 1356
8ce83369 1357 /* We will always have to reap the attached process now. */
9c4693b8
CS
1358 to_cleanup_pid = attached_pid;
1359
81f466d0 1360 /* Open LSM fd and send it to child. */
6f4f1937
CB
1361 if ((options->namespaces & CLONE_NEWNS) &&
1362 (options->attach_flags & LXC_ATTACH_LSM) &&
1363 init_ctx->lsm_label) {
94ac256f 1364 int ret = -1;
47ce2cb7
CB
1365 int labelfd;
1366 bool on_exec;
6f4f1937 1367
47ce2cb7
CB
1368 on_exec = options->attach_flags & LXC_ATTACH_LSM_EXEC ? true : false;
1369 labelfd = lsm_process_label_fd_get(attached_pid, on_exec);
81f466d0 1370 if (labelfd < 0)
ba2be1a8 1371 goto close_mainloop;
ea918412 1372
94ac256f 1373 TRACE("Opened LSM label file descriptor %d", labelfd);
81f466d0
CB
1374
1375 /* Send child fd of the LSM security module to write to. */
ae467c54 1376 ret = lxc_abstract_unix_send_fds(ipc_sockets[0], &labelfd, 1, NULL, 0);
81f466d0 1377 if (ret <= 0) {
9044b79e 1378 if (ret < 0)
1379 SYSERROR("Failed to send lsm label fd");
1380
1381 close(labelfd);
ba2be1a8 1382 goto close_mainloop;
81f466d0 1383 }
9044b79e 1384
1385 close(labelfd);
94ac256f 1386 TRACE("Sent LSM label file descriptor %d to child", labelfd);
81f466d0
CB
1387 }
1388
8ce83369
CB
1389 /* We're done, the child process should now execute whatever it
1390 * is that the user requested. The parent can now track it with
1391 * waitpid() or similar.
9c4693b8
CS
1392 */
1393
1394 *attached_process = attached_pid;
9c4693b8 1395
ba2be1a8 1396 /* Now shut down communication with child, we're done. */
9c4693b8
CS
1397 shutdown(ipc_sockets[0], SHUT_RDWR);
1398 close(ipc_sockets[0]);
ba2be1a8
CB
1399 ipc_sockets[0] = -1;
1400
1401 ret_parent = 0;
1402 to_cleanup_pid = -1;
ea918412 1403
9e84479f 1404 if (options->attach_flags & LXC_ATTACH_TERMINAL) {
ba2be1a8
CB
1405 ret = lxc_mainloop(&descr, -1);
1406 if (ret < 0) {
1407 ret_parent = -1;
1408 to_cleanup_pid = attached_pid;
1409 }
1410 }
1411
1412 close_mainloop:
9e84479f 1413 if (options->attach_flags & LXC_ATTACH_TERMINAL)
ba2be1a8
CB
1414 lxc_mainloop_close(&descr);
1415
1416 on_error:
1417 if (ipc_sockets[0] >= 0) {
1418 shutdown(ipc_sockets[0], SHUT_RDWR);
1419 close(ipc_sockets[0]);
1420 }
1421
1422 if (to_cleanup_pid > 0)
6f4f1937 1423 (void)wait_for_pid(to_cleanup_pid);
ba2be1a8 1424
9e84479f
CB
1425 if (options->attach_flags & LXC_ATTACH_TERMINAL) {
1426 lxc_terminal_delete(&terminal);
1427 lxc_terminal_conf_free(&terminal);
ba2be1a8 1428 }
ea918412 1429
fe4de9a6 1430 lxc_proc_put_context_info(init_ctx);
ba2be1a8 1431 return ret_parent;
9c4693b8
CS
1432 }
1433
ba2be1a8 1434 /* close unneeded file descriptors */
9c4693b8 1435 close(ipc_sockets[0]);
ba2be1a8 1436 ipc_sockets[0] = -EBADF;
ea918412 1437
9e84479f
CB
1438 if (options->attach_flags & LXC_ATTACH_TERMINAL) {
1439 lxc_attach_terminal_close_master(&terminal);
1440 lxc_attach_terminal_close_peer(&terminal);
1441 lxc_attach_terminal_close_log(&terminal);
ba2be1a8 1442 }
9c4693b8 1443
8ce83369 1444 /* Wait for the parent to have setup cgroups. */
94ac256f 1445 ret = lxc_read_nointr(ipc_sockets[1], &status, sizeof(status));
ba2be1a8 1446 if (ret != sizeof(status)) {
f4364484 1447 shutdown(ipc_sockets[1], SHUT_RDWR);
62de1db6 1448 lxc_proc_put_context_info(init_ctx);
f4364484
SG
1449 rexit(-1);
1450 }
ea918412 1451
94ac256f 1452 TRACE("Intermediate process starting to initialize");
f4364484 1453
8ce83369
CB
1454 /* Attach now, create another subprocess later, since pid namespaces
1455 * only really affect the children of the current process.
9c4693b8 1456 */
877f3a04 1457 ret = lxc_attach_to_ns(init_pid, init_ctx);
9c4693b8 1458 if (ret < 0) {
94ac256f 1459 ERROR("Failed to enter namespaces");
9c4693b8 1460 shutdown(ipc_sockets[1], SHUT_RDWR);
62de1db6 1461 lxc_proc_put_context_info(init_ctx);
9c4693b8
CS
1462 rexit(-1);
1463 }
ea918412 1464
877f3a04
CB
1465 /* close namespace file descriptors */
1466 lxc_proc_close_ns_fd(init_ctx);
9c4693b8 1467
8ce83369 1468 /* Attach succeeded, try to cwd. */
9c4693b8
CS
1469 if (options->initial_cwd)
1470 new_cwd = options->initial_cwd;
1471 else
1472 new_cwd = cwd;
d6d979bc
CB
1473 if (new_cwd) {
1474 ret = chdir(new_cwd);
1475 if (ret < 0)
1476 WARN("Could not change directory to \"%s\"", new_cwd);
1477 }
9c4693b8
CS
1478 free(cwd);
1479
a998454a
CB
1480 /* Create attached process. */
1481 payload.ipc_socket = ipc_sockets[1];
1482 payload.options = options;
1483 payload.init_ctx = init_ctx;
9e84479f 1484 payload.terminal_slave_fd = terminal.slave;
a998454a
CB
1485 payload.exec_function = exec_function;
1486 payload.exec_payload = exec_payload;
9c4693b8 1487
a998454a
CB
1488 pid = lxc_raw_clone(CLONE_PARENT);
1489 if (pid < 0) {
94ac256f 1490 SYSERROR("Failed to clone attached process");
9c4693b8 1491 shutdown(ipc_sockets[1], SHUT_RDWR);
62de1db6 1492 lxc_proc_put_context_info(init_ctx);
9c4693b8
CS
1493 rexit(-1);
1494 }
a998454a
CB
1495
1496 if (pid == 0) {
1497 ret = attach_child_main(&payload);
1498 if (ret < 0)
1499 ERROR("Failed to exec");
ea918412 1500
a998454a
CB
1501 _exit(EXIT_FAILURE);
1502 }
ea918412 1503
9e84479f
CB
1504 if (options->attach_flags & LXC_ATTACH_TERMINAL)
1505 lxc_attach_terminal_close_slave(&terminal);
9c4693b8 1506
8ce83369 1507 /* Tell grandparent the pid of the pid of the newly created child. */
9c4693b8
CS
1508 ret = lxc_write_nointr(ipc_sockets[1], &pid, sizeof(pid));
1509 if (ret != sizeof(pid)) {
8ce83369
CB
1510 /* If this really happens here, this is very unfortunate, since
1511 * the parent will not know the pid of the attached process and
1512 * will not be able to wait for it (and we won't either due to
1513 * CLONE_PARENT) so the parent won't be able to reap it and the
1514 * attached process will remain a zombie.
9c4693b8 1515 */
9c4693b8 1516 shutdown(ipc_sockets[1], SHUT_RDWR);
62de1db6 1517 lxc_proc_put_context_info(init_ctx);
9c4693b8
CS
1518 rexit(-1);
1519 }
ea918412 1520
94ac256f 1521 TRACE("Sending pid %d of attached process", pid);
9c4693b8 1522
8ce83369 1523 /* The rest is in the hands of the initial and the attached process. */
62de1db6 1524 lxc_proc_put_context_info(init_ctx);
9c4693b8
CS
1525 rexit(0);
1526}
1527
9c4693b8
CS
1528int lxc_attach_run_command(void* payload)
1529{
1530 lxc_attach_command_t* cmd = (lxc_attach_command_t*)payload;
1531
1532 execvp(cmd->program, cmd->argv);
ea918412 1533
1534 SYSERROR("Failed to exec \"%s\"", cmd->program);
9c4693b8
CS
1535 return -1;
1536}
1537
/*
 * Attach helper that executes the login shell of the current user.
 *
 * @payload: ignored.
 *
 * The shell is taken from the password record returned by getpwuid_r();
 * if no record is found lxc_attach_getpwshell() is consulted instead. If
 * that shell cannot be executed, or none could be determined, /bin/sh is
 * tried as a last resort.
 *
 * Only returns if every exec attempt failed; the return value is then -1.
 */
int lxc_attach_run_shell(void* payload)
{
	uid_t uid;
	struct passwd pwent;
	struct passwd *pwentp = NULL;
	char *user_shell;
	char *buf;
	long suggested_size;
	size_t bufsize;
	int ret;

	/* Ignore payload parameter. */
	(void)payload;

	uid = getuid();

	/* sysconf() returns a long and uses -1 for "no limit/unknown", so
	 * inspect the signed value before turning it into a size. A
	 * non-positive value falls back to a fixed default.
	 */
	suggested_size = sysconf(_SC_GETPW_R_SIZE_MAX);
	if (suggested_size > 0)
		bufsize = (size_t)suggested_size;
	else
		bufsize = 1024;

	buf = malloc(bufsize);
	if (buf) {
		ret = getpwuid_r(uid, &pwent, buf, bufsize, &pwentp);
		if (!pwentp) {
			if (ret == 0)
				WARN("Could not find matched password record");

			WARN("Failed to get password record - %u", uid);
		}
	}

	/* This probably happens because of incompatible nss implementations in
	 * host and container (remember, this code is still using the host's
	 * glibc but our mount namespace is in the container) we may try to get
	 * the information by spawning a [getent passwd uid] process and parsing
	 * the result.
	 */
	if (!pwentp)
		user_shell = lxc_attach_getpwshell(uid);
	else
		user_shell = pwent.pw_shell;

	if (user_shell)
		execlp(user_shell, user_shell, (char *)NULL);

	/* Executed if either no passwd entry or execlp fails, we will fall back
	 * on /bin/sh as a default shell.
	 */
	execlp("/bin/sh", "/bin/sh", (char *)NULL);

	SYSERROR("Failed to execute shell");
	/* user_shell is freed only when it came from lxc_attach_getpwshell();
	 * otherwise it points into buf, which is released below.
	 */
	if (!pwentp)
		free(user_shell);

	free(buf);
	return -1;
}