]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/attach.c
attach: mount a sane prox for LSM setup
[mirror_lxc.git] / src / lxc / attach.c
CommitLineData
e0732705
CS
1/*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
9afe19d6 7 * Daniel Lezcano <daniel.lezcano at free.fr>
e0732705
CS
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
250b1eec 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
e0732705
CS
22 */
23
24#define _GNU_SOURCE
25#include <unistd.h>
26#include <stdio.h>
27#include <string.h>
28#include <stdlib.h>
2eef2bda 29#include <signal.h>
e0732705
CS
30#include <errno.h>
31#include <fcntl.h>
c476bdce 32#include <grp.h>
e0732705
CS
33#include <sys/param.h>
34#include <sys/prctl.h>
7a0b0b56 35#include <sys/mount.h>
5ec27989 36#include <sys/socket.h>
1ba0013f 37#include <sys/syscall.h>
905022f7 38#include <sys/wait.h>
910bb4fa 39#include <linux/unistd.h>
905022f7 40#include <pwd.h>
e0732705
CS
41
42#if !HAVE_DECL_PR_CAPBSET_DROP
43#define PR_CAPBSET_DROP 24
44#endif
45
46#include "namespace.h"
47#include "log.h"
48#include "attach.h"
49#include "caps.h"
e0732705 50#include "config.h"
6a44839f 51#include "utils.h"
9c4693b8
CS
52#include "commands.h"
53#include "cgroup.h"
025ed0f3 54#include "lxclock.h"
2c4ea790
SH
55#include "conf.h"
56#include "lxcseccomp.h"
57#include <lxc/lxccontainer.h>
fe4de9a6 58#include "lsm/lsm.h"
9b8e3c96 59#include "confile.h"
9c4693b8
CS
60
61#if HAVE_SYS_PERSONALITY_H
62#include <sys/personality.h>
63#endif
e0732705 64
a3da2f3b
SG
65#ifndef SOCK_CLOEXEC
66# define SOCK_CLOEXEC 02000000
67#endif
68
d6a3c917
SG
69#ifndef MS_REC
70#define MS_REC 16384
71#endif
72
73#ifndef MS_SLAVE
74#define MS_SLAVE (1<<19)
75#endif
76
e0732705
CS
77lxc_log_define(lxc_attach, lxc);
78
74a3920a 79static struct lxc_proc_context_info *lxc_proc_get_context_info(pid_t pid)
e0732705
CS
80{
81 struct lxc_proc_context_info *info = calloc(1, sizeof(*info));
82 FILE *proc_file;
83 char proc_fn[MAXPATHLEN];
460a1cf0 84 char *line = NULL;
e0732705 85 size_t line_bufsz = 0;
460a1cf0 86 int ret, found;
e0732705
CS
87
88 if (!info) {
89 SYSERROR("Could not allocate memory.");
90 return NULL;
91 }
92
93 /* read capabilities */
94 snprintf(proc_fn, MAXPATHLEN, "/proc/%d/status", pid);
95
96 proc_file = fopen(proc_fn, "r");
97 if (!proc_file) {
98 SYSERROR("Could not open %s", proc_fn);
99 goto out_error;
100 }
101
102 found = 0;
103 while (getline(&line, &line_bufsz, proc_file) != -1) {
104 ret = sscanf(line, "CapBnd: %llx", &info->capability_mask);
105 if (ret != EOF && ret > 0) {
106 found = 1;
107 break;
108 }
109 }
110
f10fad2f 111 free(line);
e0732705
CS
112 fclose(proc_file);
113
114 if (!found) {
115 SYSERROR("Could not read capability bounding set from %s", proc_fn);
116 errno = ENOENT;
117 goto out_error;
118 }
119
fe4de9a6 120 info->lsm_label = lsm_process_label_get(pid);
e0732705 121
e0732705
CS
122 return info;
123
124out_error:
460a1cf0 125 free(info);
e0732705
CS
126 return NULL;
127}
128
fe4de9a6
DE
129static void lxc_proc_put_context_info(struct lxc_proc_context_info *ctx)
130{
f10fad2f 131 free(ctx->lsm_label);
2c4ea790
SH
132 if (ctx->container)
133 lxc_container_put(ctx->container);
fe4de9a6
DE
134 free(ctx);
135}
136
74a3920a 137static int lxc_attach_to_ns(pid_t pid, int which)
99d50954
CS
138{
139 char path[MAXPATHLEN];
fc763ab7
CS
140 /* according to <http://article.gmane.org/gmane.linux.kernel.containers.lxc.devel/1429>,
141 * the file for user namepsaces in /proc/$pid/ns will be called
142 * 'user' once the kernel supports it
143 */
f4364484 144 static char *ns[] = { "user", "mnt", "pid", "uts", "ipc", "net" };
fc763ab7 145 static int flags[] = {
f4364484
SG
146 CLONE_NEWUSER, CLONE_NEWNS, CLONE_NEWPID, CLONE_NEWUTS, CLONE_NEWIPC,
147 CLONE_NEWNET
fc763ab7
CS
148 };
149 static const int size = sizeof(ns) / sizeof(char *);
99d50954 150 int fd[size];
fc763ab7
CS
151 int i, j, saved_errno;
152
99d50954
CS
153
154 snprintf(path, MAXPATHLEN, "/proc/%d/ns", pid);
155 if (access(path, X_OK)) {
156 ERROR("Does this kernel version support 'attach' ?");
157 return -1;
158 }
159
160 for (i = 0; i < size; i++) {
fc763ab7
CS
161 /* ignore if we are not supposed to attach to that
162 * namespace
163 */
164 if (which != -1 && !(which & flags[i])) {
165 fd[i] = -1;
166 continue;
167 }
168
99d50954 169 snprintf(path, MAXPATHLEN, "/proc/%d/ns/%s", pid, ns[i]);
9c4693b8 170 fd[i] = open(path, O_RDONLY | O_CLOEXEC);
99d50954 171 if (fd[i] < 0) {
fc763ab7
CS
172 saved_errno = errno;
173
174 /* close all already opened file descriptors before
175 * we return an error, so we don't leak them
176 */
177 for (j = 0; j < i; j++)
178 close(fd[j]);
179
180 errno = saved_errno;
99d50954
CS
181 SYSERROR("failed to open '%s'", path);
182 return -1;
183 }
184 }
185
186 for (i = 0; i < size; i++) {
fc763ab7
CS
187 if (fd[i] >= 0 && setns(fd[i], 0) != 0) {
188 saved_errno = errno;
189
190 for (j = i; j < size; j++)
191 close(fd[j]);
192
193 errno = saved_errno;
99d50954
CS
194 SYSERROR("failed to set namespace '%s'", ns[i]);
195 return -1;
196 }
197
198 close(fd[i]);
199 }
200
201 return 0;
202}
203
74a3920a 204static int lxc_attach_remount_sys_proc(void)
7a0b0b56
CS
205{
206 int ret;
207
208 ret = unshare(CLONE_NEWNS);
209 if (ret < 0) {
210 SYSERROR("failed to unshare mount namespace");
211 return -1;
212 }
213
2c6f3fc9
SH
214 if (detect_shared_rootfs()) {
215 if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL)) {
216 SYSERROR("Failed to make / rslave");
217 ERROR("Continuing...");
218 }
219 }
220
7a0b0b56
CS
221 /* assume /proc is always mounted, so remount it */
222 ret = umount2("/proc", MNT_DETACH);
223 if (ret < 0) {
224 SYSERROR("failed to unmount /proc");
225 return -1;
226 }
227
228 ret = mount("none", "/proc", "proc", 0, NULL);
229 if (ret < 0) {
230 SYSERROR("failed to remount /proc");
231 return -1;
232 }
233
234 /* try to umount /sys - if it's not a mount point,
235 * we'll get EINVAL, then we ignore it because it
236 * may not have been mounted in the first place
237 */
238 ret = umount2("/sys", MNT_DETACH);
239 if (ret < 0 && errno != EINVAL) {
240 SYSERROR("failed to unmount /sys");
241 return -1;
242 } else if (ret == 0) {
243 /* remount it */
244 ret = mount("none", "/sys", "sysfs", 0, NULL);
245 if (ret < 0) {
246 SYSERROR("failed to remount /sys");
247 return -1;
248 }
249 }
250
251 return 0;
252}
253
74a3920a 254static int lxc_attach_drop_privs(struct lxc_proc_context_info *ctx)
e0732705
CS
255{
256 int last_cap = lxc_caps_last_cap();
257 int cap;
258
259 for (cap = 0; cap <= last_cap; cap++) {
260 if (ctx->capability_mask & (1LL << cap))
261 continue;
262
263 if (prctl(PR_CAPBSET_DROP, cap, 0, 0, 0)) {
264 SYSERROR("failed to remove capability id %d", cap);
265 return -1;
266 }
267 }
268
269 return 0;
270}
905022f7 271
74a3920a 272static int lxc_attach_set_environment(enum lxc_attach_env_policy_t policy, char** extra_env, char** extra_keep)
b3a39ba6 273{
799f96fd 274 if (policy == LXC_ATTACH_CLEAR_ENV) {
3d5e9f48 275 char **extra_keep_store = NULL;
3d5e9f48
CS
276 int path_kept = 0;
277
278 if (extra_keep) {
279 size_t count, i;
280
281 for (count = 0; extra_keep[count]; count++);
282
283 extra_keep_store = calloc(count, sizeof(char *));
284 if (!extra_keep_store) {
285 SYSERROR("failed to allocate memory for storing current "
286 "environment variable values that will be kept");
287 return -1;
288 }
289 for (i = 0; i < count; i++) {
290 char *v = getenv(extra_keep[i]);
291 if (v) {
292 extra_keep_store[i] = strdup(v);
293 if (!extra_keep_store[i]) {
294 SYSERROR("failed to allocate memory for storing current "
295 "environment variable values that will be kept");
296 while (i > 0)
297 free(extra_keep_store[--i]);
298 free(extra_keep_store);
299 return -1;
300 }
301 if (strcmp(extra_keep[i], "PATH") == 0)
302 path_kept = 1;
303 }
304 /* calloc sets entire array to zero, so we don't
305 * need an else */
306 }
307 }
308
799f96fd 309 if (clearenv()) {
a9cab7e3 310 char **p;
799f96fd 311 SYSERROR("failed to clear environment");
a9cab7e3
CS
312 if (extra_keep_store) {
313 for (p = extra_keep_store; *p; p++)
314 free(*p);
315 free(extra_keep_store);
316 }
3d5e9f48
CS
317 return -1;
318 }
319
320 if (extra_keep_store) {
321 size_t i;
322 for (i = 0; extra_keep[i]; i++) {
acd4922e
SG
323 if (extra_keep_store[i]) {
324 if (setenv(extra_keep[i], extra_keep_store[i], 1) < 0)
325 SYSERROR("Unable to set environment variable");
326 }
3d5e9f48
CS
327 free(extra_keep_store[i]);
328 }
329 free(extra_keep_store);
330 }
331
332 /* always set a default path; shells and execlp tend
333 * to be fine without it, but there is a disturbing
334 * number of C programs out there that just assume
335 * that getenv("PATH") is never NULL and then die a
336 * painful segfault death. */
cfa70b88 337 if (!path_kept)
511a6936 338 setenv("PATH", "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", 1);
b3a39ba6
DW
339 }
340
341 if (putenv("container=lxc")) {
342 SYSERROR("failed to set environment variable");
343 return -1;
344 }
345
3d5e9f48
CS
346 /* set extra environment variables */
347 if (extra_env) {
348 for (; *extra_env; extra_env++) {
349 /* duplicate the string, just to be on
350 * the safe side, because putenv does not
351 * do it for us */
352 char *p = strdup(*extra_env);
353 /* we just assume the user knows what they
354 * are doing, so we don't do any checks */
355 if (!p) {
356 SYSERROR("failed to allocate memory for additional environment "
357 "variables");
358 return -1;
359 }
360 putenv(p);
361 }
362 }
363
b3a39ba6
DW
364 return 0;
365}
366
/*
 * Return the next ':'-separated field starting at *cursor and advance
 * *cursor behind it. Empty fields are returned as empty strings; NULL is
 * returned once the line is exhausted. The line is modified in place.
 */
static char *lxc_attach_next_passwd_field(char **cursor)
{
	char *field = *cursor;
	char *sep;

	if (!field)
		return NULL;

	sep = strchr(field, ':');
	if (sep) {
		*sep = '\0';
		*cursor = sep + 1;
	} else {
		*cursor = NULL;
	}

	return field;
}

/*
 * Parse one passwd-format line (name:passwd:uid:gid:gecos:dir:shell) and
 * return a heap copy of the shell field when the line describes @uid, has
 * exactly seven fields and a non-empty shell. Returns NULL otherwise.
 * The line is modified in place.
 */
static char *lxc_attach_parse_passwd_shell(char *line, uid_t uid)
{
	char *cursor = line;
	char *field;
	char *endptr = NULL;
	size_t len;
	long value;
	int i;

	/* trim line on the right hand side */
	for (len = strlen(line); len > 0 && (line[len - 1] == '\n' || line[len - 1] == '\r'); --len)
		line[len - 1] = '\0';

	/* first user name, then the dummy password field */
	if (!lxc_attach_next_passwd_field(&cursor))
		return NULL;
	if (!lxc_attach_next_passwd_field(&cursor))
		return NULL;

	/* next: user id; an empty field must not be read as uid 0 */
	field = lxc_attach_next_passwd_field(&cursor);
	if (!field || !*field)
		return NULL;
	value = strtol(field, &endptr, 10);
	if (!endptr || *endptr || value == LONG_MIN || value == LONG_MAX)
		return NULL;

	/* dummy sanity check: user id matches */
	if ((uid_t) value != uid)
		return NULL;

	/* skip fields: gid, gecos, dir, go to next field 'shell' */
	for (i = 0; i < 4; i++) {
		field = lxc_attach_next_passwd_field(&cursor);
		if (!field)
			return NULL;
	}

	/* sanity check that there are no fields after that */
	if (lxc_attach_next_passwd_field(&cursor))
		return NULL;

	/* an empty shell field is no usable answer */
	if (!*field)
		return NULL;

	return strdup(field);
}

/*
 * Determine the login shell of @uid by running "getent passwd <uid>" in a
 * child process and parsing its output.
 *
 * Returns a heap-allocated string that the caller has to free(), or NULL
 * if the shell could not be determined (pipe/fork failure, getent missing
 * or exiting non-zero, no matching well-formed entry).
 */
static char *lxc_attach_getpwshell(uid_t uid)
{
	pid_t pid;
	int pipes[2];
	int ret;
	int fd;
	char *result = NULL;

	/* we need to fork off a process that runs the
	 * getent program, and we need to capture its
	 * output, so we use a pipe for that purpose
	 */
	ret = pipe(pipes);
	if (ret < 0)
		return NULL;

	pid = fork();
	if (pid < 0) {
		close(pipes[0]);
		close(pipes[1]);
		return NULL;
	}

	if (pid) {
		/* parent process */
		FILE *pipe_f;
		char *line = NULL;
		size_t line_bufsz = 0;
		int found = 0;
		int status;

		close(pipes[1]);

		pipe_f = fdopen(pipes[0], "r");
		if (!pipe_f) {
			/* nothing can be read; close our end and fall
			 * through so that the child is still reaped
			 */
			close(pipes[0]);
		} else {
			while (getline(&line, &line_bufsz, pipe_f) != -1) {
				/* if we already found something, just continue
				 * to read until the pipe doesn't deliver any
				 * more data, but keep the existing result
				 */
				if (found)
					continue;

				result = lxc_attach_parse_passwd_shell(line, uid);
				if (result)
					found = 1;
			}

			free(line);
			fclose(pipe_f);
		}

		while (waitpid(pid, &status, 0) < 0) {
			if (errno == EINTR)
				continue;
			free(result);
			return NULL;
		}

		/* some sanity checks: if anything even hinted at going
		 * wrong: we can't be sure we have a valid result, so
		 * we assume we don't
		 */
		if (!found || !WIFEXITED(status) || WEXITSTATUS(status) != 0) {
			free(result);
			return NULL;
		}

		return result;
	} else {
		/* child process */
		char uid_buf[32];
		char *arguments[] = {
			"getent",
			"passwd",
			uid_buf,
			NULL
		};

		close(pipes[0]);

		/* we want to capture stdout */
		if (pipes[1] != 1) {
			if (dup2(pipes[1], 1) < 0)
				_exit(-1);
			close(pipes[1]);
		}

		/* get rid of stdin/stderr, so we try to associate it
		 * with /dev/null
		 */
		fd = open("/dev/null", O_RDWR);
		if (fd < 0) {
			close(0);
			close(2);
		} else {
			dup2(fd, 0);
			dup2(fd, 2);
			close(fd);
		}

		/* finish argument list */
		ret = snprintf(uid_buf, sizeof(uid_buf), "%ld", (long) uid);
		if (ret <= 0 || (size_t) ret >= sizeof(uid_buf))
			_exit(-1);

		/* try to run getent program; use _exit() on failure so the
		 * forked copy does not flush stdio buffers or run exit
		 * handlers inherited from the parent
		 */
		(void) execvp("getent", arguments);
		_exit(-1);
	}
}
cb3e61fa 519
/*
 * Look up the real uid and gid of pid 1 from the "Uid:" and "Gid:" lines
 * of /proc/1/status.
 *
 * *init_uid and *init_gid are only overwritten for values that were
 * actually found, so callers can preload them with fallbacks. Nothing is
 * changed when the status file cannot be opened.
 */
static void lxc_attach_get_init_uidgid(uid_t* init_uid, gid_t* init_gid)
{
	char status_path[MAXPATHLEN];
	FILE *status;
	char *buf = NULL;
	size_t buf_len = 0;
	long parsed = -1;
	uid_t found_uid = (uid_t)-1;
	gid_t found_gid = (gid_t)-1;

	/* status file of the process with pid 1 */
	snprintf(status_path, MAXPATHLEN, "/proc/%d/status", 1);

	status = fopen(status_path, "r");
	if (!status)
		return;

	while (getline(&buf, &buf_len, status) != -1) {
		/* format is: real, effective, saved set user, fs
		 * we only care about the first (real) id
		 */
		if (sscanf(buf, "Uid: %ld", &parsed) > 0)
			found_uid = (uid_t) parsed;
		else if (sscanf(buf, "Gid: %ld", &parsed) > 0)
			found_gid = (gid_t) parsed;

		/* both ids known, no need to read any further */
		if (found_uid != (uid_t)-1 && found_gid != (gid_t)-1)
			break;
	}

	fclose(status);
	free(buf);

	/* only override arguments if we found something */
	if (found_uid != (uid_t)-1)
		*init_uid = found_uid;
	if (found_gid != (gid_t)-1)
		*init_gid = found_gid;

	/* TODO: we should also parse supplementary groups and use
	 * setgroups() to set them */
}
9c4693b8
CS
566
567struct attach_clone_payload {
568 int ipc_socket;
569 lxc_attach_options_t* options;
570 struct lxc_proc_context_info* init_ctx;
571 lxc_attach_exec_t exec_function;
572 void* exec_payload;
573};
574
575static int attach_child_main(void* data);
576
577/* help the optimizer along if it doesn't know that exit always exits */
5dcc1ca6 578#define rexit(c) do { int __c = (c); _exit(__c); return __c; } while(0)
9c4693b8
CS
579
580/* define default options if no options are supplied by the user */
581static lxc_attach_options_t attach_static_default_options = LXC_ATTACH_OPTIONS_DEFAULT;
582
2c4ea790
SH
583static bool fetch_seccomp(const char *name, const char *lxcpath,
584 struct lxc_proc_context_info *i, lxc_attach_options_t *options)
585{
586 struct lxc_container *c;
2eef2bda 587
2c4ea790
SH
588 if (!(options->namespaces & CLONE_NEWNS) || !(options->attach_flags & LXC_ATTACH_LSM))
589 return true;
590
591 c = lxc_container_new(name, lxcpath);
592 if (!c)
593 return false;
594 i->container = c;
595 if (!c->lxc_conf)
596 return false;
597 if (lxc_read_seccomp_config(c->lxc_conf) < 0) {
442f5c0f 598 ERROR("Error reading seccomp policy");
2c4ea790
SH
599 return false;
600 }
601
602 return true;
603}
604
9b8e3c96
SH
/*
 * Query the "lxc.arch" configuration item of container @name/@lxcpath and
 * convert it with lxc_config_parse_arch().
 *
 * Returns the converted value, or -1 when the item could not be retrieved.
 */
static signed long get_personality(const char *name, const char *lxcpath)
{
	signed long persona = -1;
	char *arch = lxc_cmd_get_config_item(name, "lxc.arch", lxcpath);

	if (arch) {
		persona = lxc_config_parse_arch(arch);
		free(arch);
	}

	return persona;
}
616
9c4693b8
CS
617int lxc_attach(const char* name, const char* lxcpath, lxc_attach_exec_t exec_function, void* exec_payload, lxc_attach_options_t* options, pid_t* attached_process)
618{
619 int ret, status;
f4364484 620 pid_t init_pid, pid, attached_pid, expected;
9c4693b8
CS
621 struct lxc_proc_context_info *init_ctx;
622 char* cwd;
623 char* new_cwd;
624 int ipc_sockets[2];
9b8e3c96 625 signed long personality;
9c4693b8
CS
626
627 if (!options)
628 options = &attach_static_default_options;
629
630 init_pid = lxc_cmd_get_init_pid(name, lxcpath);
631 if (init_pid < 0) {
632 ERROR("failed to get the init pid");
633 return -1;
634 }
635
636 init_ctx = lxc_proc_get_context_info(init_pid);
637 if (!init_ctx) {
638 ERROR("failed to get context of the init process, pid = %ld", (long)init_pid);
639 return -1;
640 }
641
9b8e3c96
SH
642 personality = get_personality(name, lxcpath);
643 if (init_ctx->personality < 0) {
644 ERROR("Failed to get personality of the container");
645 lxc_proc_put_context_info(init_ctx);
646 return -1;
647 }
648 init_ctx->personality = personality;
649
2c4ea790
SH
650 if (!fetch_seccomp(name, lxcpath, init_ctx, options))
651 WARN("Failed to get seccomp policy");
652
9c4693b8
CS
653 cwd = getcwd(NULL, 0);
654
655 /* determine which namespaces the container was created with
656 * by asking lxc-start, if necessary
657 */
658 if (options->namespaces == -1) {
659 options->namespaces = lxc_cmd_get_clone_flags(name, lxcpath);
660 /* call failed */
661 if (options->namespaces == -1) {
662 ERROR("failed to automatically determine the "
663 "namespaces which the container unshared");
664 free(cwd);
fe4de9a6 665 lxc_proc_put_context_info(init_ctx);
9c4693b8
CS
666 return -1;
667 }
668 }
669
670 /* create a socket pair for IPC communication; set SOCK_CLOEXEC in order
671 * to make sure we don't irritate other threads that want to fork+exec away
672 *
673 * IMPORTANT: if the initial process is multithreaded and another call
674 * just fork()s away without exec'ing directly after, the socket fd will
675 * exist in the forked process from the other thread and any close() in
676 * our own child process will not really cause the socket to close properly,
677 * potentiall causing the parent to hang.
678 *
679 * For this reason, while IPC is still active, we have to use shutdown()
680 * if the child exits prematurely in order to signal that the socket
681 * is closed and cannot assume that the child exiting will automatically
682 * do that.
683 *
684 * IPC mechanism: (X is receiver)
685 * initial process intermediate attached
686 * X <--- send pid of
687 * attached proc,
688 * then exit
689 * send 0 ------------------------------------> X
690 * [do initialization]
691 * X <------------------------------------ send 1
692 * [add to cgroup, ...]
693 * send 2 ------------------------------------> X
694 * close socket close socket
695 * run program
696 */
697 ret = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
698 if (ret < 0) {
699 SYSERROR("could not set up required IPC mechanism for attaching");
700 free(cwd);
fe4de9a6 701 lxc_proc_put_context_info(init_ctx);
9c4693b8
CS
702 return -1;
703 }
704
705 /* create intermediate subprocess, three reasons:
706 * 1. runs all pthread_atfork handlers and the
707 * child will no longer be threaded
708 * (we can't properly setns() in a threaded process)
709 * 2. we can't setns() in the child itself, since
710 * we want to make sure we are properly attached to
711 * the pidns
712 * 3. also, the initial thread has to put the attached
713 * process into the cgroup, which we can only do if
714 * we didn't already setns() (otherwise, user
715 * namespaces will hate us)
716 */
717 pid = fork();
718
719 if (pid < 0) {
720 SYSERROR("failed to create first subprocess");
721 free(cwd);
fe4de9a6 722 lxc_proc_put_context_info(init_ctx);
9c4693b8
CS
723 return -1;
724 }
725
726 if (pid) {
727 pid_t to_cleanup_pid = pid;
9c4693b8 728
ec64264d 729 /* initial thread, we close the socket that is for the
9c4693b8
CS
730 * subprocesses
731 */
732 close(ipc_sockets[1]);
733 free(cwd);
734
f4364484
SG
735 /* attach to cgroup, if requested */
736 if (options->attach_flags & LXC_ATTACH_MOVE_TO_CGROUP) {
4fb3cba5 737 if (!cgroup_attach(name, lxcpath, pid))
f4364484 738 goto cleanup_error;
f4364484
SG
739 }
740
741 /* Let the child process know to go ahead */
742 status = 0;
743 ret = lxc_write_nointr(ipc_sockets[0], &status, sizeof(status));
744 if (ret <= 0) {
745 ERROR("error using IPC to notify attached process for initialization (0)");
746 goto cleanup_error;
747 }
748
9c4693b8
CS
749 /* get pid from intermediate process */
750 ret = lxc_read_nointr_expect(ipc_sockets[0], &attached_pid, sizeof(attached_pid), NULL);
751 if (ret <= 0) {
752 if (ret != 0)
753 ERROR("error using IPC to receive pid of attached process");
754 goto cleanup_error;
755 }
756
2eef2bda 757 /* ignore SIGKILL (CTRL-C) and SIGQUIT (CTRL-\) - issue #313 */
62183f1a
SH
758 if (options->stdin_fd == 0) {
759 signal(SIGINT, SIG_IGN);
760 signal(SIGQUIT, SIG_IGN);
761 }
2eef2bda 762
9c4693b8
CS
763 /* reap intermediate process */
764 ret = wait_for_pid(pid);
765 if (ret < 0)
766 goto cleanup_error;
767
768 /* we will always have to reap the grandchild now */
769 to_cleanup_pid = attached_pid;
770
771 /* tell attached process it may start initializing */
772 status = 0;
773 ret = lxc_write_nointr(ipc_sockets[0], &status, sizeof(status));
774 if (ret <= 0) {
775 ERROR("error using IPC to notify attached process for initialization (0)");
776 goto cleanup_error;
777 }
778
779 /* wait for the attached process to finish initializing */
780 expected = 1;
781 ret = lxc_read_nointr_expect(ipc_sockets[0], &status, sizeof(status), &expected);
782 if (ret <= 0) {
783 if (ret != 0)
784 ERROR("error using IPC to receive notification from attached process (1)");
785 goto cleanup_error;
786 }
787
9c4693b8
CS
788 /* tell attached process we're done */
789 status = 2;
790 ret = lxc_write_nointr(ipc_sockets[0], &status, sizeof(status));
791 if (ret <= 0) {
792 ERROR("error using IPC to notify attached process for initialization (2)");
793 goto cleanup_error;
794 }
795
796 /* now shut down communication with child, we're done */
797 shutdown(ipc_sockets[0], SHUT_RDWR);
798 close(ipc_sockets[0]);
fe4de9a6 799 lxc_proc_put_context_info(init_ctx);
9c4693b8
CS
800
801 /* we're done, the child process should now execute whatever
802 * it is that the user requested. The parent can now track it
803 * with waitpid() or similar.
804 */
805
806 *attached_process = attached_pid;
807 return 0;
808
809 cleanup_error:
810 /* first shut down the socket, then wait for the pid,
811 * otherwise the pid we're waiting for may never exit
812 */
813 shutdown(ipc_sockets[0], SHUT_RDWR);
814 close(ipc_sockets[0]);
815 if (to_cleanup_pid)
816 (void) wait_for_pid(to_cleanup_pid);
fe4de9a6 817 lxc_proc_put_context_info(init_ctx);
9c4693b8
CS
818 return -1;
819 }
820
821 /* first subprocess begins here, we close the socket that is for the
822 * initial thread
823 */
824 close(ipc_sockets[0]);
825
f4364484
SG
826 /* Wait for the parent to have setup cgroups */
827 expected = 0;
828 status = -1;
829 ret = lxc_read_nointr_expect(ipc_sockets[1], &status, sizeof(status), &expected);
830 if (ret <= 0) {
831 ERROR("error communicating with child process");
832 shutdown(ipc_sockets[1], SHUT_RDWR);
833 rexit(-1);
834 }
835
9c4693b8
CS
836 /* attach now, create another subprocess later, since pid namespaces
837 * only really affect the children of the current process
838 */
839 ret = lxc_attach_to_ns(init_pid, options->namespaces);
840 if (ret < 0) {
841 ERROR("failed to enter the namespace");
842 shutdown(ipc_sockets[1], SHUT_RDWR);
843 rexit(-1);
844 }
845
846 /* attach succeeded, try to cwd */
847 if (options->initial_cwd)
848 new_cwd = options->initial_cwd;
849 else
850 new_cwd = cwd;
851 ret = chdir(new_cwd);
852 if (ret < 0)
853 WARN("could not change directory to '%s'", new_cwd);
854 free(cwd);
855
856 /* now create the real child process */
857 {
858 struct attach_clone_payload payload = {
859 .ipc_socket = ipc_sockets[1],
860 .options = options,
861 .init_ctx = init_ctx,
862 .exec_function = exec_function,
863 .exec_payload = exec_payload
864 };
865 /* We use clone_parent here to make this subprocess a direct child of
866 * the initial process. Then this intermediate process can exit and
867 * the parent can directly track the attached process.
868 */
869 pid = lxc_clone(attach_child_main, &payload, CLONE_PARENT);
870 }
871
872 /* shouldn't happen, clone() should always return positive pid */
873 if (pid <= 0) {
874 SYSERROR("failed to create subprocess");
875 shutdown(ipc_sockets[1], SHUT_RDWR);
876 rexit(-1);
877 }
878
879 /* tell grandparent the pid of the pid of the newly created child */
880 ret = lxc_write_nointr(ipc_sockets[1], &pid, sizeof(pid));
881 if (ret != sizeof(pid)) {
882 /* if this really happens here, this is very unfortunate, since the
883 * parent will not know the pid of the attached process and will
884 * not be able to wait for it (and we won't either due to CLONE_PARENT)
885 * so the parent won't be able to reap it and the attached process
886 * will remain a zombie
887 */
888 ERROR("error using IPC to notify main process of pid of the attached process");
889 shutdown(ipc_sockets[1], SHUT_RDWR);
890 rexit(-1);
891 }
892
893 /* the rest is in the hands of the initial and the attached process */
894 rexit(0);
895}
896
74a3920a 897static int attach_child_main(void* data)
9c4693b8
CS
898{
899 struct attach_clone_payload* payload = (struct attach_clone_payload*)data;
900 int ipc_socket = payload->ipc_socket;
901 lxc_attach_options_t* options = payload->options;
902 struct lxc_proc_context_info* init_ctx = payload->init_ctx;
1a2e58cf 903#if HAVE_SYS_PERSONALITY_H
9c4693b8 904 long new_personality;
1a2e58cf 905#endif
9c4693b8
CS
906 int ret;
907 int status;
908 int expected;
909 long flags;
910 int fd;
911 uid_t new_uid;
912 gid_t new_gid;
913
914 /* wait for the initial thread to signal us that it's ready
915 * for us to start initializing
916 */
917 expected = 0;
918 status = -1;
919 ret = lxc_read_nointr_expect(ipc_socket, &status, sizeof(status), &expected);
920 if (ret <= 0) {
921 ERROR("error using IPC to receive notification from initial process (0)");
922 shutdown(ipc_socket, SHUT_RDWR);
923 rexit(-1);
924 }
925
9c4693b8
CS
926 /* A description of the purpose of this functionality is
927 * provided in the lxc-attach(1) manual page. We have to
928 * remount here and not in the parent process, otherwise
929 * /proc may not properly reflect the new pid namespace.
930 */
931 if (!(options->namespaces & CLONE_NEWNS) && (options->attach_flags & LXC_ATTACH_REMOUNT_PROC_SYS)) {
932 ret = lxc_attach_remount_sys_proc();
933 if (ret < 0) {
934 shutdown(ipc_socket, SHUT_RDWR);
935 rexit(-1);
936 }
937 }
938
939 /* now perform additional attachments*/
940#if HAVE_SYS_PERSONALITY_H
941 if (options->personality < 0)
942 new_personality = init_ctx->personality;
943 else
944 new_personality = options->personality;
945
946 if (options->attach_flags & LXC_ATTACH_SET_PERSONALITY) {
947 ret = personality(new_personality);
948 if (ret < 0) {
949 SYSERROR("could not ensure correct architecture");
950 shutdown(ipc_socket, SHUT_RDWR);
951 rexit(-1);
952 }
953 }
954#endif
955
956 if (options->attach_flags & LXC_ATTACH_DROP_CAPABILITIES) {
957 ret = lxc_attach_drop_privs(init_ctx);
958 if (ret < 0) {
959 ERROR("could not drop privileges");
960 shutdown(ipc_socket, SHUT_RDWR);
961 rexit(-1);
962 }
963 }
964
965 /* always set the environment (specify (LXC_ATTACH_KEEP_ENV, NULL, NULL) if you want this to be a no-op) */
966 ret = lxc_attach_set_environment(options->env_policy, options->extra_env_vars, options->extra_keep_env);
967 if (ret < 0) {
968 ERROR("could not set initial environment for attached process");
969 shutdown(ipc_socket, SHUT_RDWR);
970 rexit(-1);
971 }
972
973 /* set user / group id */
974 new_uid = 0;
975 new_gid = 0;
976 /* ignore errors, we will fall back to root in that case
977 * (/proc was not mounted etc.)
978 */
979 if (options->namespaces & CLONE_NEWUSER)
980 lxc_attach_get_init_uidgid(&new_uid, &new_gid);
981
982 if (options->uid != (uid_t)-1)
983 new_uid = options->uid;
984 if (options->gid != (gid_t)-1)
985 new_gid = options->gid;
986
82e28fe0 987 /* setup the control tty */
d3b63011 988 if (options->stdin_fd && isatty(options->stdin_fd)) {
82e28fe0
SG
989 if (setsid() < 0) {
990 SYSERROR("unable to setsid");
991 shutdown(ipc_socket, SHUT_RDWR);
992 rexit(-1);
993 }
994
995 if (ioctl(options->stdin_fd, TIOCSCTTY, (char *)NULL) < 0) {
996 SYSERROR("unable to TIOCSTTY");
997 shutdown(ipc_socket, SHUT_RDWR);
998 rexit(-1);
999 }
1000 }
1001
9c4693b8 1002 /* try to set the uid/gid combination */
c476bdce
SH
1003 if ((new_gid != 0 || options->namespaces & CLONE_NEWUSER)) {
1004 if (setgid(new_gid) || setgroups(0, NULL)) {
1005 SYSERROR("switching to container gid");
1006 shutdown(ipc_socket, SHUT_RDWR);
1007 rexit(-1);
1008 }
9c4693b8
CS
1009 }
1010 if ((new_uid != 0 || options->namespaces & CLONE_NEWUSER) && setuid(new_uid)) {
1011 SYSERROR("switching to container uid");
1012 shutdown(ipc_socket, SHUT_RDWR);
1013 rexit(-1);
1014 }
1015
1016 /* tell initial process it may now put us into the cgroups */
1017 status = 1;
1018 ret = lxc_write_nointr(ipc_socket, &status, sizeof(status));
1019 if (ret != sizeof(status)) {
1020 ERROR("error using IPC to notify initial process for initialization (1)");
1021 shutdown(ipc_socket, SHUT_RDWR);
1022 rexit(-1);
1023 }
1024
1025 /* wait for the initial thread to signal us that it has done
1026 * everything for us when it comes to cgroups etc.
1027 */
1028 expected = 2;
1029 status = -1;
1030 ret = lxc_read_nointr_expect(ipc_socket, &status, sizeof(status), &expected);
1031 if (ret <= 0) {
1032 ERROR("error using IPC to receive final notification from initial process (2)");
1033 shutdown(ipc_socket, SHUT_RDWR);
1034 rexit(-1);
1035 }
1036
1037 shutdown(ipc_socket, SHUT_RDWR);
1038 close(ipc_socket);
72863294
DE
1039
1040 /* set new apparmor profile/selinux context */
1041 if ((options->namespaces & CLONE_NEWNS) && (options->attach_flags & LXC_ATTACH_LSM)) {
1042 int on_exec;
ced03a01 1043 int proc_mounted;
72863294
DE
1044
1045 on_exec = options->attach_flags & LXC_ATTACH_LSM_EXEC ? 1 : 0;
ced03a01
SH
1046 proc_mounted = mount_proc_if_needed("/");
1047 if (proc_mounted == -1) {
1048 ERROR("Error mounting a sane /proc");
1049 rexit(-1);
1050 }
7aff4f43
SH
1051 ret = lsm_process_label_set(init_ctx->lsm_label,
1052 init_ctx->container->lxc_conf, 0, on_exec);
ced03a01
SH
1053 if (proc_mounted)
1054 umount("/proc");
72863294
DE
1055 if (ret < 0) {
1056 rexit(-1);
1057 }
1058 }
2c4ea790
SH
1059
1060 if (init_ctx->container && init_ctx->container->lxc_conf &&
1061 lxc_seccomp_load(init_ctx->container->lxc_conf) != 0) {
1062 ERROR("Loading seccomp policy");
1063 rexit(-1);
1064 }
1065
fe4de9a6 1066 lxc_proc_put_context_info(init_ctx);
9c4693b8
CS
1067
1068 /* The following is done after the communication socket is
1069 * shut down. That way, all errors that might (though
1070 * unlikely) occur up until this point will have their messages
1071 * printed to the original stderr (if logging is so configured)
1072 * and not the fd the user supplied, if any.
1073 */
1074
1075 /* fd handling for stdin, stdout and stderr;
1076 * ignore errors here, user may want to make sure
1077 * the fds are closed, for example */
1078 if (options->stdin_fd >= 0 && options->stdin_fd != 0)
1079 dup2(options->stdin_fd, 0);
1080 if (options->stdout_fd >= 0 && options->stdout_fd != 1)
1081 dup2(options->stdout_fd, 1);
1082 if (options->stderr_fd >= 0 && options->stderr_fd != 2)
1083 dup2(options->stderr_fd, 2);
1084
1085 /* close the old fds */
1086 if (options->stdin_fd > 2)
1087 close(options->stdin_fd);
1088 if (options->stdout_fd > 2)
1089 close(options->stdout_fd);
1090 if (options->stderr_fd > 2)
1091 close(options->stderr_fd);
1092
1093 /* try to remove CLOEXEC flag from stdin/stdout/stderr,
1094 * but also here, ignore errors */
1095 for (fd = 0; fd <= 2; fd++) {
1096 flags = fcntl(fd, F_GETFL);
1097 if (flags < 0)
1098 continue;
71b2940d
SG
1099 if (flags & FD_CLOEXEC) {
1100 if (fcntl(fd, F_SETFL, flags & ~FD_CLOEXEC) < 0) {
1101 SYSERROR("Unable to clear CLOEXEC from fd");
1102 }
1103 }
9c4693b8
CS
1104 }
1105
1106 /* we're done, so we can now do whatever the user intended us to do */
1107 rexit(payload->exec_function(payload->exec_payload));
1108}
1109
1110int lxc_attach_run_command(void* payload)
1111{
1112 lxc_attach_command_t* cmd = (lxc_attach_command_t*)payload;
1113
1114 execvp(cmd->program, cmd->argv);
1115 SYSERROR("failed to exec '%s'", cmd->program);
1116 return -1;
1117}
1118
/*
 * Exec function for lxc_attach(): run the login shell of the current
 * user, falling back to /bin/sh.
 *
 * @payload is ignored. Only returns (with -1) when no shell could be
 * executed.
 */
int lxc_attach_run_shell(void* payload)
{
	uid_t uid;
	struct passwd *passwd;
	char *user_shell;
	/* set when user_shell was allocated by lxc_attach_getpwshell() */
	char *owned_shell = NULL;

	/* ignore payload parameter */
	(void)payload;

	uid = getuid();
	passwd = getpwuid(uid);

	/* this probably happens because of incompatible nss
	 * implementations in host and container (remember, this
	 * code is still using the host's glibc but our mount
	 * namespace is in the container)
	 * we may try to get the information by spawning a
	 * [getent passwd uid] process and parsing the result
	 */
	if (!passwd) {
		owned_shell = lxc_attach_getpwshell(uid);
		user_shell = owned_shell;
	} else {
		user_shell = passwd->pw_shell;
	}

	/* the argument list of a variadic exec function has to be
	 * terminated by a null pointer of type char *
	 */
	if (user_shell)
		execlp(user_shell, user_shell, (char *)NULL);

	/* executed if either no passwd entry or execlp fails,
	 * we will fall back on /bin/sh as a default shell
	 */
	execlp("/bin/sh", "/bin/sh", (char *)NULL);
	SYSERROR("failed to exec shell");
	free(owned_shell);
	return -1;
}