2 * lxc: linux Container library
4 * (C) Copyright IBM Corp. 2007, 2008
7 * Daniel Lezcano <daniel.lezcano at free.fr>
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
31 #include <sys/param.h>
32 #include <sys/prctl.h>
33 #include <sys/mount.h>
34 #include <sys/syscall.h>
36 #include <linux/unistd.h>
39 #if !HAVE_DECL_PR_CAPBSET_DROP
40 #define PR_CAPBSET_DROP 24
43 #include "namespace.h"
50 lxc_log_define(lxc_attach
, lxc
);
52 /* Define setns() if missing from the C library */
54 static int setns(int fd
, int nstype
)
57 return syscall(__NR_setns
, fd
, nstype
);
65 /* Define unshare() if missing from the C library */
67 static int unshare(int flags
)
70 return syscall(__NR_unshare
, flags
);
78 /* Define getline() if missing from the C library */
81 #include <../include/getline.h>
85 struct lxc_proc_context_info
*lxc_proc_get_context_info(pid_t pid
)
87 struct lxc_proc_context_info
*info
= calloc(1, sizeof(*info
));
89 char proc_fn
[MAXPATHLEN
];
91 size_t line_bufsz
= 0;
95 SYSERROR("Could not allocate memory.");
99 /* read capabilities */
100 snprintf(proc_fn
, MAXPATHLEN
, "/proc/%d/status", pid
);
102 proc_file
= fopen(proc_fn
, "r");
104 SYSERROR("Could not open %s", proc_fn
);
109 while (getline(&line
, &line_bufsz
, proc_file
) != -1) {
110 ret
= sscanf(line
, "CapBnd: %llx", &info
->capability_mask
);
111 if (ret
!= EOF
&& ret
> 0) {
121 SYSERROR("Could not read capability bounding set from %s", proc_fn
);
126 /* read personality */
127 snprintf(proc_fn
, MAXPATHLEN
, "/proc/%d/personality", pid
);
129 proc_file
= fopen(proc_fn
, "r");
131 SYSERROR("Could not open %s", proc_fn
);
135 ret
= fscanf(proc_file
, "%lx", &info
->personality
);
138 if (ret
== EOF
|| ret
== 0) {
139 SYSERROR("Could not read personality from %s", proc_fn
);
143 info
->aa_profile
= aa_get_profile(pid
);
152 int lxc_attach_to_ns(pid_t pid
, int which
)
154 char path
[MAXPATHLEN
];
155 /* according to <http://article.gmane.org/gmane.linux.kernel.containers.lxc.devel/1429>,
156 * the file for user namespaces in /proc/$pid/ns will be called
157 * 'user' once the kernel supports it
159 static char *ns
[] = { "mnt", "pid", "uts", "ipc", "user", "net" };
160 static int flags
[] = {
161 CLONE_NEWNS
, CLONE_NEWPID
, CLONE_NEWUTS
, CLONE_NEWIPC
,
162 CLONE_NEWUSER
, CLONE_NEWNET
164 static const int size
= sizeof(ns
) / sizeof(char *);
166 int i
, j
, saved_errno
;
169 snprintf(path
, MAXPATHLEN
, "/proc/%d/ns", pid
);
170 if (access(path
, X_OK
)) {
171 ERROR("Does this kernel version support 'attach' ?");
175 for (i
= 0; i
< size
; i
++) {
176 /* ignore if we are not supposed to attach to that
179 if (which
!= -1 && !(which
& flags
[i
])) {
184 snprintf(path
, MAXPATHLEN
, "/proc/%d/ns/%s", pid
, ns
[i
]);
185 fd
[i
] = open(path
, O_RDONLY
);
189 /* close all already opened file descriptors before
190 * we return an error, so we don't leak them
192 for (j
= 0; j
< i
; j
++)
196 SYSERROR("failed to open '%s'", path
);
201 for (i
= 0; i
< size
; i
++) {
202 if (fd
[i
] >= 0 && setns(fd
[i
], 0) != 0) {
205 for (j
= i
; j
< size
; j
++)
209 SYSERROR("failed to set namespace '%s'", ns
[i
]);
219 int lxc_attach_remount_sys_proc()
223 ret
= unshare(CLONE_NEWNS
);
225 SYSERROR("failed to unshare mount namespace");
229 /* assume /proc is always mounted, so remount it */
230 ret
= umount2("/proc", MNT_DETACH
);
232 SYSERROR("failed to unmount /proc");
236 ret
= mount("none", "/proc", "proc", 0, NULL
);
238 SYSERROR("failed to remount /proc");
242 /* try to umount /sys - if it's not a mount point,
243 * we'll get EINVAL, then we ignore it because it
244 * may not have been mounted in the first place
246 ret
= umount2("/sys", MNT_DETACH
);
247 if (ret
< 0 && errno
!= EINVAL
) {
248 SYSERROR("failed to unmount /sys");
250 } else if (ret
== 0) {
252 ret
= mount("none", "/sys", "sysfs", 0, NULL
);
254 SYSERROR("failed to remount /sys");
262 int lxc_attach_drop_privs(struct lxc_proc_context_info
*ctx
)
264 int last_cap
= lxc_caps_last_cap();
267 for (cap
= 0; cap
<= last_cap
; cap
++) {
268 if (ctx
->capability_mask
& (1LL << cap
))
271 if (prctl(PR_CAPBSET_DROP
, cap
, 0, 0, 0)) {
272 SYSERROR("failed to remove capability id %d", cap
);
280 int lxc_attach_set_environment(enum lxc_attach_env_policy_t policy
, char** extra_env
, char** extra_keep
)
282 /* TODO: implement extra_env, extra_keep
284 * - extra_env is an array of strings of the form
285 * "VAR=VALUE", which are to be set (after clearing or not,
286 * depending on the value of the policy variable)
287 * - extra_keep is an array of strings of the form
288 * "VAR", which are extra environment variables to be kept
289 * around after clearing (if that is done, otherwise, the
295 if (policy
== LXC_ATTACH_CLEAR_ENV
) {
297 SYSERROR("failed to clear environment");
298 /* don't error out though */
302 if (putenv("container=lxc")) {
303 SYSERROR("failed to set environment variable");
310 char *lxc_attach_getpwshell(uid_t uid
)
312 /* local variables */
319 /* we need to fork off a process that runs the
320 * getent program, and we need to capture its
321 * output, so we use a pipe for that purpose
338 size_t line_bufsz
= 0;
344 pipe_f
= fdopen(pipes
[0], "r");
345 while (getline(&line
, &line_bufsz
, pipe_f
) != -1) {
347 char *saveptr
= NULL
;
352 /* if we already found something, just continue
353 * to read until the pipe doesn't deliver any more
354 * data, but don't modify the existing data
360 /* trim line on the right hand side */
361 for (i
= strlen(line
); i
> 0 && (line
[i
- 1] == '\n' || line
[i
- 1] == '\r'); --i
)
364 /* split into tokens: first user name */
365 token
= strtok_r(line
, ":", &saveptr
);
368 /* next: dummy password field */
369 token
= strtok_r(NULL
, ":", &saveptr
);
373 token
= strtok_r(NULL
, ":", &saveptr
);
374 value
= token
? strtol(token
, &endptr
, 10) : 0;
375 if (!token
|| !endptr
|| *endptr
|| value
== LONG_MIN
|| value
== LONG_MAX
)
377 /* dummy sanity check: user id matches */
378 if ((uid_t
) value
!= uid
)
380 /* skip fields: gid, gecos, dir, go to next field 'shell' */
381 for (i
= 0; i
< 4; i
++) {
382 token
= strtok_r(NULL
, ":", &saveptr
);
390 result
= strdup(token
);
392 /* sanity check that there are no fields after that */
393 token
= strtok_r(NULL
, ":", &saveptr
);
403 if (waitpid(pid
, &status
, 0) < 0) {
409 /* some sanity checks: if anything even hinted at going
410 * wrong: we can't be sure we have a valid result, so
414 if (!WIFEXITED(status
))
417 if (WEXITSTATUS(status
) != 0)
427 char *arguments
[] = {
436 /* we want to capture stdout */
440 /* get rid of stdin/stderr, so we try to associate it
443 fd
= open("/dev/null", O_RDWR
);
453 /* finish argument list */
454 ret
= snprintf(uid_buf
, sizeof(uid_buf
), "%ld", (long) uid
);
458 /* try to run getent program */
459 (void) execvp("getent", arguments
);
464 void lxc_attach_get_init_uidgid(uid_t
* init_uid
, gid_t
* init_gid
)
467 char proc_fn
[MAXPATHLEN
];
469 size_t line_bufsz
= 0;
472 uid_t uid
= (uid_t
)-1;
473 gid_t gid
= (gid_t
)-1;
475 /* read uid and gid of init (pid 1) from its status file */
476 snprintf(proc_fn
, MAXPATHLEN
, "/proc/%d/status", 1);
478 proc_file
= fopen(proc_fn
, "r");
482 while (getline(&line
, &line_bufsz
, proc_file
) != -1) {
483 /* format is: real, effective, saved set user, fs
484 * we only care about real uid
486 ret
= sscanf(line
, "Uid: %ld", &value
);
487 if (ret
!= EOF
&& ret
> 0) {
490 ret
= sscanf(line
, "Gid: %ld", &value
);
491 if (ret
!= EOF
&& ret
> 0)
494 if (uid
!= (uid_t
)-1 && gid
!= (gid_t
)-1)
501 /* only override arguments if we found something */
502 if (uid
!= (uid_t
)-1)
504 if (gid
!= (gid_t
)-1)
507 /* TODO: we should also parse supplementary groups and use
508 * setgroups() to set them */