2 * lxc: linux Container library
4 * (C) Copyright IBM Corp. 2007, 2008
7 * Daniel Lezcano <daniel.lezcano at free.fr>
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
31 #include <sys/param.h>
32 #include <sys/prctl.h>
33 #include <sys/mount.h>
34 #include <sys/syscall.h>
36 #include <linux/unistd.h>
39 #if !HAVE_DECL_PR_CAPBSET_DROP
40 #define PR_CAPBSET_DROP 24
43 #include "namespace.h"
50 lxc_log_define(lxc_attach
, lxc
);
52 /* Define setns() if missing from the C library */
// Local replacement for setns(), used when the C library does not provide one.
// Forwards fd and nstype unchanged to the raw system call and returns
// whatever syscall() returns.
// NOTE(review): only the forwarding line is visible in this listing; any
// fallback for kernels without __NR_setns cannot be confirmed from here.
54 static int setns(int fd
, int nstype
)
57 return syscall(__NR_setns
, fd
, nstype
);
65 /* Define unshare() if missing from the C library */
// Local replacement for unshare(), used when the C library does not provide one.
// Forwards flags unchanged to the raw system call and returns whatever
// syscall() returns.
67 static int unshare(int flags
)
70 return syscall(__NR_unshare
, flags
);
78 /* Define getline() if missing from the C library */
81 #include <../include/getline.h>
// Gather the execution context of process `pid` into a zero-initialised,
// calloc()ed struct lxc_proc_context_info:
//   - capability_mask: hex value of the "CapBnd:" line in /proc/<pid>/status
//   - personality:     hex value read from /proc/<pid>/personality
//   - aa_profile:      whatever aa_get_profile(pid) yields
// Allocation, open and parse failures are reported through SYSERROR.
// NOTE(review): the return statements and cleanup paths are not visible in
// this listing; presumably the caller owns the returned structure - confirm.
85 struct lxc_proc_context_info
*lxc_proc_get_context_info(pid_t pid
)
87 struct lxc_proc_context_info
*info
= calloc(1, sizeof(*info
));
89 char proc_fn
[MAXPATHLEN
];
91 size_t line_bufsz
= 0;
95 SYSERROR("Could not allocate memory.");
99 /* read capabilities */
100 snprintf(proc_fn
, MAXPATHLEN
, "/proc/%d/status", pid
);
102 proc_file
= fopen(proc_fn
, "r");
104 SYSERROR("Could not open %s", proc_fn
);
// getline() grows `line` as needed; each line is tested against the
// "CapBnd:" pattern and a successful match fills info->capability_mask.
109 while (getline(&line
, &line_bufsz
, proc_file
) != -1) {
110 ret
= sscanf(line
, "CapBnd: %llx", &info
->capability_mask
);
111 if (ret
!= EOF
&& ret
> 0) {
122 SYSERROR("Could not read capability bounding set from %s", proc_fn
);
127 /* read personality */
// proc_fn is reused for the second file
128 snprintf(proc_fn
, MAXPATHLEN
, "/proc/%d/personality", pid
);
130 proc_file
= fopen(proc_fn
, "r");
132 SYSERROR("Could not open %s", proc_fn
);
136 ret
= fscanf(proc_file
, "%lx", &info
->personality
);
// both "nothing converted" and end-of-file count as a read failure
139 if (ret
== EOF
|| ret
== 0) {
140 SYSERROR("Could not read personality from %s", proc_fn
);
144 info
->aa_profile
= aa_get_profile(pid
);
// Move the calling process into the namespaces of process `pid`.
//
// `which` is a bitmask of CLONE_NEW* flags selecting the namespaces to
// enter; the value -1 selects every namespace in the table below
// (mnt, pid, uts, ipc, user, net).
//
// Works in two passes: first a descriptor is opened on
// /proc/<pid>/ns/<name> for every selected namespace, then setns() is
// called on each descriptor that was opened. Failures are reported through
// ERROR / SYSERROR.
// NOTE(review): the declaration of fd[], the close() calls and the return
// statements are not visible in this listing.
153 int lxc_attach_to_ns(pid_t pid
, int which
)
155 char path
[MAXPATHLEN
];
156 /* according to <http://article.gmane.org/gmane.linux.kernel.containers.lxc.devel/1429>,
157 * the file for user namespaces in /proc/$pid/ns will be called
158 * 'user' once the kernel supports it
// ns[] and flags[] are parallel tables: entry i gives the file name under
// /proc/<pid>/ns and the CLONE_NEW* flag that selects it in `which`.
160 static char *ns
[] = { "mnt", "pid", "uts", "ipc", "user", "net" };
161 static int flags
[] = {
162 CLONE_NEWNS
, CLONE_NEWPID
, CLONE_NEWUTS
, CLONE_NEWIPC
,
163 CLONE_NEWUSER
, CLONE_NEWNET
165 static const int size
= sizeof(ns
) / sizeof(char *);
167 int i
, j
, saved_errno
;
// the ns directory of the target must be reachable, otherwise attaching
// is not possible at all
170 snprintf(path
, MAXPATHLEN
, "/proc/%d/ns", pid
);
171 if (access(path
, X_OK
)) {
172 ERROR("Does this kernel version support 'attach' ?");
// first pass: open one descriptor per selected namespace
176 for (i
= 0; i
< size
; i
++) {
177 /* ignore if we are not supposed to attach to that
180 if (which
!= -1 && !(which
& flags
[i
])) {
185 snprintf(path
, MAXPATHLEN
, "/proc/%d/ns/%s", pid
, ns
[i
]);
186 fd
[i
] = open(path
, O_RDONLY
);
190 /* close all already opened file descriptors before
191 * we return an error, so we don't leak them
193 for (j
= 0; j
< i
; j
++)
197 SYSERROR("failed to open '%s'", path
);
// second pass: enter every namespace whose descriptor is valid (>= 0);
// nstype 0 lets the kernel accept any namespace type for the descriptor
202 for (i
= 0; i
< size
; i
++) {
203 if (fd
[i
] >= 0 && setns(fd
[i
], 0) != 0) {
206 for (j
= i
; j
< size
; j
++)
210 SYSERROR("failed to set namespace '%s'", ns
[i
]);
// Give the calling process fresh /proc and /sys mounts inside a new mount
// namespace.
//
// Steps, in order:
//   1. unshare(CLONE_NEWNS)
//   2. lazily unmount /proc (MNT_DETACH) and mount a new "proc" instance
//   3. lazily unmount /sys; EINVAL from that call is tolerated, and a new
//      "sysfs" instance is mounted only when the unmount succeeded
// Every failing step is reported through SYSERROR.
// NOTE(review): return values are not visible in this listing.
220 int lxc_attach_remount_sys_proc()
224 ret
= unshare(CLONE_NEWNS
);
226 SYSERROR("failed to unshare mount namespace");
230 /* assume /proc is always mounted, so remount it */
231 ret
= umount2("/proc", MNT_DETACH
);
233 SYSERROR("failed to unmount /proc");
237 ret
= mount("none", "/proc", "proc", 0, NULL
);
239 SYSERROR("failed to remount /proc");
243 /* try to umount /sys - if it's not a mount point,
244 * we'll get EINVAL, then we ignore it because it
245 * may not have been mounted in the first place
247 ret
= umount2("/sys", MNT_DETACH
);
248 if (ret
< 0 && errno
!= EINVAL
) {
249 SYSERROR("failed to unmount /sys");
// only remount /sys when something was actually unmounted
251 } else if (ret
== 0) {
253 ret
= mount("none", "/sys", "sysfs", 0, NULL
);
255 SYSERROR("failed to remount /sys");
// Reduce the capability bounding set of the calling process using the mask
// stored in ctx->capability_mask.
//
// Walks every capability id from 0 up to and including
// lxc_caps_last_cap(), tests the matching bit of the mask and uses
// prctl(PR_CAPBSET_DROP, cap) to drop capabilities; a failing prctl() is
// reported through SYSERROR together with the capability id.
// NOTE(review): the statement guarded by the mask test is not visible in
// this listing; presumably capabilities whose bit is set are kept and all
// others are dropped - confirm.
263 int lxc_attach_drop_privs(struct lxc_proc_context_info
*ctx
)
265 int last_cap
= lxc_caps_last_cap();
268 for (cap
= 0; cap
<= last_cap
; cap
++) {
// 1LL keeps the shift in 64-bit arithmetic for capability ids >= 32
269 if (ctx
->capability_mask
& (1LL << cap
))
272 if (prctl(PR_CAPBSET_DROP
, cap
, 0, 0, 0)) {
273 SYSERROR("failed to remove capability id %d", cap
);
// Prepare the environment of the attached process.
//
// policy:     when equal to LXC_ATTACH_CLEAR_ENV the existing environment
//             is cleared first; a failure to clear is logged but, per the
//             comment below, does not abort the function.
// extra_env:  not used yet, see the TODO below.
// extra_keep: not used yet, see the TODO below.
//
// In all cases "container=lxc" is then placed into the environment with
// putenv(); a failure there is reported through SYSERROR.
// NOTE(review): return values are not visible in this listing.
281 int lxc_attach_set_environment(enum lxc_attach_env_policy_t policy
, char** extra_env
, char** extra_keep
)
283 /* TODO: implement extra_env, extra_keep
285 * - extra_env is an array of strings of the form
286 * "VAR=VALUE", which are to be set (after clearing or not,
287 * depending on the value of the policy variable)
288 * - extra_keep is an array of strings of the form
289 * "VAR", which are extra environment variables to be kept
290 * around after clearing (if that is done, otherwise, the
296 if (policy
== LXC_ATTACH_CLEAR_ENV
) {
298 SYSERROR("failed to clear environment");
299 /* don't error out though */
303 if (putenv("container=lxc")) {
304 SYSERROR("failed to set environment variable");
// Look up the login shell of user `uid` by running the external "getent"
// program and parsing its passwd-style output.
//
// One side of the function reads the program output through a pipe:
//   - every line is stripped of trailing CR/LF and split on ':'
//   - field 1 (user name) and field 2 (password placeholder) are skipped
//   - field 3 must be a complete decimal number equal to `uid`
//   - the 4th token after that (past gid, gecos and dir) is the shell,
//     which is duplicated with strdup() into `result`
//   - a further token after the shell is checked for as a sanity test
// It then waits for the child and inspects WIFEXITED / WEXITSTATUS.
//
// The other side opens /dev/null, formats `uid` into uid_buf as the last
// argument and replaces itself with getent through execvp().
//
// NOTE(review): the pipe/fork calls, the reactions to the individual
// checks and the return statements are not visible in this listing;
// presumably the strdup()ed shell is returned and owned by the caller,
// and NULL is returned on any failure - confirm.
311 char *lxc_attach_getpwshell(uid_t uid
)
313 /* local variables */
320 /* we need to fork off a process that runs the
321 * getent program, and we need to capture its
322 * output, so we use a pipe for that purpose
339 size_t line_bufsz
= 0;
// wrap the read end of the pipe in a stdio stream so getline() can be used
345 pipe_f
= fdopen(pipes
[0], "r");
346 while (getline(&line
, &line_bufsz
, pipe_f
) != -1) {
348 char *saveptr
= NULL
;
353 /* if we already found something, just continue
354 * to read until the pipe doesn't deliver any more
355 * data, but don't modify the existing data
361 /* trim line on the right hand side */
362 for (i
= strlen(line
); i
> 0 && (line
[i
- 1] == '\n' || line
[i
- 1] == '\r'); --i
)
365 /* split into tokens: first user name */
366 token
= strtok_r(line
, ":", &saveptr
);
369 /* next: dummy password field */
370 token
= strtok_r(NULL
, ":", &saveptr
);
// third field: numeric user id
374 token
= strtok_r(NULL
, ":", &saveptr
);
375 value
= token
? strtol(token
, &endptr
, 10) : 0;
// reject a missing field, trailing non-digit characters and the
// LONG_MIN / LONG_MAX values strtol() uses to signal overflow
376 if (!token
|| !endptr
|| *endptr
|| value
== LONG_MIN
|| value
== LONG_MAX
)
378 /* dummy sanity check: user id matches */
379 if ((uid_t
) value
!= uid
)
381 /* skip fields: gid, gecos, dir, go to next field 'shell' */
382 for (i
= 0; i
< 4; i
++) {
383 token
= strtok_r(NULL
, ":", &saveptr
);
391 result
= strdup(token
);
393 /* sanity check that there are no fields after that */
394 token
= strtok_r(NULL
, ":", &saveptr
);
404 if (waitpid(pid
, &status
, 0) < 0) {
410 /* some sanity checks: if anything even hinted at going
411 * wrong: we can't be sure we have a valid result, so
415 if (!WIFEXITED(status
))
418 if (WEXITSTATUS(status
) != 0)
428 char *arguments
[] = {
437 /* we want to capture stdout */
441 /* get rid of stdin/stderr, so we try to associate it
444 fd
= open("/dev/null", O_RDWR
);
454 /* finish argument list */
455 ret
= snprintf(uid_buf
, sizeof(uid_buf
), "%ld", (long) uid
);
459 /* try to run getent program */
// execvp() only returns on failure, hence the discarded result
460 (void) execvp("getent", arguments
);
// Determine the user and group id of process 1 as seen from the calling
// process, by parsing the "Uid:" and "Gid:" lines of /proc/1/status.
//
// init_uid / init_gid are output parameters. Per the comment near the end
// they are only overridden when a value was found; (uid_t)-1 and
// (gid_t)-1 serve as the "not found" markers.
// NOTE(review): the assignments to *init_uid / *init_gid and the handling
// of a failed fopen() are not visible in this listing.
465 void lxc_attach_get_init_uidgid(uid_t
* init_uid
, gid_t
* init_gid
)
468 char proc_fn
[MAXPATHLEN
];
470 size_t line_bufsz
= 0;
473 uid_t uid
= (uid_t
)-1;
474 gid_t gid
= (gid_t
)-1;
476 /* read uid and gid of init from its status file */
477 snprintf(proc_fn
, MAXPATHLEN
, "/proc/%d/status", 1);
479 proc_file
= fopen(proc_fn
, "r");
483 while (getline(&line
, &line_bufsz
, proc_file
) != -1) {
484 /* format is: real, effective, saved set user, fs
485 * we only care about real uid
// only the first number after the label is converted
487 ret
= sscanf(line
, "Uid: %ld", &value
);
488 if (ret
!= EOF
&& ret
> 0) {
491 ret
= sscanf(line
, "Gid: %ld", &value
);
492 if (ret
!= EOF
&& ret
> 0)
// both ids known: nothing more to look for in the file
495 if (uid
!= (uid_t
)-1 && gid
!= (gid_t
)-1)
502 /* only override arguments if we found something */
503 if (uid
!= (uid_t
)-1)
505 if (gid
!= (gid_t
)-1)
508 /* TODO: we should also parse supplementary groups and use
509 * setgroups() to set them */