* (C) Copyright IBM Corp. 2007, 2008
*
* Authors:
- * Daniel Lezcano <dlezcano at fr.ibm.com>
+ * Daniel Lezcano <daniel.lezcano at free.fr>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#define _GNU_SOURCE
#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <dirent.h>
-#include <mntent.h>
#include <unistd.h>
#include <sys/wait.h>
+#include <sys/syscall.h>
+#include <time.h>
+
+#if HAVE_IFADDRS_H
+#include <ifaddrs.h>
+#else
+#include <../include/ifaddrs.h>
+#endif
#if HAVE_PTY_H
#include <pty.h>
#include "log.h"
#include "lxc.h" /* for lxc_cgroup_set() */
#include "caps.h" /* for lxc_caps_last_cap() */
-
-#if HAVE_APPARMOR
-#include <apparmor.h>
-#endif
+#include "bdev.h"
+#include "cgroup.h"
+#include "lxclock.h"
+#include "lsm/lsm.h"
#if HAVE_SYS_CAPABILITY_H
#include <sys/capability.h>
#include <sys/personality.h>
#endif
+#if IS_BIONIC
+#include <../include/lxcmntent.h>
+#else
+#include <mntent.h>
+#endif
+
#include "lxcseccomp.h"
lxc_log_define(lxc_conf, lxc);
#define MAXMTULEN 16
#define MAXLINELEN 128
-#ifndef MS_DIRSYNC
-#define MS_DIRSYNC 128
-#endif
-
-#ifndef MS_REC
-#define MS_REC 16384
-#endif
-
-#ifndef MNT_DETACH
-#define MNT_DETACH 2
-#endif
-
-#ifndef MS_SLAVE
-#define MS_SLAVE (1<<19)
-#endif
-
-#ifndef MS_RELATIME
-#define MS_RELATIME (1 << 21)
-#endif
-
-#ifndef MS_STRICTATIME
-#define MS_STRICTATIME (1 << 24)
-#endif
-
#if HAVE_SYS_CAPABILITY_H
#ifndef CAP_SETFCAP
#define CAP_SETFCAP 31
#define LO_FLAGS_AUTOCLEAR 4
#endif
-char *lxchook_names[NUM_LXC_HOOKS] = {
- "pre-start", "pre-mount", "mount", "start", "post-stop" };
-
+/* Define pivot_root() if missing from the C library */
+#ifndef HAVE_PIVOT_ROOT
+/*
+ * Fallback used only when HAVE_PIVOT_ROOT is not defined: issue the raw
+ * system call when the syscall number __NR_pivot_root is known at build
+ * time, otherwise fail with errno set to ENOSYS and return -1.
+ */
+static int pivot_root(const char * new_root, const char * put_old)
+{
+#ifdef __NR_pivot_root
+return syscall(__NR_pivot_root, new_root, put_old);
+#else
+errno = ENOSYS;
+return -1;
+#endif
+}
+#else
extern int pivot_root(const char * new_root, const char * put_old);
+#endif
+
+/* Define sethostname() if missing from the C library */
+#ifndef HAVE_SETHOSTNAME
+/*
+ * Fallback used only when HAVE_SETHOSTNAME is not defined: issue the raw
+ * system call when the syscall number __NR_sethostname is known at build
+ * time, otherwise fail with errno set to ENOSYS and return -1.
+ */
+static int sethostname(const char * name, size_t len)
+{
+#ifdef __NR_sethostname
+return syscall(__NR_sethostname, name, len);
+#else
+errno = ENOSYS;
+return -1;
+#endif
+}
+#endif
+
+/* Define __S_ISTYPE if missing from the C library */
+#ifndef __S_ISTYPE
+#define __S_ISTYPE(mode, mask) (((mode) & S_IFMT) == (mask))
+#endif
+
+char *lxchook_names[NUM_LXC_HOOKS] = {
+ "pre-start", "pre-mount", "mount", "autodev", "start", "post-stop", "clone" };
typedef int (*instanciate_cb)(struct lxc_handler *, struct lxc_netdev *);
static struct caps_opt caps_opt[] = {};
#endif
+static char padchar[] =
+"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+
+/*
+ * mkifname: generate an interface name from a template that is not in use.
+ *
+ * Every 'X' in @template is replaced with a random character taken from
+ * padchar, and the candidate is compared against the interface names
+ * reported by getifaddrs(); candidates are regenerated until none of the
+ * listed interfaces carries that name.
+ *
+ * Returns a strdup()'d string owned by the caller, or NULL when memory
+ * could not be allocated.
+ *
+ * NOTE: a template without any 'X' that names an existing interface can
+ * never produce a different candidate, so the search would not terminate.
+ */
+static char *mkifname(char *template)
+{
+	char *name = NULL;
+	size_t i, len;
+	/* unchanged from before: the modulus leaves out the last pad character */
+	const size_t npad = strlen(padchar) - 1;
+	FILE *urandom;
+	unsigned int seed;
+	struct ifaddrs *ifaddr = NULL, *ifa;
+	int ifexists = 0;
+
+	/*
+	 * Get all the network interfaces.  If the lookup fails, carry on
+	 * with an empty list rather than walking an uninitialized pointer;
+	 * the generated name is then simply not checked for collisions.
+	 */
+	if (getifaddrs(&ifaddr) < 0) {
+		WARN("failed to list network interfaces: %s", strerror(errno));
+		ifaddr = NULL;
+	}
+
+	/* Initialize the random number generator */
+	process_lock();
+	urandom = fopen ("/dev/urandom", "r");
+	process_unlock();
+	if (urandom != NULL) {
+		if (fread (&seed, sizeof(seed), 1, urandom) <= 0)
+			seed = time(0);
+		process_lock();
+		fclose(urandom);
+		process_unlock();
+	}
+	else
+		seed = time(0);
+
+#ifndef HAVE_RAND_R
+	srand(seed);
+#endif
+
+	/* Generate random names until we find one that doesn't exist */
+	while(1) {
+		ifexists = 0;
+		name = strdup(template);
+
+		if (name == NULL) {
+			/* do not leak the interface list on the error path */
+			if (ifaddr)
+				freeifaddrs(ifaddr);
+			return NULL;
+		}
+
+		len = strlen(name);
+		for (i = 0; i < len; i++) {
+			if (name[i] == 'X') {
+#ifdef HAVE_RAND_R
+				name[i] = padchar[rand_r(&seed) % npad];
+#else
+				name[i] = padchar[rand() % npad];
+#endif
+			}
+		}
+
+		for (ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next) {
+			if (strcmp(ifa->ifa_name, name) == 0) {
+				ifexists = 1;
+				break;
+			}
+		}
+
+		if (ifexists == 0)
+			break;
+
+		free(name);
+	}
+
+	if (ifaddr)
+		freeifaddrs(ifaddr);
+	return name;
+}
+
static int run_buffer(char *buffer)
{
FILE *f;
char *output;
+ int ret;
+ process_lock();
f = popen(buffer, "r");
+ process_unlock();
if (!f) {
SYSERROR("popen failed");
return -1;
output = malloc(LXC_LOG_BUFFER_SIZE);
if (!output) {
ERROR("failed to allocate memory for script output");
+ process_lock();
+ pclose(f);
+ process_unlock();
return -1;
}
free(output);
- if (pclose(f) == -1) {
+ process_lock();
+ ret = pclose(f);
+ process_unlock();
+ if (ret == -1) {
SYSERROR("Script exited on error");
return -1;
+ } else if (WIFEXITED(ret) && WEXITSTATUS(ret) != 0) {
+ ERROR("Script exited with status %d", WEXITSTATUS(ret));
+ return -1;
+ } else if (WIFSIGNALED(ret)) {
+ ERROR("Script terminated by signal %d (%s)", WTERMSIG(ret),
+ strsignal(WTERMSIG(ret)));
+ return -1;
}
return 0;
}
+/*
+ * Build the command line "<script> <name> <section> <hook> [args...]" and
+ * hand it to run_buffer().
+ *
+ * @argsin may be NULL; otherwise it is a NULL-terminated vector whose
+ * entries are appended, each preceded by a single space.
+ * Returns -1 when the command line cannot be built, otherwise whatever
+ * run_buffer() returns.
+ */
+static int run_script_argv(const char *name, const char *section,
+		const char *script, const char *hook, const char *lxcpath,
+		char **argsin)
+{
+	char **arg;
+	char *cmd;
+	int used, avail, n;
+	size_t total = 0;
+
+	INFO("Executing script '%s' for container '%s', config section '%s'",
+	     script, name, section);
+
+	/* every extra argument costs its length plus one separator */
+	for (arg = argsin; arg && *arg; arg++)
+		total += strlen(*arg) + 1;
+
+	/* four fixed words, three separators and the terminating NUL */
+	total += strlen(script) + strlen(name) + strlen(section) + strlen(hook) + 4;
+
+	if (total > INT_MAX)
+		return -1;
+
+	cmd = alloca(total);
+	if (!cmd) {
+		ERROR("failed to allocate memory");
+		return -1;
+	}
+
+	used = snprintf(cmd, total, "%s %s %s %s", script, name, section, hook);
+	if (used < 0 || used >= total) {
+		ERROR("Script name too long");
+		return -1;
+	}
+
+	for (arg = argsin; arg && *arg; arg++) {
+		avail = total - used;
+		n = snprintf(cmd + used, avail, " %s", *arg);
+		if (n < 0 || n >= avail) {
+			ERROR("Script args too long");
+			return -1;
+		}
+		used += n;
+	}
+
+	return run_buffer(cmd);
+}
+
+
static int run_script(const char *name, const char *section,
const char *script, ...)
{
ret = snprintf(buffer, size, "%s %s %s", script, name, section);
if (ret < 0 || ret >= size) {
ERROR("Script name too long");
- free(buffer);
return -1;
}
int rc;
rc = snprintf(buffer + ret, len, " %s", p);
if (rc < 0 || rc >= len) {
- free(buffer);
ERROR("Script args too long");
return -1;
}
int rfd;
int ret = -1;
+ process_lock();
rfd = open(rootfs, O_RDWR);
+ process_unlock();
if (rfd < 0) {
SYSERROR("failed to open '%s'", rootfs);
return -1;
ret = 0;
out:
+ process_lock();
close(rfd);
+ process_unlock();
return ret;
}
DIR *dir;
char path[MAXPATHLEN];
+ process_lock();
dir = opendir("/dev");
+ process_unlock();
if (!dir) {
SYSERROR("failed to open '/dev'");
return -1;
if (rc < 0 || rc >= MAXPATHLEN)
continue;
+ process_lock();
fd = open(path, O_RDWR);
+ process_unlock();
if (fd < 0)
continue;
if (ioctl(fd, LOOP_GET_STATUS64, &loinfo) == 0) {
+ process_lock();
close(fd);
+ process_unlock();
continue;
}
if (errno != ENXIO) {
WARN("unexpected error for ioctl on '%s': %m",
direntp->d_name);
+ process_lock();
+ close(fd);
+ process_unlock();
continue;
}
ret = setup_lodev(rootfs, fd, &loinfo);
if (!ret)
ret = mount_unknow_fs(path, target, 0);
+ process_lock();
close(fd);
+ process_unlock();
break;
}
+ process_lock();
if (closedir(dir))
WARN("failed to close directory");
+ process_unlock();
return ret;
}
/*
* pin_rootfs
- * if rootfs is a directory, then open ${rootfs}.hold for writing for the
- * duration of the container run, to prevent the container from marking the
- * underlying fs readonly on shutdown.
+ * if rootfs is a directory, then open ${rootfs}/lxc.hold for writing for
+ * the duration of the container run, to prevent the container from marking
+ * the underlying fs readonly on shutdown. unlink the file immediately so
+ * no name pollution happens.
* return -1 on error.
* return -2 if nothing needed to be pinned.
* return an open fd (>=0) if we pinned it.
int ret, fd;
if (rootfs == NULL || strlen(rootfs) == 0)
- return 0;
+ return -2;
- if (!realpath(rootfs, absrootfs)) {
- SYSERROR("failed to get real path for '%s'", rootfs);
- return -1;
- }
+ if (!realpath(rootfs, absrootfs))
+ return -2;
- if (access(absrootfs, F_OK)) {
- SYSERROR("'%s' is not accessible", absrootfs);
+ if (access(absrootfs, F_OK))
return -1;
- }
- if (stat(absrootfs, &s)) {
- SYSERROR("failed to stat '%s'", absrootfs);
+ if (stat(absrootfs, &s))
return -1;
- }
- if (!__S_ISTYPE(s.st_mode, S_IFDIR))
+ if (!S_ISDIR(s.st_mode))
return -2;
- ret = snprintf(absrootfspin, MAXPATHLEN, "%s%s", absrootfs, ".hold");
- if (ret >= MAXPATHLEN) {
- SYSERROR("pathname too long for rootfs hold file");
+ ret = snprintf(absrootfspin, MAXPATHLEN, "%s/lxc.hold", absrootfs);
+ if (ret >= MAXPATHLEN)
return -1;
- }
+ process_lock();
fd = open(absrootfspin, O_CREAT | O_RDWR, S_IWUSR|S_IRUSR);
- INFO("opened %s as fd %d\n", absrootfspin, fd);
+ process_unlock();
+ if (fd < 0)
+ return fd;
+ (void)unlink(absrootfspin);
return fd;
}
+/*
+ * Perform the automatic mounts selected in @flags below conf->rootfs.mount:
+ *   LXC_AUTO_PROC        mount proc on /proc
+ *   LXC_AUTO_PROC_SYSRQ  bind-mount /dev/null over /proc/sysrq-trigger
+ *                        (a failure here is only logged)
+ *   LXC_AUTO_SYS         mount sysfs read-only on /sys
+ *   LXC_AUTO_CGROUP      delegate to lxc_setup_mount_cgroup()
+ * Returns 0 on success and -1 on the first fatal error.
+ */
+static int lxc_mount_auto_mounts(struct lxc_conf *conf, int flags,
+		struct cgroup_process_info *cgroup_info)
+{
+	int ret = -1;
+	char *target = NULL;
+	char *null_dev;
+
+	null_dev = lxc_append_paths(conf->rootfs.mount, "/dev/null");
+	if (!null_dev) {
+		SYSERROR("memory allocation error");
+		goto out;
+	}
+
+	if (flags & LXC_AUTO_PROC) {
+		target = lxc_append_paths(conf->rootfs.mount, "/proc");
+		if (!target) {
+			SYSERROR("memory allocation error trying to automatically mount /proc");
+			goto out;
+		}
+
+		if (mount("proc", target, "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL) < 0) {
+			SYSERROR("error mounting /proc");
+			goto out;
+		}
+
+		free(target);
+		target = NULL;
+	}
+
+	if (flags & LXC_AUTO_PROC_SYSRQ) {
+		target = lxc_append_paths(conf->rootfs.mount, "/proc/sysrq-trigger");
+		if (!target) {
+			SYSERROR("memory allocation error trying to automatically mount /proc");
+			goto out;
+		}
+
+		/* safety measure: hide sysrq-trigger behind /dev/null so the
+		 * container cannot trigger a host reboot or such
+		 */
+		if (mount(null_dev, target, NULL, MS_BIND, NULL) < 0)
+			WARN("error mounting /dev/null over /proc/sysrq-trigger: %s", strerror(errno));
+
+		free(target);
+		target = NULL;
+	}
+
+	if (flags & LXC_AUTO_SYS) {
+		target = lxc_append_paths(conf->rootfs.mount, "/sys");
+		if (!target) {
+			SYSERROR("memory allocation error trying to automatically mount /sys");
+			goto out;
+		}
+
+		if (mount("sysfs", target, "sysfs", MS_RDONLY, NULL) < 0) {
+			SYSERROR("error mounting /sys");
+			goto out;
+		}
+
+		free(target);
+		target = NULL;
+	}
+
+	if (flags & LXC_AUTO_CGROUP) {
+		if (lxc_setup_mount_cgroup(conf->rootfs.mount, cgroup_info) < 0) {
+			SYSERROR("error mounting /sys/fs/cgroup");
+			goto out;
+		}
+	}
+
+	ret = 0;
+
+out:
+	/* shared exit: both pointers are either NULL or still owned here */
+	free(null_dev);
+	free(target);
+	return ret;
+}
+
+
static int mount_rootfs(const char *rootfs, const char *target)
{
char absrootfs[MAXPATHLEN];
ERROR("pathname too long for ttys");
return -1;
}
+ process_lock();
ret = creat(lxcpath, 0660);
+ process_unlock();
if (ret==-1 && errno != EEXIST) {
SYSERROR("error creating %s\n", lxcpath);
return -1;
}
- close(ret);
+ process_lock();
+ if (ret >= 0)
+ close(ret);
+ process_unlock();
ret = unlink(path);
if (ret && errno != ENOENT) {
SYSERROR("error unlinking %s\n", path);
} else {
/* If we populated /dev, then we need to create /dev/ttyN */
if (access(path, F_OK)) {
+ process_lock();
ret = creat(path, 0660);
+ process_unlock();
if (ret==-1) {
SYSERROR("error creating %s\n", path);
/* this isn't fatal, continue */
- } else
+ } else {
+ process_lock();
close(ret);
+ process_unlock();
+ }
}
if (mount(pty_info->name, path, "none", MS_BIND, 0)) {
WARN("failed to mount '%s'->'%s'",
static int setup_rootfs_pivot_root_cb(char *buffer, void *data)
{
struct lxc_list *mountlist, *listentry, *iterator;
- char *pivotdir, *mountpoint, *mountentry;
+ char *pivotdir, *mountpoint, *mountentry, *saveptr = NULL;
int found;
void **cbparm;
pivotdir = cbparm[1];
/* parse entry, first field is mountname, ignore */
- mountpoint = strtok(mountentry, " ");
+ mountpoint = strtok_r(mountentry, " ", &saveptr);
if (!mountpoint)
return -1;
/* second field is mountpoint */
- mountpoint = strtok(NULL, " ");
+ mountpoint = strtok_r(NULL, " ", &saveptr);
if (!mountpoint)
return -1;
listentry->elem = strdup(mountpoint);
if (!listentry->elem) {
SYSERROR("strdup failed");
+ free(listentry);
return -1;
}
lxc_list_add_tail(mountlist, listentry);
return 0;
}
-/*
- * Try to run MAKEDEV console in the container. If something fails,
- * continue anyway as it should not be detrimental to the container.
- * This makes sure that things like /dev/vcs* exist.
- * (Pass devpath in to reduce stack usage)
- */
-static void run_makedev(char *devpath)
-{
- int curd;
- int ret;
-
- curd = open(".", O_RDONLY);
- if (curd < 0)
- return;
- ret = chdir(devpath);
- if (ret) {
- close(curd);
- return;
- }
- if (run_buffer("/sbin/MAKEDEV console"))
- INFO("Error running MAKEDEV console in %s", devpath);
- ret = fchdir(curd);
- if (ret)
- INFO("Error returning to original directory: expect breakage");
- close(curd);
-}
-
struct lxc_devs {
char *name;
mode_t mode;
if (ret < 0 || ret >= MAXPATHLEN) {
ERROR("Error calculating container /dev location");
return -1;
- } else
- run_makedev(path);
+ }
INFO("Populating /dev under %s\n", root);
cmask = umask(S_IXUSR | S_IXGRP | S_IXOTH);
int i;
char *p2;
+ process_lock();
f = fopen("/proc/self/mountinfo", "r");
+ process_unlock();
if (!f)
return 0;
while ((p = fgets(buf, LINELEN, f))) {
if (strcmp(p+1, "/") == 0) {
// this is '/'. is it shared?
p = index(p2+1, ' ');
- if (strstr(p, "shared:"))
+ if (p && strstr(p, "shared:")) {
+ process_lock();
+ fclose(f);
+ process_unlock();
return 1;
+ }
}
}
+ process_lock();
fclose(f);
+ process_unlock();
return 0;
}
{
const struct lxc_rootfs *rootfs = &conf->rootfs;
- if (!rootfs->path)
+ if (!rootfs->path) {
+ if (mount("", "/", NULL, MS_SLAVE|MS_REC, 0)) {
+ SYSERROR("Failed to make / rslave");
+ return -1;
+ }
return 0;
+ }
if (access(rootfs->mount, F_OK)) {
SYSERROR("failed to access to '%s', check it is present",
}
}
+ // First try mounting rootfs using a bdev
+ struct bdev *bdev = bdev_init(rootfs->path, rootfs->mount, NULL);
+ if (bdev && bdev->ops->mount(bdev) == 0) {
+ bdev_put(bdev);
+ DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount);
+ return 0;
+ }
+ if (bdev)
+ bdev_put(bdev);
if (mount_rootfs(rootfs->path, rootfs->mount)) {
ERROR("failed to mount rootfs");
return -1;
return 0;
}
- if (console->peer == -1) {
- INFO("no console output required");
+ if (console->master < 0) {
+ INFO("no console");
return 0;
}
return -1;
}
+ process_lock();
ret = creat(lxcpath, 0660);
+ process_unlock();
if (ret==-1 && errno != EEXIST) {
SYSERROR("error %d creating %s\n", errno, lxcpath);
return -1;
}
- close(ret);
+ process_lock();
+ if (ret >= 0)
+ close(ret);
+ process_unlock();
- if (console->peer == -1) {
- INFO("no console output required");
+ if (console->master < 0) {
+ INFO("no console");
return 0;
}
return 0;
}
-int setup_cgroup(const char *name, struct lxc_list *cgroups)
-{
- struct lxc_list *iterator;
- struct lxc_cgroup *cg;
- int ret = -1;
-
- if (lxc_list_empty(cgroups))
- return 0;
-
- lxc_list_for_each(iterator, cgroups) {
-
- cg = iterator->elem;
-
- if (lxc_cgroup_set(name, cg->subsystem, cg->value))
- goto out;
-
- DEBUG("cgroup '%s' set to '%s'", cg->subsystem, cg->value);
- }
-
- ret = 0;
- INFO("cgroup has been setup");
-out:
- return ret;
-}
-
static void parse_mntopt(char *opt, unsigned long *flags, char **data)
{
struct mount_opt *mo;
ret = mount_entry(mntent->mnt_fsname, mntent->mnt_dir,
mntent->mnt_type, mntflags, mntdata);
+ if (hasmntopt(mntent, "optional") != NULL)
+ ret = 0;
+
free(mntdata);
return ret;
unsigned long mntflags;
char *mntdata;
int r, ret = 0, offset;
+ const char *lxcpath;
if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
ERROR("failed to parse mount option '%s'", mntent->mnt_opts);
return -1;
}
+ lxcpath = default_lxc_path();
+ if (!lxcpath) {
+ ERROR("Out of memory");
+ return -1;
+ }
+
/* if rootfs->path is a blockdev path, allow container fstab to
- * use $LXCPATH/CN/rootfs as the target prefix */
- r = snprintf(path, MAXPATHLEN, LXCPATH "/%s/rootfs", lxc_name);
+ * use $lxcpath/CN/rootfs as the target prefix */
+ r = snprintf(path, MAXPATHLEN, "%s/%s/rootfs", lxcpath, lxc_name);
if (r < 0 || r >= MAXPATHLEN)
goto skipvarlib;
ret = mount_entry(mntent->mnt_fsname, path, mntent->mnt_type,
mntflags, mntdata);
+ if (hasmntopt(mntent, "optional") != NULL)
+ ret = 0;
+
out:
free(mntdata);
return ret;
ret = mount_entry(mntent->mnt_fsname, path, mntent->mnt_type,
mntflags, mntdata);
+ if (hasmntopt(mntent, "optional") != NULL)
+ ret = 0;
+
free(mntdata);
return ret;
if (!fstab)
return 0;
+ process_lock();
file = setmntent(fstab, "r");
+ process_unlock();
if (!file) {
SYSERROR("failed to use '%s'", fstab);
return -1;
ret = mount_file_entries(rootfs, file, lxc_name);
+ process_lock();
endmntent(file);
+ process_unlock();
return ret;
}
char *mount_entry;
int ret;
+ process_lock();
file = tmpfile();
+ process_unlock();
if (!file) {
ERROR("tmpfile error: %m");
return -1;
ret = mount_file_entries(rootfs, file, lxc_name);
+ process_lock();
fclose(file);
+ process_unlock();
return ret;
}
}
- DEBUG("capabilities has been setup");
+ DEBUG("capabilities have been setup");
+
+ return 0;
+}
+
+/*
+ * Drop every capability from the bounding set except those listed in
+ * @caps.
+ *
+ * Each list element is a string that is either a name found in caps_opt
+ * or a decimal capability number.  Numbers must parse completely and lie
+ * in [0, lxc_caps_last_cap()]; table entries are held to the same range so
+ * that caplist is never indexed out of bounds.
+ *
+ * Returns 0 on success, -1 on an unknown capability or when prctl() fails.
+ */
+static int dropcaps_except(struct lxc_list *caps)
+{
+	struct lxc_list *iterator;
+	char *keep_entry;
+	char *ptr;
+	size_t j;
+	int i, capid;
+	int numcaps = lxc_caps_last_cap() + 1;
+	INFO("found %d capabilities\n", numcaps);
+
+	if (numcaps <= 0 || numcaps > 200)
+		return -1;
+
+	// caplist[i] is 1 if we keep capability i
+	int *caplist = alloca(numcaps * sizeof(int));
+	memset(caplist, 0, numcaps * sizeof(int));
+
+	lxc_list_for_each(iterator, caps) {
+
+		keep_entry = iterator->elem;
+
+		capid = -1;
+
+		for (j = 0; j < sizeof(caps_opt)/sizeof(caps_opt[0]); j++) {
+
+			if (strcmp(keep_entry, caps_opt[j].name))
+				continue;
+
+			capid = caps_opt[j].value;
+			break;
+		}
+
+		if (capid < 0) {
+			/* try to see if it's numeric, so the user may specify
+			 * capabilities that the running kernel knows about but
+			 * we don't */
+			long val;
+
+			errno = 0;
+			val = strtol(keep_entry, &ptr, 10);
+			/* reject empty input, trailing junk and overflow, and
+			 * range-check before narrowing to int */
+			if (ptr == keep_entry || *ptr != '\0' ||
+			    errno == ERANGE || val < 0 || val >= numcaps)
+				capid = -1;
+			else
+				capid = (int)val;
+		}
+
+		/* also covers table values the running kernel does not have */
+		if (capid < 0 || capid >= numcaps) {
+			ERROR("unknown capability %s", keep_entry);
+			return -1;
+		}
+
+		DEBUG("keep capability '%s' (%d)", keep_entry, capid);
+
+		caplist[capid] = 1;
+	}
+	for (i=0; i<numcaps; i++) {
+		if (caplist[i])
+			continue;
+		if (prctl(PR_CAPBSET_DROP, i, 0, 0, 0)) {
+			SYSERROR("failed to remove capability %d", i);
+			return -1;
+		}
+	}
+
+	DEBUG("capabilities have been setup");
return 0;
}
memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
+ process_lock();
fd = socket(AF_INET, SOCK_DGRAM, 0);
+ process_unlock();
if (fd < 0) {
ERROR("socket failure : %s", strerror(errno));
return -1;
}
ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
+ process_lock();
close(fd);
+ process_unlock();
if (ret)
ERROR("ioctl failure : %s", strerror(errno));
int err;
int sockfd;
+ process_lock();
sockfd = socket(AF_INET, SOCK_DGRAM, 0);
+ process_unlock();
if (sockfd < 0)
return -errno;
snprintf((char *)ifr.ifr_name, IFNAMSIZ, "%s", veth1);
err = ioctl(sockfd, SIOCGIFHWADDR, &ifr);
if (err < 0) {
+ process_lock();
close(sockfd);
+ process_unlock();
return -errno;
}
ifr.ifr_hwaddr.sa_data[0] = 0xfe;
err = ioctl(sockfd, SIOCSIFHWADDR, &ifr);
+ process_lock();
close(sockfd);
+ process_unlock();
if (err < 0)
return -errno;
}
memset(new, 0, sizeof(*new));
+ new->loglevel = LXC_LOG_PRIORITY_NOTSET;
new->personality = -1;
new->console.log_path = NULL;
new->console.log_fd = -1;
new->console.path = NULL;
new->console.peer = -1;
+ new->console.peerpty.busy = -1;
+ new->console.peerpty.master = -1;
+ new->console.peerpty.slave = -1;
new->console.master = -1;
new->console.slave = -1;
new->console.name[0] = '\0';
new->maincmd_fd = -1;
- new->rootfs.mount = default_rootfs_mount;
- new->loglevel = LXC_LOG_PRIORITY_NOTSET;
+ new->rootfs.mount = strdup(default_rootfs_mount);
+ if (!new->rootfs.mount) {
+ ERROR("lxc_conf_init : %m");
+ free(new);
+ return NULL;
+ }
+ new->kmsg = 1;
lxc_list_init(&new->cgroup);
lxc_list_init(&new->network);
lxc_list_init(&new->mount_list);
lxc_list_init(&new->caps);
+ lxc_list_init(&new->keepcaps);
+ lxc_list_init(&new->id_map);
for (i=0; i<NUM_LXC_HOOKS; i++)
lxc_list_init(&new->hooks[i]);
-#if HAVE_APPARMOR
- new->aa_profile = NULL;
-#endif
-#if HAVE_APPARMOR /* || HAVE_SMACK || HAVE_SELINUX */
+ new->lsm_aa_profile = NULL;
+ new->lsm_se_context = NULL;
new->lsm_umount_proc = 0;
-#endif
return new;
}
ERROR("veth1 name too long");
return -1;
}
- veth1 = mktemp(veth1buf);
+ veth1 = mkifname(veth1buf);
/* store away for deconf */
memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
}
snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
- veth2 = mktemp(veth2buf);
+ veth2 = mkifname(veth2buf);
if (!strlen(veth1) || !strlen(veth2)) {
ERROR("failed to allocate a temporary name");
if (err >= sizeof(peerbuf))
return -1;
- peer = mktemp(peerbuf);
+ peer = mkifname(peerbuf);
if (!strlen(peer)) {
ERROR("failed to make a temporary name");
return -1;
return 0;
}
+/*
+ * Write @buf_size bytes from @buf to /proc/<pid>/uid_map or
+ * /proc/<pid>/gid_map, selected by @idtype.
+ *
+ * Returns 0 on success, -E2BIG if the proc path does not fit, -EINVAL if
+ * the map file cannot be opened, -EIO if the data could not be handed to
+ * the stream, or the non-zero fclose() result when flushing fails.
+ */
+static int write_id_mapping(enum idtype idtype, pid_t pid, const char *buf,
+		size_t buf_size)
+{
+	char path[PATH_MAX];
+	int ret, closeret;
+	FILE *f;
+
+	ret = snprintf(path, PATH_MAX, "/proc/%d/%cid_map", pid, idtype == ID_TYPE_UID ? 'u' : 'g');
+	if (ret < 0 || ret >= PATH_MAX) {
+		fprintf(stderr, "%s: path name too long", __func__);
+		return -E2BIG;
+	}
+	process_lock();
+	f = fopen(path, "w");
+	process_unlock();
+	if (!f) {
+		perror("open");
+		return -EINVAL;
+	}
+	/* fwrite() reports the number of items written and is never
+	 * negative, so a failure shows up as fewer than one item */
+	ret = 0;
+	if (buf_size > 0 && fwrite(buf, buf_size, 1, f) != 1) {
+		SYSERROR("writing id mapping");
+		ret = -EIO;
+	}
+	/* the data may only reach the kernel on flush, so fclose() counts too */
+	process_lock();
+	closeret = fclose(f);
+	process_unlock();
+	if (closeret)
+		SYSERROR("writing id mapping");
+	return ret < 0 ? ret : closeret;
+}
+
+
+/*
+ * Write the uid and then the gid mappings found in @idmap to the id map
+ * files of process @pid.
+ *
+ * All entries of one type are formatted as "<nsid> <hostid> <range>" lines
+ * into a single buffer and passed to write_id_mapping() in one call; a
+ * type with no entries is skipped.
+ *
+ * Returns 0 on success, -ENOMEM if the buffer cannot be allocated, -E2BIG
+ * if the mappings of one type do not fit into the buffer, or the error
+ * returned by write_id_mapping().
+ */
+int lxc_map_ids(struct lxc_list *idmap, pid_t pid)
+{
+	/* The kernel only takes <= 4k for writes to /proc/<nr>/[ug]id_map */
+	enum { ID_MAP_BUFSZ = 4096 };
+	struct lxc_list *iterator;
+	struct id_map *map;
+	int ret = 0;
+	enum idtype type;
+	char *buf = NULL, *pos;
+
+	for(type = ID_TYPE_UID; type <= ID_TYPE_GID; type++) {
+		int left, fill;
+
+		pos = buf;
+		lxc_list_for_each(iterator, idmap) {
+			if (!buf)
+				buf = pos = malloc(ID_MAP_BUFSZ);
+			if (!buf)
+				return -ENOMEM;
+
+			map = iterator->elem;
+			if (map->idtype != type)
+				continue;
+
+			left = ID_MAP_BUFSZ - (pos - buf);
+			fill = snprintf(pos, left, "%lu %lu %lu\n",
+					map->nsid, map->hostid, map->range);
+			if (fill <= 0 || fill >= left) {
+				/* stop here: advancing pos would run past
+				 * the end of the buffer */
+				SYSERROR("snprintf failed, too many mappings");
+				ret = -E2BIG;
+				goto out;
+			}
+			pos += fill;
+		}
+		if (pos == buf) // no mappings were found
+			continue;
+		ret = write_id_mapping(type, pid, buf, pos-buf);
+		if (ret)
+			break;
+	}
+
+out:
+	free(buf);
+	return ret;
+}
+
+
int lxc_find_gateway_addresses(struct lxc_handler *handler)
{
struct lxc_list *network = &handler->conf->network;
int lxc_create_tty(const char *name, struct lxc_conf *conf)
{
struct lxc_tty_info *tty_info = &conf->tty_info;
- int i;
+ int i, ret;
/* no tty in the configuration */
if (!conf->tty)
struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
- if (openpty(&pty_info->master, &pty_info->slave,
- pty_info->name, NULL, NULL)) {
+ process_lock();
+ ret = openpty(&pty_info->master, &pty_info->slave,
+ pty_info->name, NULL, NULL);
+ process_unlock();
+ if (ret) {
SYSERROR("failed to create pty #%d", i);
tty_info->nbtty = i;
lxc_delete_tty(tty_info);
for (i = 0; i < tty_info->nbtty; i++) {
struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
+ process_lock();
close(pty_info->master);
close(pty_info->slave);
+ process_unlock();
}
free(tty_info->pty_info);
tty_info->nbtty = 0;
}
-int lxc_setup(const char *name, struct lxc_conf *lxc_conf)
+/*
+ * Translate an id through the id_map entries of type w in c.
+ * If uid lies inside a mapping's [nsid, nsid + range) window, return the
+ * id at the same offset in that mapping's host range (hostid + offset).
+ * If no mapping of that type covers uid, return uid unchanged.
+ * NOTE(review): the value is matched against the namespace side of the
+ * map and a host-side id is returned; the previous comment described the
+ * opposite direction - confirm which one callers expect.
+ */
+static int shiftid(struct lxc_conf *c, int uid, enum idtype w)
{
-#if HAVE_APPARMOR /* || HAVE_SMACK || HAVE_SELINUX */
- int mounted;
-#endif
+ struct lxc_list *iterator;
+ struct id_map *map;
+ int low, high;
+
+ lxc_list_for_each(iterator, &c->id_map) {
+ map = iterator->elem;
+ /* only mappings of the requested type (uid or gid) apply */
+ if (map->idtype != w)
+ continue;
+
+ /* half-open window covered by this mapping: [low, high) */
+ low = map->nsid;
+ high = map->nsid + map->range;
+ if (uid < low || uid >= high)
+ continue;
+
+ /* first matching mapping wins */
+ return uid - low + map->hostid;
+ }
+
+ return uid;
+}
+
+/*
+ * Take a pathname for a file created on the host, and map the uid and gid
+ * into the container if needed. (Used for ttys)
+ */
+/*
+ * stat() @path, run its owner and group through shiftid() and chown() the
+ * file when either id changes.  Returns 0 on success (including when no
+ * change is needed) and -1 if stat() or chown() fails.
+ */
+static int uid_shift_file(char *path, struct lxc_conf *c)
+{
+	struct stat st;
+	int uid, gid;
+
+	if (stat(path, &st) != 0) {
+		SYSERROR("stat(%s)", path);
+		return -1;
+	}
+
+	uid = shiftid(c, st.st_uid, ID_TYPE_UID);
+	gid = shiftid(c, st.st_gid, ID_TYPE_GID);
+
+	/* nothing to do when the mapping leaves both ids untouched */
+	if (uid == st.st_uid && gid == st.st_gid)
+		return 0;
+
+	DEBUG("chowning %s from %d:%d to %d:%d\n", path, (int)st.st_uid, (int)st.st_gid, uid, gid);
+	if (chown(path, uid, gid) != 0) {
+		SYSERROR("chown(%s)", path);
+		return -1;
+	}
+
+	return 0;
+}
+
+
+/*
+ * Apply uid_shift_file() to the container's console and to each of its
+ * ttys, addressed through /proc/<pid>/root/dev/<ttydir>/.  Does nothing
+ * when the container has no rootfs path.  Returns 0 on success and -1 on
+ * the first failure.
+ */
+int uid_shift_ttys(int pid, struct lxc_conf *conf)
+{
+	struct lxc_tty_info *ttys = &conf->tty_info;
+	/* an unset ttydir places the devices directly below /dev */
+	const char *subdir = conf->ttydir ? conf->ttydir : "";
+	char devpath[MAXPATHLEN];
+	int idx, len;
+
+	if (!conf->rootfs.path)
+		return 0;
+
+	/* first the console */
+	len = snprintf(devpath, sizeof(devpath), "/proc/%d/root/dev/%s/console", pid, subdir);
+	if (len < 0 || len >= sizeof(devpath)) {
+		ERROR("console path too long\n");
+		return -1;
+	}
+	if (uid_shift_file(devpath, conf)) {
+		DEBUG("Failed to chown the console %s.\n", devpath);
+		return -1;
+	}
+
+	/* then tty1 .. tty<nbtty> */
+	for (idx = 0; idx < ttys->nbtty; idx++) {
+		len = snprintf(devpath, sizeof(devpath), "/proc/%d/root/dev/%s/tty%d",
+			       pid, subdir, idx + 1);
+		if (len < 0 || len >= sizeof(devpath)) {
+			ERROR("pathname too long for ttys");
+			return -1;
+		}
+		if (uid_shift_file(devpath, conf)) {
+			DEBUG("Failed to chown pty %s.\n", devpath);
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+
+int lxc_setup(const char *name, struct lxc_conf *lxc_conf, const char *lxcpath, struct cgroup_process_info *cgroup_info)
+{
if (setup_utsname(lxc_conf->utsname)) {
ERROR("failed to setup the utsname for '%s'", name);
return -1;
return -1;
}
- if (run_lxc_hooks(name, "pre-mount", lxc_conf)) {
+ if (run_lxc_hooks(name, "pre-mount", lxc_conf, lxcpath, NULL)) {
ERROR("failed to run pre-mount hooks for container '%s'.", name);
return -1;
}
}
}
+ /* do automatic mounts (mainly /proc and /sys), but exclude
+ * those that need to wait until other stuff has finished
+ */
+ if (lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & ~LXC_AUTO_CGROUP & ~LXC_AUTO_PROC_SYSRQ, cgroup_info) < 0) {
+ ERROR("failed to setup the automatic mounts for '%s'", name);
+ return -1;
+ }
+
if (setup_mount(&lxc_conf->rootfs, lxc_conf->fstab, name)) {
ERROR("failed to setup the mounts for '%s'", name);
return -1;
}
- if (setup_mount_entries(&lxc_conf->rootfs, &lxc_conf->mount_list, name)) {
+ if (!lxc_list_empty(&lxc_conf->mount_list) && setup_mount_entries(&lxc_conf->rootfs, &lxc_conf->mount_list, name)) {
ERROR("failed to setup the mount entries for '%s'", name);
return -1;
}
- if (run_lxc_hooks(name, "mount", lxc_conf)) {
+ /* now mount only cgroup, if wanted;
+ * before, /sys could not have been mounted
+ * (is either mounted automatically or via fstab entries)
+ */
+ if (lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & LXC_AUTO_CGROUP, cgroup_info) < 0) {
+ ERROR("failed to setup the automatic mounts for '%s'", name);
+ return -1;
+ }
+
+ if (run_lxc_hooks(name, "mount", lxc_conf, lxcpath, NULL)) {
ERROR("failed to run mount hooks for container '%s'.", name);
return -1;
}
if (lxc_conf->autodev) {
+ if (run_lxc_hooks(name, "autodev", lxc_conf, lxcpath, NULL)) {
+ ERROR("failed to run autodev hooks for container '%s'.", name);
+ return -1;
+ }
if (setup_autodev(lxc_conf->rootfs.mount)) {
ERROR("failed to populate /dev in the container");
return -1;
}
}
- if (setup_cgroup(name, &lxc_conf->cgroup)) {
- ERROR("failed to setup the cgroups for '%s'", name);
+ /* over-mount /proc/sysrq-trigger with /dev/null now, if wanted;
+ * before /dev/null did not necessarily exist
+ */
+ if (lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & LXC_AUTO_PROC_SYSRQ, cgroup_info) < 0) {
+ ERROR("failed to setup the automatic mounts for '%s'", name);
return -1;
}
- if (setup_console(&lxc_conf->rootfs, &lxc_conf->console, lxc_conf->ttydir)) {
+ if (!lxc_conf->is_execute && setup_console(&lxc_conf->rootfs, &lxc_conf->console, lxc_conf->ttydir)) {
ERROR("failed to setup the console for '%s'", name);
return -1;
}
- if (setup_kmsg(&lxc_conf->rootfs, &lxc_conf->console)) // don't fail
- ERROR("failed to setup kmsg for '%s'", name);
+ if (lxc_conf->kmsg) {
+ if (setup_kmsg(&lxc_conf->rootfs, &lxc_conf->console)) // don't fail
+ ERROR("failed to setup kmsg for '%s'", name);
+ }
- if (setup_tty(&lxc_conf->rootfs, &lxc_conf->tty_info, lxc_conf->ttydir)) {
+ if (!lxc_conf->is_execute && setup_tty(&lxc_conf->rootfs, &lxc_conf->tty_info, lxc_conf->ttydir)) {
ERROR("failed to setup the ttys for '%s'", name);
return -1;
}
-#if HAVE_APPARMOR /* || HAVE_SMACK || HAVE_SELINUX */
- INFO("rootfs path is .%s., mount is .%s.", lxc_conf->rootfs.path,
- lxc_conf->rootfs.mount);
- if (lxc_conf->rootfs.path == NULL || strlen(lxc_conf->rootfs.path) == 0)
- mounted = 0;
- else
- mounted = lsm_mount_proc_if_needed(lxc_conf->rootfs.path, lxc_conf->rootfs.mount);
- if (mounted == -1) {
- SYSERROR("failed to mount /proc in the container.");
+ /* mount /proc if needed for LSM transition */
+ if (lsm_proc_mount(lxc_conf) < 0) {
+ ERROR("failed to LSM mount proc for '%s'", name);
return -1;
- } else if (mounted == 1) {
- lxc_conf->lsm_umount_proc = 1;
}
-#endif
if (setup_pivot_root(&lxc_conf->rootfs)) {
ERROR("failed to set rootfs for '%s'", name);
return -1;
}
- if (setup_caps(&lxc_conf->caps)) {
- ERROR("failed to drop capabilities");
- return -1;
+ if (lxc_list_empty(&lxc_conf->id_map)) {
+ if (!lxc_list_empty(&lxc_conf->keepcaps)) {
+ if (!lxc_list_empty(&lxc_conf->caps)) {
+ ERROR("Simultaneously requested dropping and keeping caps");
+ return -1;
+ }
+ if (dropcaps_except(&lxc_conf->keepcaps)) {
+ ERROR("failed to keep requested caps\n");
+ return -1;
+ }
+ } else if (setup_caps(&lxc_conf->caps)) {
+ ERROR("failed to drop capabilities");
+ return -1;
+ }
}
NOTICE("'%s' is setup.", name);
return 0;
}
-int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf)
+int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf,
+ const char *lxcpath, char *argv[])
{
int which = -1;
struct lxc_list *it;
which = LXCHOOK_PREMOUNT;
else if (strcmp(hook, "mount") == 0)
which = LXCHOOK_MOUNT;
+ else if (strcmp(hook, "autodev") == 0)
+ which = LXCHOOK_AUTODEV;
else if (strcmp(hook, "start") == 0)
which = LXCHOOK_START;
else if (strcmp(hook, "post-stop") == 0)
which = LXCHOOK_POSTSTOP;
+ else if (strcmp(hook, "clone") == 0)
+ which = LXCHOOK_CLONE;
else
return -1;
lxc_list_for_each(it, &conf->hooks[which]) {
int ret;
char *hookname = it->elem;
- ret = run_script(name, "lxc", hookname, hook, NULL);
+ ret = run_script_argv(name, "lxc", hookname, hook, lxcpath, argv);
if (ret)
return ret;
}
return 0;
}
+/*
+ * Empty the id_map list of @c: each node is unlinked, then its element and
+ * the node itself are freed.  Always returns 0.
+ */
+int lxc_clear_idmaps(struct lxc_conf *c)
+{
+	struct lxc_list *cur, *tmp;
+
+	lxc_list_for_each_safe(cur, &c->id_map, tmp) {
+		lxc_list_del(cur);
+		free(cur->elem);
+		free(cur);
+	}
+
+	return 0;
+}
+
+/*
+ * Empty the keepcaps list of @c: each node is unlinked, then its element
+ * and the node itself are freed.  Always returns 0.
+ */
+int lxc_clear_config_keepcaps(struct lxc_conf *c)
+{
+	struct lxc_list *cur, *tmp;
+
+	lxc_list_for_each_safe(cur, &c->keepcaps, tmp) {
+		lxc_list_del(cur);
+		free(cur->elem);
+		free(cur);
+	}
+
+	return 0;
+}
+
int lxc_clear_cgroups(struct lxc_conf *c, const char *key)
{
struct lxc_list *it,*next;
return;
if (conf->console.path)
free(conf->console.path);
- if (conf->rootfs.mount != default_rootfs_mount)
+ if (conf->rootfs.mount)
free(conf->rootfs.mount);
if (conf->rootfs.path)
free(conf->rootfs.path);
free(conf->ttydir);
if (conf->fstab)
free(conf->fstab);
- if (conf->logfile)
- free(conf->logfile);
+ if (conf->rcfile)
+ free(conf->rcfile);
lxc_clear_config_network(conf);
-#if HAVE_APPARMOR
- if (conf->aa_profile)
- free(conf->aa_profile);
-#endif
+ if (conf->lsm_aa_profile)
+ free(conf->lsm_aa_profile);
+ if (conf->lsm_se_context)
+ free(conf->lsm_se_context);
lxc_seccomp_free(conf);
lxc_clear_config_caps(conf);
+ lxc_clear_config_keepcaps(conf);
lxc_clear_cgroups(conf, "lxc.cgroup");
lxc_clear_hooks(conf, "lxc.hook");
lxc_clear_mount_entries(conf);
lxc_clear_saved_nics(conf);
+ lxc_clear_idmaps(conf);
free(conf);
}