* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-#define _GNU_SOURCE
-#include "config.h"
-
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE 1
+#endif
#include <arpa/inet.h>
#include <dirent.h>
#include <errno.h>
#include <time.h>
#include <unistd.h>
+#include "af_unix.h"
+#include "caps.h"
+#include "cgroup.h"
+#include "conf.h"
+#include "config.h"
+#include "confile.h"
+#include "confile_utils.h"
+#include "error.h"
+#include "log.h"
+#include "lsm/lsm.h"
+#include "lxclock.h"
+#include "lxcseccomp.h"
+#include "macro.h"
+#include "namespace.h"
+#include "network.h"
+#include "parse.h"
+#include "raw_syscalls.h"
+#include "ringbuf.h"
+#include "start.h"
+#include "storage.h"
+#include "storage/overlay.h"
+#include "syscall_wrappers.h"
+#include "terminal.h"
+#include "utils.h"
+
#ifdef MAJOR_IN_MKDEV
#include <sys/mkdev.h>
#endif
#include <../include/prlimit.h>
#endif
-#include "af_unix.h"
-#include "caps.h"
-#include "cgroup.h"
-#include "conf.h"
-#include "confile_utils.h"
-#include "error.h"
-#include "log.h"
-#include "lsm/lsm.h"
-#include "lxclock.h"
-#include "lxcseccomp.h"
-#include "namespace.h"
-#include "network.h"
-#include "parse.h"
-#include "ringbuf.h"
-#include "start.h"
-#include "storage.h"
-#include "storage/overlay.h"
-#include "terminal.h"
-#include "utils.h"
-
-#ifndef MS_PRIVATE
-#define MS_PRIVATE (1<<18)
-#endif
-
-#ifndef MS_LAZYTIME
-#define MS_LAZYTIME (1<<25)
-#endif
-
lxc_log_define(conf, lxc);
/* The lxc_conf of the container currently being worked on in an API call.
* This is used in the error calls.
*/
#ifdef HAVE_TLS
-__thread struct lxc_conf *current_config;
+thread_local struct lxc_conf *current_config;
#else
struct lxc_conf *current_config;
#endif
-/* Define pivot_root() if missing from the C library */
-#ifndef HAVE_PIVOT_ROOT
-static int pivot_root(const char *new_root, const char *put_old)
-{
-#ifdef __NR_pivot_root
- return syscall(__NR_pivot_root, new_root, put_old);
-#else
- errno = ENOSYS;
- return -1;
-#endif
-}
-#else
-extern int pivot_root(const char *new_root, const char *put_old);
-#endif
-
char *lxchook_names[NUM_LXC_HOOKS] = {
"pre-start",
"pre-mount",
for (i = 0; argv && argv[i]; i++)
size += strlen(argv[i]) + 1;
- size += sizeof("exec");
+ size += STRLITERALLEN("exec");
+ size++;
size += strlen(script);
size++;
size += strlen(p) + 1;
va_end(ap);
- size += strlen("exec");
+ size += STRLITERALLEN("exec");
size += strlen(script);
size += strlen(name);
size += strlen(section);
int pin_rootfs(const char *rootfs)
{
int fd, ret;
- char absrootfs[MAXPATHLEN], absrootfspin[MAXPATHLEN];
+ char absrootfspin[PATH_MAX];
+ char *absrootfs;
struct stat s;
struct statfs sfs;
if (rootfs == NULL || strlen(rootfs) == 0)
return -2;
- if (!realpath(rootfs, absrootfs))
+ absrootfs = realpath(rootfs, NULL);
+ if (!absrootfs)
return -2;
ret = stat(absrootfs, &s);
- if (ret < 0)
+ if (ret < 0) {
+ free(absrootfs);
return -1;
+ }
- if (!S_ISDIR(s.st_mode))
+ if (!S_ISDIR(s.st_mode)) {
+ free(absrootfs);
return -2;
+ }
- ret = snprintf(absrootfspin, MAXPATHLEN, "%s/.lxc-keep", absrootfs);
- if (ret >= MAXPATHLEN)
+ ret = snprintf(absrootfspin, PATH_MAX, "%s/.lxc-keep", absrootfs);
+ free(absrootfs);
+ if (ret < 0 || ret >= PATH_MAX)
return -1;
fd = open(absrootfspin, O_CREAT | O_RDWR, S_IWUSR | S_IRUSR);
#endif
}
+/* Queue a bind mount for the shared mount tunnel in the container's mount list.
+ *
+ * Builds the fstab-style entry
+ *   "<shmount.path_host> <shmount.path_cont minus leading '/'> none bind,create=dir 0 0"
+ * and passes it to add_elem_to_mount_list().
+ *
+ * Returns whatever add_elem_to_mount_list() returns, or -1 when either shmount
+ * path is unset, the container path is not absolute, or the entry does not fit
+ * into PATH_MAX bytes.
+ */
+static int add_shmount_to_list(struct lxc_conf *conf)
+{
+	char new_mount[PATH_MAX];
+	/* Offset for the leading '/' since the path_cont
+	 * is absolute inside the container.
+	 */
+	const int offset = 1;
+	int ret;
+
+	/* Both paths start out as NULL in a fresh config. Handing NULL to "%s"
+	 * or stepping past the terminator of an empty string is undefined
+	 * behavior, so refuse anything that is not a usable absolute path.
+	 */
+	if (!conf->shmount.path_host || !conf->shmount.path_cont ||
+	    conf->shmount.path_cont[0] != '/')
+		return -1;
+
+	ret = snprintf(new_mount, sizeof(new_mount),
+		       "%s %s none bind,create=dir 0 0", conf->shmount.path_host,
+		       conf->shmount.path_cont + offset);
+	if (ret < 0 || (size_t)ret >= sizeof(new_mount))
+		return -1;
+
+	return add_elem_to_mount_list(new_mount, conf);
+}
+
static int lxc_mount_auto_mounts(struct lxc_conf *conf, int flags, struct lxc_handler *handler)
{
int i, r;
{ LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RW, "sysfs", "%r/sys", "sysfs", 0, NULL },
{ LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RO, "sysfs", "%r/sys", "sysfs", MS_RDONLY, NULL },
{ LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "sysfs", "%r/sys", "sysfs", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
- { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "%r/sys", "%r/sys", NULL, MS_BIND, NULL },
{ LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, NULL, "%r/sys", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
{ LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "sysfs", "%r/sys/devices/virtual/net", "sysfs", 0, NULL },
{ LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "%r/sys/devices/virtual/net/devices/virtual/net", "%r/sys/devices/virtual/net", NULL, MS_BIND, NULL },
}
}
+ if (flags & LXC_AUTO_SHMOUNTS_MASK) {
+ int ret = add_shmount_to_list(conf);
+ if (ret < 0) {
+ ERROR("Failed to add shmount entry to container config");
+ return -1;
+ }
+ }
+
return 0;
}
static int lxc_setup_dev_symlinks(const struct lxc_rootfs *rootfs)
{
int i, ret;
- char path[MAXPATHLEN];
+ char path[PATH_MAX];
struct stat s;
for (i = 0; i < sizeof(dev_symlinks) / sizeof(dev_symlinks[0]); i++) {
ret = snprintf(path, sizeof(path), "%s/dev/%s",
rootfs->path ? rootfs->mount : "", d->name);
- if (ret < 0 || ret >= MAXPATHLEN)
+ if (ret < 0 || ret >= PATH_MAX)
return -1;
/* Stat the path first. If we don't get an error accept it as
int i, ret;
const struct lxc_tty_info *ttys = &conf->ttys;
char *ttydir = ttys->dir;
- char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
+ char path[PATH_MAX], lxcpath[PATH_MAX];
if (!conf->rootfs.path)
return 0;
if (ret < 0 || (size_t)ret >= sizeof(lxcpath))
return -1;
- ret = mknod(path, S_IFREG | 0000, 0);
+ ret = mknod(lxcpath, S_IFREG | 0000, 0);
if (ret < 0 && errno != EEXIST) {
SYSERROR("Failed to create \"%s\"", lxcpath);
return -1;
ret = mount(tty->name, lxcpath, "none", MS_BIND, 0);
if (ret < 0) {
- WARN("Failed to bind mount \"%s\" onto \"%s\"",
- tty->name, path);
+ SYSWARN("Failed to bind mount \"%s\" onto \"%s\"",
+ tty->name, lxcpath);
continue;
}
DEBUG("Bind mounted \"%s\" onto \"%s\"", tty->name,
- path);
+ lxcpath);
ret = snprintf(lxcpath, sizeof(lxcpath), "%s/tty%d",
ttydir, i + 1);
int lxc_allocate_ttys(struct lxc_conf *conf)
{
- int i, ret;
+ size_t i;
+ int ret;
struct lxc_tty_info *ttys = &conf->ttys;
/* no tty in the configuration */
tty->slave = -EBADF;
ret = openpty(&tty->master, &tty->slave, NULL, NULL, NULL);
if (ret < 0) {
- SYSERROR("Failed to create tty %d", i);
+ SYSERROR("Failed to create tty %zu", i);
ttys->max = i;
lxc_delete_tty(ttys);
return -ENOTTY;
ret = ttyname_r(tty->slave, tty->name, sizeof(tty->name));
if (ret < 0) {
- SYSERROR("Failed to retrieve name of tty %d slave", i);
+ SYSERROR("Failed to retrieve name of tty %zu slave", i);
ttys->max = i;
lxc_delete_tty(ttys);
return -ENOTTY;
return ret;
}
-static int setup_rootfs_pivot_root(const char *rootfs)
-{
- int ret;
- int newroot = -1, oldroot = -1;
-
- oldroot = open("/", O_DIRECTORY | O_RDONLY);
- if (oldroot < 0) {
- SYSERROR("Failed to open old root directory");
- return -1;
- }
-
- newroot = open(rootfs, O_DIRECTORY | O_RDONLY);
- if (newroot < 0) {
- SYSERROR("Failed to open new root directory");
- goto on_error;
- }
-
- /* change into new root fs */
- ret = fchdir(newroot);
- if (ret < 0) {
- SYSERROR("Failed to change to new rootfs \"%s\"", rootfs);
- goto on_error;
- }
-
- /* pivot_root into our new root fs */
- ret = pivot_root(".", ".");
- if (ret < 0) {
- SYSERROR("Failed to pivot_root()");
- goto on_error;
- }
-
- /* At this point the old-root is mounted on top of our new-root. To
- * unmounted it we must not be chdir'd into it, so escape back to
- * old-root.
- */
- ret = fchdir(oldroot);
- if (ret < 0) {
- SYSERROR("Failed to enter old root directory");
- goto on_error;
- }
-
- /* Make oldroot rslave to make sure our umounts don't propagate to the
- * host.
- */
- ret = mount("", ".", "", MS_SLAVE | MS_REC, NULL);
- if (ret < 0) {
- SYSERROR("Failed to make oldroot rslave");
- goto on_error;
- }
-
- ret = umount2(".", MNT_DETACH);
- if (ret < 0) {
- SYSERROR("Failed to detach old root directory");
- goto on_error;
- }
-
- ret = fchdir(newroot);
- if (ret < 0) {
- SYSERROR("Failed to re-enter new root directory");
- goto on_error;
- }
-
- close(oldroot);
- close(newroot);
-
- DEBUG("pivot_root(\"%s\") successful", rootfs);
-
- return 0;
-
-on_error:
- if (oldroot != -1)
- close(oldroot);
- if (newroot != -1)
- close(newroot);
-
- return -1;
-}
-
/* Just create a path for /dev under $lxcpath/$name and in rootfs If we hit an
* error, log it but don't fail yet.
*/
static int lxc_fill_autodev(const struct lxc_rootfs *rootfs)
{
int i, ret;
- char path[MAXPATHLEN];
+ char path[PATH_MAX];
mode_t cmask;
int use_mknod = LXC_DEVNODE_MKNOD;
- ret = snprintf(path, MAXPATHLEN, "%s/dev",
+ ret = snprintf(path, PATH_MAX, "%s/dev",
rootfs->path ? rootfs->mount : "");
- if (ret < 0 || ret >= MAXPATHLEN)
+ if (ret < 0 || ret >= PATH_MAX)
return -1;
/* ignore, just don't try to fill in */
cmask = umask(S_IXUSR | S_IXGRP | S_IXOTH);
for (i = 0; i < sizeof(lxc_devices) / sizeof(lxc_devices[0]); i++) {
- char hostpath[MAXPATHLEN];
+ char hostpath[PATH_MAX];
const struct lxc_device_node *device = &lxc_devices[i];
- ret = snprintf(path, MAXPATHLEN, "%s/dev/%s",
+ ret = snprintf(path, PATH_MAX, "%s/dev/%s",
rootfs->path ? rootfs->mount : "", device->name);
- if (ret < 0 || ret >= MAXPATHLEN)
+ if (ret < 0 || ret >= PATH_MAX)
return -1;
if (use_mknod >= LXC_DEVNODE_MKNOD) {
}
/* Fallback to bind-mounting the device from the host. */
- ret = snprintf(hostpath, MAXPATHLEN, "/dev/%s", device->name);
- if (ret < 0 || ret >= MAXPATHLEN)
+ ret = snprintf(hostpath, PATH_MAX, "/dev/%s", device->name);
+ if (ret < 0 || ret >= PATH_MAX)
return -1;
ret = safe_mount(hostpath, path, 0, MS_BIND, NULL,
return 0;
}
-static int lxc_setup_rootfs(struct lxc_conf *conf)
+static int lxc_mount_rootfs(struct lxc_conf *conf)
{
int ret;
struct lxc_storage *bdev;
- const struct lxc_rootfs *rootfs;
+ const struct lxc_rootfs *rootfs = &conf->rootfs;
- rootfs = &conf->rootfs;
if (!rootfs->path) {
ret = mount("", "/", NULL, MS_SLAVE | MS_REC, 0);
if (ret < 0) {
- SYSERROR("Failed to make / rslave");
+ SYSERROR("Failed to remount \"/\" MS_REC | MS_SLAVE");
return -1;
}
return 0;
}
-int prepare_ramfs_root(char *root)
+int lxc_chroot(const struct lxc_rootfs *rootfs)
{
int i, ret;
char *p, *p2;
- char buf[LXC_LINELEN], nroot[PATH_MAX];
+ char buf[LXC_LINELEN];
+ char *nroot;
FILE *f;
+ char *root = rootfs->mount;
- if (!realpath(root, nroot))
+ nroot = realpath(root, NULL);
+ if (!nroot) {
+ SYSERROR("Failed to resolve \"%s\"", root);
return -1;
+ }
ret = chdir("/");
- if (ret < 0)
+ if (ret < 0) {
+ free(nroot);
return -1;
+ }
/* We could use here MS_MOVE, but in userns this mount is locked and
* can't be moved.
*/
- ret = mount(root, "/", NULL, MS_REC | MS_BIND, NULL);
+ ret = mount(nroot, "/", NULL, MS_REC | MS_BIND, NULL);
if (ret < 0) {
- SYSERROR("Failed to move \"%s\" into \"/\"", root);
+ SYSERROR("Failed to mount \"%s\" onto \"/\" as MS_REC | MS_BIND", nroot);
+ free(nroot);
return -1;
}
+ free(nroot);
ret = mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL);
if (ret < 0) {
- SYSERROR("Failed to make \"/\" rprivate");
+ SYSERROR("Failed to remount \"/\"");
return -1;
}
- /* The following code cleans up inhereted mounts which are not required
+ /* The following code cleans up inherited mounts which are not required
* for CT.
*
* The mountinfo file shows not all mounts, if a few points have been
f = fopen("./proc/self/mountinfo", "r");
if (!f) {
- SYSERROR("Unable to open /proc/self/mountinfo");
+ SYSERROR("Failed to open \"/proc/self/mountinfo\"");
return -1;
}
/* It is weird, but chdir("..") moves us in a new root */
ret = chdir("..");
if (ret < 0) {
- SYSERROR("Unable to change working directory");
+ SYSERROR("Failed to chdir(\"..\")");
return -1;
}
ret = chroot(".");
if (ret < 0) {
- SYSERROR("Unable to chroot");
+ SYSERROR("Failed to chroot(\".\")");
return -1;
}
return 0;
}
-static int setup_pivot_root(const struct lxc_rootfs *rootfs)
+/* (The following explanation is copied verbatim from the kernel.)
+ *
+ * pivot_root Semantics:
+ * Moves the root file system of the current process to the directory put_old,
+ * makes new_root as the new root file system of the current process, and sets
+ * root/cwd of all processes which had them on the current root to new_root.
+ *
+ * Restrictions:
+ * The new_root and put_old must be directories, and must not be on the
+ * same file system as the current process root. The put_old must be
+ * underneath new_root, i.e. adding a non-zero number of /.. to the string
+ * pointed to by put_old must yield the same directory as new_root. No other
+ * file system may be mounted on put_old. After all, new_root is a mountpoint.
+ *
+ * Also, the current root cannot be on the 'rootfs' (initial ramfs) filesystem.
+ * See Documentation/filesystems/ramfs-rootfs-initramfs.txt for alternatives
+ * in this situation.
+ *
+ * Notes:
+ * - we don't move root/cwd if they are not at the root (reason: if something
+ * cared enough to change them, it's probably wrong to force them elsewhere)
+ * - it's okay to pick a root that isn't the root of a file system, e.g.
+ * /nfs/my_root where /nfs is the mount point. It must be a mountpoint,
+ * though, so you may need to say mount --bind /nfs/my_root /nfs/my_root
+ * first.
+ *
+ * lxc_pivot_root() (LXC's wrapper, not part of the kernel text above):
+ * opens "/" and @rootfs as directory file descriptors, changes into @rootfs,
+ * calls pivot_root(".", "."), returns to the old root through its file
+ * descriptor, remounts it MS_SLAVE | MS_REC, detaches it with
+ * umount2(".", MNT_DETACH) and finally changes back into the new root.
+ * Returns 0 on success and -1 on any failure; every file descriptor that was
+ * opened is closed before returning.
+ */
+static int lxc_pivot_root(const char *rootfs)
{
- int ret;
+ int oldroot;
+ int newroot = -1, ret = -1;
- if (!rootfs->path) {
- DEBUG("Container does not have a rootfs");
- return 0;
+ oldroot = open("/", O_DIRECTORY | O_RDONLY | O_CLOEXEC);
+ if (oldroot < 0) {
+ SYSERROR("Failed to open old root directory");
+ return -1;
}
- if (detect_ramfs_rootfs()) {
- DEBUG("Detected that container is on ramfs");
+ newroot = open(rootfs, O_DIRECTORY | O_RDONLY | O_CLOEXEC);
+ if (newroot < 0) {
+ SYSERROR("Failed to open new root directory");
+ goto on_error;
+ }
- ret = prepare_ramfs_root(rootfs->mount);
- if (ret < 0) {
- ERROR("Failed to prepare minimal ramfs root");
- return -1;
- }
+ /* Change into the new root fs so that "." refers to it below. */
+ ret = fchdir(newroot);
+ if (ret < 0) {
+ ret = -1;
+ SYSERROR("Failed to change to new rootfs \"%s\"", rootfs);
+ goto on_error;
+ }
- DEBUG("Prepared ramfs root for container");
- return 0;
+ /* pivot_root() into our new root fs; "." is passed as both new_root
+ * and put_old.
+ */
+ ret = pivot_root(".", ".");
+ if (ret < 0) {
+ ret = -1;
+ SYSERROR("Failed to pivot_root()");
+ goto on_error;
}
- ret = setup_rootfs_pivot_root(rootfs->mount);
+ /* At this point the old-root is mounted on top of our new-root. To
+ * unmount it we must not be chdir'd into it, so escape back to
+ * old-root.
+ */
+ ret = fchdir(oldroot);
if (ret < 0) {
- ERROR("Failed to pivot_root()");
- return -1;
+ ret = -1;
+ SYSERROR("Failed to enter old root directory");
+ goto on_error;
}
- DEBUG("Finished pivot_root()");
- return 0;
+ /* Make oldroot rslave to make sure our umounts don't propagate to the
+ * host.
+ */
+ ret = mount("", ".", "", MS_SLAVE | MS_REC, NULL);
+ if (ret < 0) {
+ ret = -1;
+ SYSERROR("Failed to make oldroot rslave");
+ goto on_error;
+ }
+
+ ret = umount2(".", MNT_DETACH);
+ if (ret < 0) {
+ ret = -1;
+ SYSERROR("Failed to detach old root directory");
+ goto on_error;
+ }
+
+ ret = fchdir(newroot);
+ if (ret < 0) {
+ ret = -1;
+ SYSERROR("Failed to re-enter new root directory");
+ goto on_error;
+ }
+
+ ret = 0;
+
+ TRACE("pivot_root(\"%s\") successful", rootfs);
+
+on_error:
+ close(oldroot);
+
+ if (newroot >= 0)
+ close(newroot);
+
+ return ret;
}
-static const struct id_map *find_mapped_nsid_entry(struct lxc_conf *conf, unsigned id,
- enum idtype idtype)
+/* Enter the container's root filesystem.
+ *
+ * Without a configured rootfs path there is nothing to switch to and 0 is
+ * returned. When detect_ramfs_rootfs() returns non-zero the switch is
+ * delegated to lxc_chroot(); otherwise lxc_pivot_root() is called on the
+ * rootfs mount point. The helper's return value is passed through unchanged.
+ */
+static int lxc_setup_rootfs_switch_root(const struct lxc_rootfs *rootfs)
+{
+	if (rootfs->path)
+		return detect_ramfs_rootfs() ? lxc_chroot(rootfs)
+					     : lxc_pivot_root(rootfs->mount);
+
+	DEBUG("Container does not have a rootfs");
+	return 0;
+}
+
+static const struct id_map *find_mapped_nsid_entry(struct lxc_conf *conf,
+ unsigned id,
+ enum idtype idtype)
{
struct lxc_list *it;
struct id_map *map;
static int lxc_setup_devpts(struct lxc_conf *conf)
{
int ret;
- const char *default_devpts_mntopts = "gid=5,newinstance,ptmxmode=0666,mode=0620";
+ char **opts;
char devpts_mntopts[256];
+ char *mntopt_sets[5];
+ char default_devpts_mntopts[256] = "gid=5,newinstance,ptmxmode=0666,mode=0620";
if (conf->pty_max <= 0) {
DEBUG("No new devpts instance will be mounted since no pts "
return -1;
}
- /* mount new devpts instance */
- ret = mount("devpts", "/dev/pts", "devpts", MS_NOSUID | MS_NOEXEC, devpts_mntopts);
+ /* gid=5 && max= */
+ mntopt_sets[0] = devpts_mntopts;
+
+ /* !gid=5 && max= */
+ mntopt_sets[1] = devpts_mntopts + STRLITERALLEN("gid=5") + 1;
+
+ /* gid=5 && !max= */
+ mntopt_sets[2] = default_devpts_mntopts;
+
+ /* !gid=5 && !max= */
+ mntopt_sets[3] = default_devpts_mntopts + STRLITERALLEN("gid=5") + 1;
+
+ /* end */
+ mntopt_sets[4] = NULL;
+
+ for (ret = -1, opts = mntopt_sets; opts && *opts; opts++) {
+ /* mount new devpts instance */
+ ret = mount("devpts", "/dev/pts", "devpts", MS_NOSUID | MS_NOEXEC, *opts);
+ if (ret == 0)
+ break;
+ }
+
if (ret < 0) {
- /* try mounting without gid=5 */
- ret = mount("devpts", "/dev/pts", "devpts",
- MS_NOSUID | MS_NOEXEC, devpts_mntopts + sizeof("gid=5"));
- if (ret < 0) {
- SYSERROR("Failed to mount new devpts instance");
- return -1;
- }
+ SYSERROR("Failed to mount new devpts instance");
+ return -1;
}
- DEBUG("Mount new devpts instance with options \"%s\"", devpts_mntopts);
+ DEBUG("Mount new devpts instance with options \"%s\"", *opts);
/* Remove any pre-existing /dev/ptmx file. */
ret = remove("/dev/ptmx");
const struct lxc_terminal *console)
{
int ret;
- char path[MAXPATHLEN];
+ char path[PATH_MAX];
char *rootfs_path = rootfs->path ? rootfs->mount : "";
if (console->path && !strcmp(console->path, "none"))
return -errno;
}
- ret = fchmod(console->slave, S_IXUSR | S_IXGRP | S_IXOTH);
+ ret = fchmod(console->slave, S_IXUSR | S_IXGRP);
if (ret < 0) {
SYSERROR("Failed to set mode \"0%o\" to \"%s\"",
- S_IXUSR | S_IXGRP | S_IXOTH, console->name);
+ S_IXUSR | S_IXGRP, console->name);
return -errno;
}
char *ttydir)
{
int ret;
- char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
+ char path[PATH_MAX], lxcpath[PATH_MAX];
char *rootfs_path = rootfs->path ? rootfs->mount : "";
if (console->path && !strcmp(console->path, "none"))
return -errno;
}
- ret = fchmod(console->slave, S_IXUSR | S_IXGRP | S_IXOTH);
+ ret = fchmod(console->slave, S_IXUSR | S_IXGRP);
if (ret < 0) {
SYSERROR("Failed to set mode \"0%o\" to \"%s\"",
- S_IXUSR | S_IXGRP | S_IXOTH, console->name);
+ S_IXUSR | S_IXGRP, console->name);
return -errno;
}
int parse_mntopts(const char *mntopts, unsigned long *mntflags, char **mntdata)
{
char *data, *p, *s;
- char *saveptr = NULL;
size_t size;
*mntdata = NULL;
}
*data = 0;
- for (; (p = strtok_r(s, ",", &saveptr)); s = NULL)
+ lxc_iterate_parts(p, s, ",")
parse_mntopt(p, mntflags, &data, size);
if (*data)
}
}
-static int parse_propagationopts(const char *mntopts, unsigned long *pflags)
+int parse_propagationopts(const char *mntopts, unsigned long *pflags)
{
char *p, *s;
- char *saveptr = NULL;
if (!mntopts)
return 0;
}
*pflags = 0L;
- for (; (p = strtok_r(s, ",", &saveptr)); s = NULL)
+ lxc_iterate_parts(p, s, ",")
parse_propagationopt(p, pflags);
free(s);
bool dev, bool relative, const char *rootfs)
{
int ret;
- char srcbuf[MAXPATHLEN];
+ char srcbuf[PATH_MAX];
const char *srcpath = fsname;
#ifdef HAVE_STATVFS
struct statvfs sb;
#endif
if (relative) {
- ret = snprintf(srcbuf, MAXPATHLEN, "%s/%s", rootfs ? rootfs : "/", fsname ? fsname : "");
- if (ret < 0 || ret >= MAXPATHLEN) {
+ ret = snprintf(srcbuf, PATH_MAX, "%s/%s", rootfs ? rootfs : "/", fsname ? fsname : "");
+ if (ret < 0 || ret >= PATH_MAX) {
ERROR("source path is too long");
return -1;
}
static inline int mount_entry_on_systemfs(struct mntent *mntent)
{
int ret;
- char path[MAXPATHLEN];
+ char path[PATH_MAX];
/* For containers created without a rootfs all mounts are treated as
* absolute paths starting at / on the host.
int offset;
char *aux;
const char *lxcpath;
- char path[MAXPATHLEN];
+ char path[PATH_MAX];
int ret = 0;
lxcpath = lxc_global_config_value("lxc.lxcpath");
/* If rootfs->path is a blockdev path, allow container fstab to use
* <lxcpath>/<name>/rootfs" as the target prefix.
*/
- ret = snprintf(path, MAXPATHLEN, "%s/%s/rootfs", lxcpath, lxc_name);
- if (ret < 0 || ret >= MAXPATHLEN)
+ ret = snprintf(path, PATH_MAX, "%s/%s/rootfs", lxcpath, lxc_name);
+ if (ret < 0 || ret >= PATH_MAX)
goto skipvarlib;
aux = strstr(mntent->mnt_dir, path);
offset = strlen(rootfs->path);
skipabs:
- ret = snprintf(path, MAXPATHLEN, "%s/%s", rootfs->mount, aux + offset);
- if (ret < 0 || ret >= MAXPATHLEN)
+ ret = snprintf(path, PATH_MAX, "%s/%s", rootfs->mount, aux + offset);
+ if (ret < 0 || ret >= PATH_MAX)
return -1;
return mount_entry_on_generic(mntent, path, rootfs, lxc_name, lxc_path);
const char *lxc_path)
{
int ret;
- char path[MAXPATHLEN];
+ char path[PATH_MAX];
/* relative to root mount point */
ret = snprintf(path, sizeof(path), "%s/%s", rootfs->mount, mntent->mnt_dir);
return ret;
}
-FILE *make_anonymous_mount_file(struct lxc_list *mount)
+/*
+ * In order for nested containers to be able to mount /proc and /sys they need
+ * to see a "pure" proc and sysfs mount points with nothing mounted on top
+ * (like lxcfs).
+ * For this we provide proc and sysfs in /dev/.lxc/{proc,sys} while using an
+ * apparmor rule to deny access to them. This is mostly for convenience: The
+ * container's root user can mount them anyway and thus has access to the two
+ * file systems. But a non-root user in the container should not be allowed to
+ * access them as a side effect without explicitly allowing it.
+ */
+static const char nesting_helpers[] =
+"proc dev/.lxc/proc proc create=dir,optional\n"
+"sys dev/.lxc/sys sysfs create=dir,optional\n";
+
+FILE *make_anonymous_mount_file(struct lxc_list *mount,
+ bool include_nesting_helpers)
{
int ret;
char *mount_entry;
TRACE("Created temporary mount file");
}
- if (fd < 0) {
- SYSERROR("Could not create temporary mount file");
- return NULL;
- }
lxc_list_for_each (iterator, mount) {
size_t len;
goto on_error;
}
+ if (include_nesting_helpers) {
+ ret = lxc_write_nointr(fd, nesting_helpers,
+ STRARRAYLEN(nesting_helpers));
+ if (ret != STRARRAYLEN(nesting_helpers))
+ goto on_error;
+ }
+
ret = lseek(fd, 0, SEEK_SET);
if (ret < 0)
goto on_error;
int ret;
FILE *f;
- f = make_anonymous_mount_file(mount);
+ f = make_anonymous_mount_file(mount, conf->lsm_aa_allow_nesting);
if (!f)
return -1;
return -1;
}
- ret = prctl(PR_CAPBSET_DROP, capid, 0, 0, 0);
+ ret = prctl(PR_CAPBSET_DROP, prctl_arg(capid), prctl_arg(0),
+ prctl_arg(0), prctl_arg(0));
if (ret < 0) {
SYSERROR("Failed to remove %s capability", drop_entry);
return -1;
if (caplist[i])
continue;
- ret = prctl(PR_CAPBSET_DROP, i, 0, 0, 0);
+ ret = prctl(PR_CAPBSET_DROP, prctl_arg(i), prctl_arg(0),
+ prctl_arg(0), prctl_arg(0));
if (ret < 0) {
SYSERROR("Failed to remove capability %d", i);
return -1;
SYSERROR("Failed to set limit %s", lim->resource);
return -1;
}
+
+ TRACE("Setup \"%s\" limit", lim->resource);
#else
- ERROR("Cannot set limit %s as prlimit is missing", lim->resource);
+ ERROR("Cannot set limit \"%s\" as prlimit is missing", lim->resource);
return -1;
#endif
}
struct lxc_sysctl *elem;
int ret = 0;
char *tmp = NULL;
- char filename[MAXPATHLEN] = {0};
+ char filename[PATH_MAX] = {0};
lxc_list_for_each (it, sysctls) {
elem = it->elem;
struct lxc_proc *elem;
int ret = 0;
char *tmp = NULL;
- char filename[MAXPATHLEN] = {0};
+ char filename[PATH_MAX] = {0};
lxc_list_for_each (it, procs) {
elem = it->elem;
free(new);
return NULL;
}
+ new->rootfs.managed = true;
new->logfd = -1;
lxc_list_init(&new->cgroup);
lxc_list_init(&new->cgroup2);
lxc_list_init(&new->groups);
lxc_list_init(&new->state_clients);
new->lsm_aa_profile = NULL;
+ lxc_list_init(&new->lsm_aa_raw);
new->lsm_se_context = NULL;
new->tmp_umount_proc = false;
+ new->tmp_umount_proc = 0;
+ new->shmount.path_host = NULL;
+ new->shmount.path_cont = NULL;
/* if running in a new user namespace, init and COMMAND
* default to running as UID/GID 0 when using lxc-execute */
size_t buf_size)
{
int fd, ret;
- char path[MAXPATHLEN];
+ char path[PATH_MAX];
if (geteuid() != 0 && idtype == ID_TYPE_GID) {
size_t buflen;
- ret = snprintf(path, MAXPATHLEN, "/proc/%d/setgroups", pid);
- if (ret < 0 || ret >= MAXPATHLEN)
+ ret = snprintf(path, PATH_MAX, "/proc/%d/setgroups", pid);
+ if (ret < 0 || ret >= PATH_MAX)
return -E2BIG;
fd = open(path, O_WRONLY);
}
if (fd >= 0) {
- buflen = sizeof("deny\n") - 1;
+ buflen = STRLITERALLEN("deny\n");
errno = 0;
ret = lxc_write_nointr(fd, "deny\n", buflen);
close(fd);
}
}
- ret = snprintf(path, MAXPATHLEN, "/proc/%d/%cid_map", pid,
+ ret = snprintf(path, PATH_MAX, "/proc/%d/%cid_map", pid,
idtype == ID_TYPE_UID ? 'u' : 'g');
- if (ret < 0 || ret >= MAXPATHLEN)
+ if (ret < 0 || ret >= PATH_MAX)
return -E2BIG;
fd = open(path, O_WRONLY);
* of the doubt. Otherwise we might fail even though all the necessary
* file capabilities are set.
*/
- DEBUG("Cannot check for file capabilites as full capability support is "
+ DEBUG("Cannot check for file capabilities as full capability support is "
"missing. Manual intervention needed");
fret = 1;
#endif
int fill, left;
char u_or_g;
char *pos;
- char cmd_output[MAXPATHLEN];
+ char cmd_output[PATH_MAX];
struct id_map *map;
struct lxc_list *iterator;
enum idtype type;
* +
* strlen(" ") = 1
* +
- * LXC_NUMSTRLEN64
+ * INTTYPE_TO_STRLEN(uint32_t)
* +
* strlen(" ") = 1
*
- * LXC_IDMAPLEN bytes available for our the {g,u]id mapping.
+ * LXC_IDMAPLEN bytes available for the {g,u}id mapping.
*/
int ret = 0, gidmap = 0, uidmap = 0;
- char mapbuf[9 + 1 + LXC_NUMSTRLEN64 + 1 + LXC_IDMAPLEN] = {0};
+ char mapbuf[9 + 1 + INTTYPE_TO_STRLEN(uint32_t) + 1 + LXC_IDMAPLEN] = {0};
bool had_entry = false, use_shadow = false;
int hostuid, hostgid;
if (!had_entry)
continue;
- /* Try to catch the ouput of new{g,u}idmap to make debugging
+ /* Try to catch the output of new{g,u}idmap to make debugging
* easier.
*/
if (use_shadow) {
"-m", map5,
"--", "chown", ugid, path,
NULL};
- char cmd_output[MAXPATHLEN];
+ char cmd_output[PATH_MAX];
hostuid = geteuid();
hostgid = getegid();
}
}
-#define __LXC_SENDFILE_MAX 0x7ffff000 /* maximum number of bytes sendfile can handle */
again:
- copied = sendfile(memfd, mntinfo_fd, NULL, __LXC_SENDFILE_MAX);
+ copied = lxc_sendfile_nointr(memfd, mntinfo_fd, NULL, LXC_SENDFILE_MAX);
if (copied < 0) {
if (errno == EINTR)
goto again;
/* This does the work of remounting / if it is shared, calling the container
* pre-mount hooks, and mounting the rootfs.
*/
-int do_rootfs_setup(struct lxc_conf *conf, const char *name, const char *lxcpath)
+int lxc_setup_rootfs_prepare_root(struct lxc_conf *conf, const char *name,
+ const char *lxcpath)
{
int ret;
return -1;
}
- ret = lxc_setup_rootfs(conf);
+ ret = lxc_mount_rootfs(conf);
if (ret < 0) {
ERROR("Failed to setup rootfs for");
return -1;
static bool verify_start_hooks(struct lxc_conf *conf)
{
- char path[MAXPATHLEN];
+ char path[PATH_MAX];
struct lxc_list *it;
lxc_list_for_each (it, &conf->hooks[LXCHOOK_START]) {
int ret;
- struct stat st;
char *hookname = it->elem;
- ret = snprintf(path, MAXPATHLEN, "%s%s",
+ ret = snprintf(path, PATH_MAX, "%s%s",
conf->rootfs.path ? conf->rootfs.mount : "",
hookname);
- if (ret < 0 || ret >= MAXPATHLEN)
+ if (ret < 0 || ret >= PATH_MAX)
return false;
- ret = stat(path, &st);
+ ret = access(path, X_OK);
if (ret < 0) {
- SYSERROR("Start hook %s not found in container",
+ SYSERROR("Start hook \"%s\" not found in container",
hookname);
return false;
}
static bool execveat_supported(void)
{
-#ifdef __NR_execveat
- /*
- * We use the syscall here, because it was introduced in kernel 3.19,
- * while glibc got support for using the syscall much later, in 2.27.
- * We don't want to use glibc because it falls back to /proc, and the
- * container may not have /proc mounted depending on its configuration.
- */
- syscall(__NR_execveat, -1, "", NULL, NULL, AT_EMPTY_PATH);
+ lxc_raw_execveat(-1, "", NULL, NULL, AT_EMPTY_PATH); /* Probe call: the return value is ignored, only errno is inspected below. */
if (errno == ENOSYS)
return false;
return true;
-#else
-return false;
-#endif
}
int lxc_setup(struct lxc_handler *handler)
const char *lxcpath = handler->lxcpath, *name = handler->name;
struct lxc_conf *lxc_conf = handler->conf;
- ret = do_rootfs_setup(lxc_conf, name, lxcpath);
+ ret = lxc_setup_rootfs_prepare_root(lxc_conf, name, lxcpath);
if (ret < 0) {
ERROR("Failed to setup rootfs");
return -1;
}
}
+ ret = lxc_setup_keyring();
+ if (ret < 0)
+ return -1;
+
ret = lxc_setup_network_in_child_namespaces(lxc_conf, &lxc_conf->network);
if (ret < 0) {
ERROR("Failed to setup network");
return -1;
}
- /* Make sure any start hooks are in the container */
- if (!verify_start_hooks(lxc_conf))
- return -1;
-
if (lxc_conf->is_execute) {
if (execveat_supported()) {
int fd;
}
}
+ /* Make sure any start hooks are in the container */
+ if (!verify_start_hooks(lxc_conf)) {
+ ERROR("Failed to verify start hooks");
+ return -1;
+ }
+
ret = lxc_setup_console(&lxc_conf->rootfs, &lxc_conf->console,
lxc_conf->ttys.dir);
if (ret < 0) {
return -1;
}
- ret = setup_pivot_root(&lxc_conf->rootfs);
+ ret = lxc_setup_rootfs_switch_root(&lxc_conf->rootfs);
if (ret < 0) {
ERROR("Failed to pivot root into rootfs");
return -1;
if (version == CGROUP2_SUPER_MAGIC) {
global_token = "lxc.cgroup2";
namespaced_token = "lxc.cgroup2.";
- namespaced_token_len = sizeof("lxc.cgroup2.") - 1;
+ namespaced_token_len = STRLITERALLEN("lxc.cgroup2.");
list = &c->cgroup2;
} else if (version == CGROUP_SUPER_MAGIC) {
global_token = "lxc.cgroup";
namespaced_token = "lxc.cgroup.";
- namespaced_token_len = sizeof("lxc.cgroup.") - 1;
+ namespaced_token_len = STRLITERALLEN("lxc.cgroup.");
list = &c->cgroup;
} else {
return -EINVAL;
if (strcmp(key, global_token) == 0)
all = true;
- else if (strncmp(key, namespaced_token, sizeof(namespaced_token) - 1) == 0)
+ else if (strncmp(key, namespaced_token, namespaced_token_len) == 0)
k += namespaced_token_len;
else
return -EINVAL;
if (strcmp(key, "lxc.limit") == 0 || strcmp(key, "lxc.prlimit") == 0)
all = true;
- else if (strncmp(key, "lxc.limit.", sizeof("lxc.limit.") - 1) == 0)
- k = key + sizeof("lxc.limit.") - 1;
- else if (strncmp(key, "lxc.prlimit.", sizeof("lxc.prlimit.") - 1) == 0)
- k = key + sizeof("lxc.prlimit.") - 1;
+ else if (strncmp(key, "lxc.limit.", STRLITERALLEN("lxc.limit.")) == 0)
+ k = key + STRLITERALLEN("lxc.limit.");
+ else if (strncmp(key, "lxc.prlimit.", STRLITERALLEN("lxc.prlimit.")) == 0)
+ k = key + STRLITERALLEN("lxc.prlimit.");
else
return -1;
if (strcmp(key, "lxc.sysctl") == 0)
all = true;
- else if (strncmp(key, "lxc.sysctl.", sizeof("lxc.sysctl.") - 1) == 0)
- k = key + sizeof("lxc.sysctl.") - 1;
+ else if (strncmp(key, "lxc.sysctl.", STRLITERALLEN("lxc.sysctl.")) == 0)
+ k = key + STRLITERALLEN("lxc.sysctl.");
else
return -1;
if (strcmp(key, "lxc.proc") == 0)
all = true;
- else if (strncmp(key, "lxc.proc.", sizeof("lxc.proc.") - 1) == 0)
- k = key + sizeof("lxc.proc.") - 1;
+ else if (strncmp(key, "lxc.proc.", STRLITERALLEN("lxc.proc.")) == 0)
+ k = key + STRLITERALLEN("lxc.proc.");
else
return -1;
if (strcmp(key, "lxc.hook") == 0)
all = true;
- else if (strncmp(key, "lxc.hook.", sizeof("lxc.hook.") - 1) == 0)
- k = key + sizeof("lxc.hook.") - 1;
+ else if (strncmp(key, "lxc.hook.", STRLITERALLEN("lxc.hook.")) == 0)
+ k = key + STRLITERALLEN("lxc.hook.");
else
return -1;
}
}
+/* Empty the list of raw AppArmor entries stored in c->lsm_aa_raw.
+ *
+ * Each node is unlinked from the list, then its elem payload and the node
+ * itself are freed. Always returns 0.
+ */
+int lxc_clear_apparmor_raw(struct lxc_conf *c)
+{
+	struct lxc_list *cur;
+	struct lxc_list *tmp;
+
+	/* The _safe iterator keeps the successor in tmp so that cur can be
+	 * released inside the loop body.
+	 */
+	lxc_list_for_each_safe (cur, &c->lsm_aa_raw, tmp) {
+		lxc_list_del(cur);
+		free(cur->elem);
+		free(cur);
+	}
+
+	return 0;
+}
+
void lxc_conf_free(struct lxc_conf *conf)
{
if (!conf)
free(conf->syslog);
lxc_free_networks(&conf->network);
free(conf->lsm_aa_profile);
+ free(conf->lsm_aa_profile_computed);
free(conf->lsm_se_context);
lxc_seccomp_free(conf);
lxc_clear_config_caps(conf);
lxc_clear_limits(conf, "lxc.prlimit");
lxc_clear_sysctls(conf, "lxc.sysctl");
lxc_clear_procs(conf, "lxc.proc");
+ lxc_clear_apparmor_raw(conf);
free(conf->cgroup_meta.dir);
free(conf->cgroup_meta.controllers);
+ free(conf->shmount.path_host);
+ free(conf->shmount.path_cont);
free(conf);
}
static int run_userns_fn(void *data)
{
+ int ret;
char c;
struct userns_fn_data *d = data;
/* Wait for parent to finish establishing a new mapping in the user
* namespace we are executing in.
*/
- if (lxc_read_nointr(d->p[0], &c, 1) != 1)
- return -1;
-
+ ret = lxc_read_nointr(d->p[0], &c, 1);
/* Close read end of the pipe. */
close(d->p[0]);
+ if (ret != 1)
+ return -1;
if (d->fn_name)
- TRACE("calling function \"%s\"", d->fn_name);
+ TRACE("Calling function \"%s\"", d->fn_name);
/* Call function to run. */
return d->fn(d->arg);
* - the container root {g,u}id as seen from the host > user's host {g,u}id
* - the container root -> some sub{g,u}id
- * The former we add, if the user did not specifiy a mapping. The latter we
+ * The former we add, if the user did not specify a mapping. The latter we
- * retrieve from the ontainer's configured {g,u}id mappings as it must have been
+ * retrieve from the container's configured {g,u}id mappings as it must have been
* there to start the container in the first place.
*/
int userns_exec_1(struct lxc_conf *conf, int (*fn)(void *), void *data,
if (!idmap)
return -1;
- ret = pipe(p);
+ ret = pipe2(p, O_CLOEXEC);
if (ret < 0) {
SYSERROR("Failed to create pipe");
return -1;
if (!conf)
return -EINVAL;
- ret = pipe(p);
+ ret = pipe2(p, O_CLOEXEC);
if (ret < 0) {
SYSERROR("opening pipe");
return -1;