#include <sys/capability.h>
#endif
-#if !HAVE_DECL_PR_CAPBSET_DROP
+#ifndef HAVE_DECL_PR_CAPBSET_DROP
#define PR_CAPBSET_DROP 24
#endif
+#ifndef HAVE_DECL_PR_SET_NO_NEW_PRIVS
+#define PR_SET_NO_NEW_PRIVS 38
+#endif
+
+#ifndef HAVE_DECL_PR_GET_NO_NEW_PRIVS
+#define PR_GET_NO_NEW_PRIVS 39
+#endif
+
#include "af_unix.h"
+#include "bdev.h"
#include "caps.h"
#include "cgroup.h"
#include "commands.h"
#include "start.h"
#include "sync.h"
#include "utils.h"
-#include "bdev/bdev.h"
#include "lsm/lsm.h"
lxc_log_define(lxc_start, lxc);
*/
int lxc_check_inherited(struct lxc_conf *conf, bool closeall, int fd_to_ignore)
{
- struct dirent dirent, *direntp;
+ struct dirent *direntp;
int fd, fddir;
DIR *dir;
fddir = dirfd(dir);
- while (!readdir_r(dir, &dirent, &direntp)) {
+ while ((direntp = readdir(dir))) {
if (!direntp)
break;
WARN("inherited fd %d", fd);
}
+ /*
+ * Only enable syslog at this point. Enabling it earlier would let the
+ * logging calls above open a new fd and make lxc_check_inherited() enter
+ * an infinite loop.
+ */
+ lxc_log_enable_syslog();
+
closedir(dir); /* cannot fail */
return 0;
}
handler->nsfd[i] = -1;
}
}
+
+ if (handler->netnsfd >= 0) {
+ close(handler->netnsfd);
+ handler->netnsfd = -1;
+ }
+
lxc_set_state(name, handler, STOPPED);
if (run_lxc_hooks(name, "post-stop", handler->conf, handler->lxcpath, NULL)) {
if (ret < 0) {
SYSERROR("Error unsharing network namespace");
goto out_warn_father;
+ } else {
+ INFO("Unshared NET namespace.");
}
}
if (lxc_sync_barrier_parent(handler, LXC_SYNC_CGROUP))
goto out_error;
+ /* Unshare cgroup namespace after we have set up our cgroups. If we do it
+ * earlier we end up with a wrong view of /proc/self/cgroup. For
+ * example, assume we unshare(CLONE_NEWCGROUP) first, and then create
+ * the cgroup for the container, say /sys/fs/cgroup/cpuset/lxc/c, then
+ * /proc/self/cgroup would show us:
+ *
+ * 8:cpuset:/lxc/c
+ *
+ * whereas it should actually show
+ *
+ * 8:cpuset:/
+ */
+ if (cgns_supported()) {
+ if (unshare(CLONE_NEWCGROUP) < 0) {
+ INFO("Failed to unshare CLONE_NEWCGROUP.");
+ goto out_warn_father;
+ }
+ INFO("Unshared CLONE_NEWCGROUP.");
+ }
+
/* Set the label to change to when we exec(2) the container's init */
if (lsm_process_label_set(NULL, handler->conf, 1, 1) < 0)
goto out_warn_father;
+ /* Set PR_SET_NO_NEW_PRIVS after we changed the lsm label. If we set it
+ * first, we are no longer allowed to change the label. */
+ if (handler->conf->no_new_privs) {
+ if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
+ SYSERROR("Could not set PR_SET_NO_NEW_PRIVS to block execve() gainable privileges.");
+ goto out_warn_father;
+ }
+ DEBUG("Set PR_SET_NO_NEW_PRIVS to block execve() gainable privileges.");
+ }
+
/* Some inits, such as busybox, will set sane tty settings on stdin,
* stdout, stderr which it thinks is the console. We already set them
* the way we wanted on the real terminal, and we want init to do its
devnull_fd = -1;
}
- if (cgns_supported() && unshare(CLONE_NEWCGROUP) != 0) {
- SYSERROR("Failed to unshare cgroup namespace");
- goto out_warn_father;
- }
-
setsid();
/* after this call, we are in error because this
void resolve_clone_flags(struct lxc_handler *handler)
{
handler->clone_flags = CLONE_NEWPID | CLONE_NEWNS;
+ INFO("Adding CLONE_NEWPID to clone flags.");
+ INFO("Adding CLONE_NEWNS to clone flags.");
if (!lxc_list_empty(&handler->conf->id_map)) {
- INFO("Cloning a new user namespace");
+ INFO("Adding CLONE_NEWUSER to clone flags.");
handler->clone_flags |= CLONE_NEWUSER;
}
if (handler->conf->inherit_ns_fd[LXC_NS_NET] == -1) {
- if (!lxc_requests_empty_network(handler))
+ if (!lxc_requests_empty_network(handler)) {
+ INFO("Adding CLONE_NEWNET to clone flags.");
handler->clone_flags |= CLONE_NEWNET;
+ }
} else {
- INFO("Inheriting a net namespace");
+ INFO("Inheriting a NET namespace.");
}
if (handler->conf->inherit_ns_fd[LXC_NS_IPC] == -1) {
+ INFO("Adding CLONE_NEWIPC to clone flags.");
handler->clone_flags |= CLONE_NEWIPC;
} else {
- INFO("Inheriting an IPC namespace");
+ INFO("Inheriting an IPC namespace.");
}
if (handler->conf->inherit_ns_fd[LXC_NS_UTS] == -1) {
+ INFO("Adding CLONE_NEWUTS to clone flags.");
handler->clone_flags |= CLONE_NEWUTS;
} else {
- INFO("Inheriting a UTS namespace");
+ INFO("Inheriting a UTS namespace.");
}
}
flags = handler->clone_flags;
if (handler->clone_flags & CLONE_NEWUSER)
flags &= ~CLONE_NEWNET;
- handler->pid = lxc_clone(do_start, handler, handler->clone_flags);
+ handler->pid = lxc_clone(do_start, handler, flags);
if (handler->pid < 0) {
- SYSERROR("failed to fork into a new namespace");
+ SYSERROR("Failed to fork into a set of new namespaces.");
goto out_delete_net;
+ } else {
+ INFO("Cloned a set of new namespaces.");
}
if (!preserve_ns(handler->nsfd, handler->clone_flags | preserve_mask, handler->pid, &errmsg)) {
}
lxc_sync_fini(handler);
+ handler->netnsfd = lxc_preserve_ns(handler->pid, "net");
return 0;
return -1;
}
-int get_netns_fd(int pid)
-{
- char path[MAXPATHLEN];
- int ret, fd;
-
- ret = snprintf(path, MAXPATHLEN, "/proc/%d/ns/net", pid);
- if (ret < 0 || ret >= MAXPATHLEN) {
- WARN("Failed to pin netns file for pid %d", pid);
- return -1;
- }
-
- fd = open(path, O_RDONLY);
- if (fd < 0) {
- WARN("Failed to pin netns file %s for pid %d: %s",
- path, pid, strerror(errno));
- return -1;
- }
- return fd;
-}
-
int __lxc_start(const char *name, struct lxc_conf *conf,
struct lxc_operations* ops, void *data, const char *lxcpath,
bool backgrounded)
struct lxc_handler *handler;
int err = -1;
int status;
- int netnsfd = -1;
handler = lxc_init(name, conf, lxcpath);
if (!handler) {
handler->ops = ops;
handler->data = data;
handler->backgrounded = backgrounded;
+ handler->netnsfd = -1;
if (must_drop_cap_sys_boot(handler->conf)) {
#if HAVE_SYS_CAPABILITY_H
/* if the backing store is a device, mount it here and now */
if (rootfs_is_blockdev(conf)) {
if (unshare(CLONE_NEWNS) < 0) {
- ERROR("Error unsharing mounts");
+ ERROR("Error unsharing MOUNT namespace.");
goto out_fini_nonet;
+ } else {
+ INFO("Unshared MOUNT namespace.");
}
remount_all_slave();
if (do_rootfs_setup(conf, name, lxcpath) < 0) {
handler->conf->reboot = 0;
- netnsfd = get_netns_fd(handler->pid);
-
err = lxc_poll(name, handler);
if (err) {
ERROR("mainloop exited with an error");
- if (netnsfd >= 0)
- close(netnsfd);
+ if (handler->netnsfd >= 0) {
+ close(handler->netnsfd);
+ handler->netnsfd = -1;
+ }
goto out_abort;
}
}
DEBUG("Pushing physical nics back to host namespace");
- lxc_rename_phys_nics_on_shutdown(netnsfd, handler->conf);
+ lxc_restore_phys_nics_to_netns(handler->netnsfd, handler->conf);
DEBUG("Tearing down virtual network devices used by container");
lxc_delete_network(handler);
- if (netnsfd >= 0)
- close(netnsfd);
-
if (handler->pinfd >= 0) {
close(handler->pinfd);
handler->pinfd = -1;