#include "config.h"
#include <stdio.h>
-#undef _GNU_SOURCE
#include <string.h>
#include <stdlib.h>
#include <dirent.h>
#include <unistd.h>
#include <signal.h>
#include <fcntl.h>
-#include <termios.h>
+#include <grp.h>
+#include <poll.h>
#include <sys/param.h>
#include <sys/file.h>
#include <sys/mount.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/un.h>
-#include <sys/poll.h>
#include <sys/syscall.h>
#if HAVE_SYS_CAPABILITY_H
#include "namespace.h"
#include "lxcseccomp.h"
#include "caps.h"
-#include "lxclock.h"
+#include "bdev.h"
#include "lsm/lsm.h"
+#include "lxclock.h"
lxc_log_define(lxc_start, lxc);
[LXC_NS_NET] = {"net", CLONE_NEWNET}
};
+extern void mod_all_rdeps(struct lxc_container *c, bool inc);
+static bool do_destroy_container(struct lxc_conf *conf);
+static int lxc_rmdir_onedev_wrapper(void *data);
+static void lxc_destroy_container_on_signal(struct lxc_handler *handler,
+ const char *name);
+
+/*
+ * Diagnostic helper: probe each leading prefix of @path, shortest first,
+ * with access(X_OK) and log the first prefix that fails the check.
+ * Returns silently when every prefix passes.
+ */
+static void print_top_failing_dir(const char *path)
+{
+	size_t pathlen = strlen(path);
+	char *buf = alloca(pathlen + 1);
+	char *cur = buf;
+	char *const end = buf + pathlen;
+
+	memcpy(buf, path, pathlen + 1);
+
+	while (cur < end) {
+		char held;
+
+		/* step over separators, then over one path component */
+		cur += strspn(cur, "/");
+		cur += strcspn(cur, "/");
+
+		/* temporarily cut the string right after that component */
+		held = *cur;
+		*cur = '\0';
+		if (access(buf, X_OK)) {
+			SYSERROR("could not access %s. Please grant it 'x' " \
+				 "access, or add an ACL for the container root.",
+				 buf);
+			return;
+		}
+		*cur = held;
+	}
+}
+
/*
 * Close every descriptor in ns_fd[] that is still open (value > -1) and
 * reset its slot to -1, so a second call on the same array closes nothing.
 * The removed ('-') lines drop the process_lock()/process_unlock() pair
 * that used to wrap the loop.
 */
static void close_ns(int ns_fd[LXC_NS_MAX]) {
int i;
- process_lock();
for (i = 0; i < LXC_NS_MAX; i++) {
if (ns_fd[i] > -1) {
close(ns_fd[i]);
ns_fd[i] = -1;
}
}
- process_unlock();
}
static int preserve_ns(int ns_fd[LXC_NS_MAX], int clone_flags) {
int i, saved_errno;
char path[MAXPATHLEN];
- if (access("/proc/self/ns", X_OK)) {
- ERROR("Does this kernel version support 'attach'?");
- return -1;
- }
-
for (i = 0; i < LXC_NS_MAX; i++)
ns_fd[i] = -1;
+ if (access("/proc/self/ns", X_OK)) {
+ WARN("Kernel does not support attach; preserve_ns ignored");
+ return 0;
+ }
+
for (i = 0; i < LXC_NS_MAX; i++) {
if ((clone_flags & ns_info[i].clone_flag) == 0)
continue;
snprintf(path, MAXPATHLEN, "/proc/self/ns/%s", ns_info[i].proc_name);
- process_lock();
ns_fd[i] = open(path, O_RDONLY | O_CLOEXEC);
- process_unlock();
if (ns_fd[i] < 0)
goto error;
}
return (fd == 0 || fd == 1 || fd == 2);
}
-int lxc_check_inherited(struct lxc_conf *conf, int fd_to_ignore)
+/*
+ * Check for any fds we need to close
+ * * if fd_to_ignore != -1, then if we find that fd open we will ignore it.
+ * * By default we warn about open fds we find.
+ * * If closeall is true, we will close open fds.
+ * * If lxc-start was passed "-C", then conf->close_all_fds will be true,
+ * in which case we also close all open fds.
+ * * A daemonized container will always pass closeall=true.
+ */
+int lxc_check_inherited(struct lxc_conf *conf, bool closeall, int fd_to_ignore)
{
struct dirent dirent, *direntp;
int fd, fddir;
DIR *dir;
+ if (conf && conf->close_all_fds)
+ closeall = true;
+
restart:
- process_lock();
dir = opendir("/proc/self/fd");
- process_unlock();
if (!dir) {
WARN("failed to open directory: %m");
return -1;
if (fd == fddir || fd == lxc_log_fd || fd == fd_to_ignore)
continue;
+ if (current_config && fd == current_config->logfd)
+ continue;
+
if (match_fd(fd))
continue;
- if (conf->close_all_fds) {
- process_lock();
+ if (closeall) {
close(fd);
closedir(dir);
- process_unlock();
INFO("closed inherited fd %d", fd);
goto restart;
}
WARN("inherited fd %d", fd);
}
- process_lock();
closedir(dir); /* cannot fail */
- process_unlock();
return 0;
}
goto out_mainloop_open;
}
#else
- DEBUG("not starting utmp handler as cap_sys_boot cannot be dropped without capabilities support\n");
+ DEBUG("not starting utmp handler as cap_sys_boot cannot be dropped without capabilities support");
#endif
}
out_mainloop_open:
lxc_mainloop_close(&descr);
out_sigfd:
- process_lock();
close(sigfd);
- process_unlock();
return -1;
}
memset(handler, 0, sizeof(*handler));
+ handler->ttysock[0] = handler->ttysock[1] = -1;
handler->conf = conf;
handler->lxcpath = lxcpath;
handler->pinfd = -1;
}
/* Start of environment variable setup for hooks */
- if (setenv("LXC_NAME", name, 1)) {
+ if (name && setenv("LXC_NAME", name, 1)) {
SYSERROR("failed to set environment variable for container name");
}
- if (setenv("LXC_CONFIG_FILE", conf->rcfile, 1)) {
+ if (conf->rcfile && setenv("LXC_CONFIG_FILE", conf->rcfile, 1)) {
SYSERROR("failed to set environment variable for config path");
}
- if (setenv("LXC_ROOTFS_MOUNT", conf->rootfs.mount, 1)) {
+ if (conf->rootfs.mount && setenv("LXC_ROOTFS_MOUNT", conf->rootfs.mount, 1)) {
SYSERROR("failed to set environment variable for rootfs mount");
}
- if (setenv("LXC_ROOTFS_PATH", conf->rootfs.path, 1)) {
+ if (conf->rootfs.path && setenv("LXC_ROOTFS_PATH", conf->rootfs.path, 1)) {
SYSERROR("failed to set environment variable for rootfs mount");
}
if (conf->console.path && setenv("LXC_CONSOLE", conf->console.path, 1)) {
goto out_aborting;
}
- if (lxc_create_tty(name, conf)) {
- ERROR("failed to create the ttys");
- goto out_aborting;
- }
-
/* the signal fd has to be created before forking otherwise
* if the child process exits before we setup the signal fd,
* the event will be lost and the command will be stuck */
out_aborting:
lxc_set_state(name, handler, ABORTING);
out_close_maincmd_fd:
- process_lock();
close(conf->maincmd_fd);
- process_unlock();
conf->maincmd_fd = -1;
out_free_name:
free(handler->name);
return NULL;
}
-static void lxc_fini(const char *name, struct lxc_handler *handler)
+void lxc_fini(const char *name, struct lxc_handler *handler)
{
/* The STOPPING state is there for future cleanup code
* which can take awhile
lxc_console_delete(&handler->conf->console);
lxc_delete_tty(&handler->conf->tty_info);
- process_lock();
close(handler->conf->maincmd_fd);
- process_unlock();
handler->conf->maincmd_fd = -1;
free(handler->name);
- if (handler->cgroup) {
- lxc_cgroup_process_info_free_and_remove(handler->cgroup);
- handler->cgroup = NULL;
+ if (handler->ttysock[0] != -1) {
+ close(handler->ttysock[0]);
+ close(handler->ttysock[1]);
}
+ if (handler->conf->ephemeral == 1 && handler->conf->reboot != 1) {
+ lxc_destroy_container_on_signal(handler, name);
+ }
+ cgroup_destroy(handler);
free(handler);
}
int status;
pid_t pid;
- process_lock();
f = fopen("/proc/sys/kernel/ctrl-alt-del", "r");
- process_unlock();
if (!f) {
DEBUG("failed to open /proc/sys/kernel/ctrl-alt-del");
return 1;
}
ret = fscanf(f, "%d", &v);
- process_lock();
fclose(f);
- process_unlock();
if (ret != 1) {
DEBUG("Failed to read /proc/sys/kernel/ctrl-alt-del");
return 1;
flags |= CLONE_NEWUSER;
#ifdef __ia64__
- pid = __clone2(container_reboot_supported, stack, stack_size, flags, &cmd);
+ pid = __clone2(container_reboot_supported, stack, stack_size, flags, &cmd);
#else
- stack += stack_size;
- pid = clone(container_reboot_supported, stack, flags, &cmd);
+ stack += stack_size;
+ pid = clone(container_reboot_supported, stack, flags, &cmd);
#endif
- if (pid < 0) {
- SYSERROR("failed to clone\n");
- return -1;
- }
- if (wait(&status) < 0) {
- SYSERROR("unexpected wait error: %m\n");
- return -1;
- }
+ if (pid < 0) {
+ if (flags & CLONE_NEWUSER)
+ ERROR("failed to clone (%#x): %s (includes CLONE_NEWUSER)", flags, strerror(errno));
+ else
+ ERROR("failed to clone (%#x): %s", flags, strerror(errno));
+ return -1;
+ }
+ if (wait(&status) < 0) {
+ SYSERROR("unexpected wait error: %m");
+ return -1;
+ }
if (WEXITSTATUS(status) != 1)
return 1;
return 0;
}
+/*
+ * netpipe is used in the unprivileged case to transfer the veth
+ * interface names from parent to child
+ */
+static int netpipe = -1;
+
+/* Return how many entries of @network have type LXC_NET_VETH. */
+static inline int count_veths(struct lxc_list *network)
+{
+	struct lxc_list *it;
+	int nveth = 0;
+
+	lxc_list_for_each(it, network) {
+		struct lxc_netdev *nd = it->elem;
+
+		if (nd->type == LXC_NET_VETH)
+			nveth++;
+	}
+
+	return nveth;
+}
+
+/*
+ * Read one IFNAMSIZ-byte interface name from the global netpipe for every
+ * LXC_NET_VETH entry in @network and store it in a freshly allocated
+ * netdev->name.
+ *
+ * Returns 0 when netpipe is unset (-1) or when every name was read, and
+ * -1 on allocation or read failure.  Whenever netpipe was open it is
+ * closed and reset to -1 before returning, so the stale descriptor number
+ * cannot be reused by accident.
+ */
+static int read_unpriv_netifindex(struct lxc_list *network)
+{
+	struct lxc_list *iterator;
+	struct lxc_netdev *netdev;
+	int ret = -1;
+
+	if (netpipe == -1)
+		return 0;
+
+	lxc_list_for_each(iterator, network) {
+		netdev = iterator->elem;
+		if (netdev->type != LXC_NET_VETH)
+			continue;
+
+		netdev->name = malloc(IFNAMSIZ);
+		if (!netdev->name) {
+			ERROR("Out of memory");
+			goto out;
+		}
+
+		if (read(netpipe, netdev->name, IFNAMSIZ) != IFNAMSIZ) {
+			ERROR("Error reading veth name from parent");
+			/* do not leave a buffer with undefined contents behind */
+			free(netdev->name);
+			netdev->name = NULL;
+			goto out;
+		}
+
+		/* the peer sends raw bytes; make sure the result is a C string */
+		netdev->name[IFNAMSIZ - 1] = '\0';
+	}
+	ret = 0;
+
+out:
+	close(netpipe);
+	netpipe = -1;
+	return ret;
+}
+
static int do_start(void *data)
{
+ struct lxc_list *iterator;
struct lxc_handler *handler = data;
- const char *lsm_label = NULL;
if (sigprocmask(SIG_SETMASK, &handler->oldmask, NULL)) {
SYSERROR("failed to set sigprocmask");
/* don't leak the pinfd to the container */
if (handler->pinfd >= 0) {
- process_lock();
close(handler->pinfd);
- process_unlock();
}
/* Tell the parent task it can begin to configure the
if (lxc_sync_barrier_parent(handler, LXC_SYNC_CONFIGURE))
return -1;
+ if (read_unpriv_netifindex(&handler->conf->network) < 0)
+ goto out_warn_father;
+
/*
* if we are in a new user namespace, become root there to have
- * privilege over our namespace
+ * privilege over our namespace. When using lxc-execute we default to root,
+ * but this can be overridden using the lxc.init_uid and lxc.init_gid
+ * configuration options.
*/
if (!lxc_list_empty(&handler->conf->id_map)) {
- NOTICE("switching to gid/uid 0 in new user namespace");
- if (setgid(0)) {
+ gid_t new_gid = 0;
+ if (handler->conf->is_execute && handler->conf->init_gid)
+ new_gid = handler->conf->init_gid;
+
+ uid_t new_uid = 0;
+ if (handler->conf->is_execute && handler->conf->init_uid)
+ new_uid = handler->conf->init_uid;
+
+ NOTICE("switching to gid/uid %d/%d in new user namespace", new_gid, new_uid);
+ if (setgid(new_gid)) {
SYSERROR("setgid");
goto out_warn_father;
}
- if (setuid(0)) {
+ if (setuid(new_uid)) {
SYSERROR("setuid");
goto out_warn_father;
}
+ if (setgroups(0, NULL)) {
+ SYSERROR("setgroups");
+ goto out_warn_father;
+ }
+ }
+
+ if (access(handler->lxcpath, X_OK)) {
+ print_top_failing_dir(handler->lxcpath);
+ goto out_warn_father;
}
#if HAVE_SYS_CAPABILITY_H
SYSERROR("failed to remove CAP_SYS_BOOT capability");
goto out_warn_father;
}
- DEBUG("Dropped cap_sys_boot\n");
+ DEBUG("Dropped cap_sys_boot");
}
#endif
/* Setup the container, ip, names, utsname, ... */
- if (lxc_setup(handler->name, handler->conf, handler->lxcpath, handler->cgroup, handler->data) ){
+ if (lxc_setup(handler)) {
ERROR("failed to setup the container");
goto out_warn_father;
}
return -1;
/* Set the label to change to when we exec(2) the container's init */
- if (!strcmp(lsm_name(), "AppArmor"))
- lsm_label = handler->conf->lsm_aa_profile;
- else if (!strcmp(lsm_name(), "SELinux"))
- lsm_label = handler->conf->lsm_se_context;
- if (lsm_process_label_set(lsm_label, 1, 1) < 0)
+ if (lsm_process_label_set(NULL, handler->conf, 1, 1) < 0)
goto out_warn_father;
- lsm_proc_unmount(handler->conf);
+
+ /* Some inits, such as busybox, will set sane tty settings on stdin,
+ * stdout, stderr which it thinks is the console. We already set them
+ * the way we wanted on the real terminal, and we want init to do its
+ * setup on its console ie. the pty allocated in lxc_console_create()
+ * so make sure that that pty is stdin,stdout,stderr.
+ */
+ if (lxc_console_set_stdfds(handler) < 0)
+ goto out_warn_father;
+
+ /* If we mounted a temporary proc, then unmount it now */
+ tmp_proc_unmount(handler->conf);
if (lxc_seccomp_load(handler->conf) != 0)
goto out_warn_father;
}
/* The clearenv() and putenv() calls have been moved here
- * to allow us to use enviroment variables passed to the various
+ * to allow us to use environment variables passed to the various
* hooks, such as the start hook above. Not all of the
* variables like CONFIG_PATH or ROOTFS are valid in this
* context but others are. */
/* don't error out though */
}
+ lxc_list_for_each(iterator, &handler->conf->environment) {
+ if (putenv((char *)iterator->elem)) {
+ SYSERROR("failed to set environment variable '%s'", (char *)iterator->elem);
+ goto out_warn_father;
+ }
+ }
+
if (putenv("container=lxc")) {
- SYSERROR("failed to set environment variable");
+ SYSERROR("failed to set environment variable 'container=lxc'");
goto out_warn_father;
}
- process_lock();
+ if (handler->conf->pty_names) {
+ if (putenv(handler->conf->pty_names)) {
+ SYSERROR("failed to set environment variable for container ptys");
+ goto out_warn_father;
+ }
+ }
+
close(handler->sigfd);
- process_unlock();
+
+ if (handler->backgrounded && null_stdfds() < 0)
+ goto out_warn_father;
/* after this call, we are in error because this
* ops should not return as it execs */
return -1;
}
-int save_phys_nics(struct lxc_conf *conf)
+static int save_phys_nics(struct lxc_conf *conf)
{
struct lxc_list *iterator;
SYSERROR("failed to allocate memory");
return -1;
}
- INFO("stored saved_nic #%d idx %d name %s\n", conf->num_savednics,
+ INFO("stored saved_nic #%d idx %d name %s", conf->num_savednics,
conf->saved_nics[conf->num_savednics].ifindex,
conf->saved_nics[conf->num_savednics].orig_name);
conf->num_savednics++;
return 0;
}
-int lxc_spawn(struct lxc_handler *handler)
+/*
+ * Receive a single file descriptor over @sock into *@fd.
+ * Returns 0 on success; -1 if the receive call fails (logged) or if the
+ * descriptor that arrived is -1.
+ */
+static int recv_fd(int sock, int *fd)
+{
+	if (lxc_abstract_unix_recv_fd(sock, fd, NULL, 0) < 0) {
+		SYSERROR("Error receiving tty fd from child");
+		return -1;
+	}
+
+	return (*fd == -1) ? -1 : 0;
+}
+
+/*
+ * Receive conf->tty (slave, master) descriptor pairs over
+ * handler->ttysock[1] and store them in conf->tty_info.
+ *
+ * Returns 0 when conf->tty is 0 or when all pairs were received (nbtty is
+ * then set to conf->tty).  Returns -1 on allocation or receive failure; in
+ * that case every descriptor received so far is closed and the pty_info
+ * array is freed and reset to NULL, so nothing is left half-initialised.
+ */
+static int recv_ttys_from_child(struct lxc_handler *handler)
+{
+	struct lxc_conf *conf = handler->conf;
+	struct lxc_tty_info *tty_info = &conf->tty_info;
+	int sock = handler->ttysock[1];
+	int i;
+
+	if (!conf->tty)
+		return 0;
+
+	/* calloc checks the count * size multiplication for overflow */
+	tty_info->pty_info = calloc(conf->tty, sizeof(*tty_info->pty_info));
+	if (!tty_info->pty_info) {
+		SYSERROR("failed to allocate pty_info");
+		return -1;
+	}
+
+	for (i = 0; i < conf->tty; i++) {
+		struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
+
+		pty_info->busy = 0;
+		/* mark both ends as "not received" for the cleanup path */
+		pty_info->slave = -1;
+		pty_info->master = -1;
+		if (recv_fd(sock, &pty_info->slave) < 0 ||
+		    recv_fd(sock, &pty_info->master) < 0) {
+			ERROR("Error receiving tty info from child");
+			goto out_release;
+		}
+	}
+	tty_info->nbtty = conf->tty;
+
+	return 0;
+
+out_release:
+	/* entry i may be partially filled; entries below i are complete */
+	for (; i >= 0; i--) {
+		struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
+
+		if (pty_info->slave >= 0)
+			close(pty_info->slave);
+		if (pty_info->master >= 0)
+			close(pty_info->master);
+	}
+	free(tty_info->pty_info);
+	tty_info->pty_info = NULL;
+	return -1;
+}
+
+/*
+ * Compute handler->clone_flags from the container configuration.
+ *
+ * CLONE_NEWPID and CLONE_NEWNS are always set.  CLONE_NEWUSER is added when
+ * an id map is configured.  For net, IPC and UTS a new namespace is
+ * requested only when no fd to inherit is configured (inherit_ns_fd slot
+ * is -1); for net this additionally requires that
+ * lxc_requests_empty_network() returns false.
+ */
+void resolve_clone_flags(struct lxc_handler *handler)
+{
+	struct lxc_conf *conf = handler->conf;
+
+	handler->clone_flags = CLONE_NEWPID | CLONE_NEWNS;
+
+	if (!lxc_list_empty(&conf->id_map)) {
+		INFO("Cloning a new user namespace");
+		handler->clone_flags |= CLONE_NEWUSER;
+	}
+
+	if (conf->inherit_ns_fd[LXC_NS_NET] != -1) {
+		INFO("Inheriting a net namespace");
+	} else if (!lxc_requests_empty_network(handler)) {
+		handler->clone_flags |= CLONE_NEWNET;
+	}
+
+	if (conf->inherit_ns_fd[LXC_NS_IPC] != -1) {
+		INFO("Inheriting an IPC namespace");
+	} else {
+		handler->clone_flags |= CLONE_NEWIPC;
+	}
+
+	if (conf->inherit_ns_fd[LXC_NS_UTS] != -1) {
+		INFO("Inheriting a UTS namespace");
+	} else {
+		handler->clone_flags |= CLONE_NEWUTS;
+	}
+}
+
+static int lxc_spawn(struct lxc_handler *handler)
{
int failed_before_rename = 0;
const char *name = handler->name;
- struct cgroup_meta_data *cgroup_meta = NULL;
- const char *cgroup_pattern = NULL;
+ bool cgroups_connected = false;
int saved_ns_fd[LXC_NS_MAX];
int preserve_mask = 0, i;
+ int netpipepair[2], nveths;
+
+ netpipe = -1;
for (i = 0; i < LXC_NS_MAX; i++)
if (handler->conf->inherit_ns_fd[i] != -1)
if (lxc_sync_init(handler))
return -1;
- handler->clone_flags = CLONE_NEWPID|CLONE_NEWNS;
- if (!lxc_list_empty(&handler->conf->id_map)) {
- INFO("Cloning a new user namespace");
- handler->clone_flags |= CLONE_NEWUSER;
+ if (socketpair(AF_UNIX, SOCK_DGRAM, 0, handler->ttysock) < 0) {
+ lxc_sync_fini(handler);
+ return -1;
}
- if (handler->conf->inherit_ns_fd[LXC_NS_NET] == -1) {
- if (!lxc_requests_empty_network(handler))
- handler->clone_flags |= CLONE_NEWNET;
+ resolve_clone_flags(handler);
+ if (handler->clone_flags & CLONE_NEWNET) {
if (!lxc_list_empty(&handler->conf->network)) {
/* Find gateway addresses from the link device, which is
ERROR("failed to save physical nic info");
goto out_abort;
}
- } else {
- INFO("Inheriting a net namespace");
}
- if (handler->conf->inherit_ns_fd[LXC_NS_IPC] == -1) {
- handler->clone_flags |= CLONE_NEWIPC;
- } else {
- INFO("Inheriting an IPC namespace");
- }
-
- if (handler->conf->inherit_ns_fd[LXC_NS_UTS] == -1) {
- handler->clone_flags |= CLONE_NEWUTS;
- } else {
- INFO("Inheriting a UTS namespace");
- }
-
-
- cgroup_meta = lxc_cgroup_load_meta();
- if (!cgroup_meta) {
- ERROR("failed to detect cgroup metadata");
+ if (!cgroup_init(handler)) {
+ ERROR("failed initializing cgroup support");
goto out_delete_net;
}
- /* if we are running as root, use system cgroup pattern, otherwise
- * just create a cgroup under the current one. But also fall back to
- * that if for some reason reading the configuration fails and no
- * default value is available
- */
- if (getuid() == 0)
- cgroup_pattern = default_cgroup_pattern();
- if (!cgroup_pattern)
- cgroup_pattern = "%n";
+ cgroups_connected = true;
- /* Create cgroup before doing clone(), so the child will know from
- * handler which cgroup it is going to be put in later.
- */
- if ((handler->cgroup = lxc_cgroup_create(name, cgroup_pattern, cgroup_meta, NULL)) == NULL) {
- ERROR("failed to create cgroups for '%s'", name);
+ if (!cgroup_create(handler)) {
+ ERROR("failed creating cgroups");
goto out_delete_net;
}
/*
* if the rootfs is not a blockdev, prevent the container from
* marking it readonly.
+ *
+ * if the container is unprivileged then skip rootfs pinning
*/
+ if (lxc_list_empty(&handler->conf->id_map)) {
+ handler->pinfd = pin_rootfs(handler->conf->rootfs.path);
+ if (handler->pinfd == -1)
+ INFO("failed to pin the container's rootfs");
+ }
- handler->pinfd = pin_rootfs(handler->conf->rootfs.path);
- if (handler->pinfd == -1)
- INFO("failed to pin the container's rootfs");
+ if (preserve_ns(saved_ns_fd, preserve_mask) < 0)
+ goto out_delete_net;
+ if (attach_ns(handler->conf->inherit_ns_fd) < 0)
+ goto out_delete_net;
- preserve_ns(saved_ns_fd, preserve_mask);
- attach_ns(handler->conf->inherit_ns_fd);
+ if (am_unpriv() && (nveths = count_veths(&handler->conf->network))) {
+ if (pipe(netpipepair) < 0) {
+ SYSERROR("Error creating pipe");
+ goto out_delete_net;
+ }
+ /* store netpipe in the global var for do_start's use */
+ netpipe = netpipepair[0];
+ }
/* Create a process in a new set of namespaces */
handler->pid = lxc_clone(do_start, handler, handler->clone_flags);
goto out_delete_net;
}
- attach_ns(saved_ns_fd);
+ if (attach_ns(saved_ns_fd))
+ WARN("failed to restore saved namespaces");
lxc_sync_fini_child(handler);
if (lxc_sync_wait_child(handler, LXC_SYNC_CONFIGURE))
failed_before_rename = 1;
- /* In case there is still legacy ns cgroup support in the kernel.
- * Should be removed at some later point in time.
- */
- if (lxc_cgroup_create_legacy(handler->cgroup, name, handler->pid) < 0) {
- ERROR("failed to create legacy ns cgroups for '%s'", name);
+ if (!cgroup_create_legacy(handler)) {
+ ERROR("failed to setup the legacy cgroups for %s", name);
goto out_delete_net;
}
-
- if (lxc_setup_cgroup_without_devices(handler, &handler->conf->cgroup)) {
- ERROR("failed to setup the cgroups for '%s'", name);
+ if (!cgroup_setup_limits(handler, false)) {
+ ERROR("failed to setup the cgroup limits for '%s'", name);
goto out_delete_net;
}
- if (lxc_cgroup_enter(handler->cgroup, handler->pid, false) < 0)
+ if (!cgroup_enter(handler))
+ goto out_delete_net;
+
+ if (!cgroup_chown(handler))
goto out_delete_net;
if (failed_before_rename)
}
}
+ if (netpipe != -1) {
+ struct lxc_list *iterator;
+ struct lxc_netdev *netdev;
+
+ close(netpipe);
+ lxc_list_for_each(iterator, &handler->conf->network) {
+ netdev = iterator->elem;
+ if (netdev->type != LXC_NET_VETH)
+ continue;
+ if (write(netpipepair[1], netdev->name, IFNAMSIZ) != IFNAMSIZ) {
+ ERROR("Error writing veth name to container");
+ goto out_delete_net;
+ }
+ }
+ close(netpipepair[1]);
+ }
+
/* map the container uids - the container became an invalid
* userid the moment it was cloned with CLONE_NEWUSER - this
* call doesn't change anything immediately, but allows the
if (lxc_sync_barrier_child(handler, LXC_SYNC_POST_CONFIGURE))
goto out_delete_net;
- if (lxc_setup_cgroup_devices(handler, &handler->conf->cgroup)) {
+ if (!cgroup_setup_limits(handler, true)) {
ERROR("failed to setup the devices cgroup for '%s'", name);
goto out_delete_net;
}
+ cgroup_disconnect();
+ cgroups_connected = false;
+
+ /* read tty fds allocated by child */
+ if (recv_ttys_from_child(handler) < 0) {
+ ERROR("failed to receive tty info from child");
+ goto out_delete_net;
+ }
+
/* Tell the child to complete its initialization and wait for
* it to exec or return an error. (the child will never
* return LXC_SYNC_POST_CGROUP+1. It will either close the
goto out_abort;
}
- lxc_cgroup_put_meta(cgroup_meta);
lxc_sync_fini(handler);
return 0;
out_delete_net:
+ if (cgroups_connected)
+ cgroup_disconnect();
if (handler->clone_flags & CLONE_NEWNET)
lxc_delete_network(handler);
out_abort:
- lxc_cgroup_put_meta(cgroup_meta);
lxc_abort(name, handler);
lxc_sync_fini(handler);
if (handler->pinfd >= 0) {
- process_lock();
close(handler->pinfd);
- process_unlock();
handler->pinfd = -1;
}
return -1;
}
+/*
+ * Open /proc/<pid>/ns/net read-only and return the descriptor.
+ *
+ * The descriptor is opened with O_CLOEXEC, matching the namespace fds
+ * opened in preserve_ns(), so it is not inherited by programs exec'd
+ * while it is held.
+ *
+ * Returns the open fd, or -1 (after logging a warning) if the path does
+ * not fit in MAXPATHLEN or open() fails.  The caller owns the fd.
+ */
+int get_netns_fd(int pid)
+{
+	char path[MAXPATHLEN];
+	int ret, fd;
+
+	ret = snprintf(path, MAXPATHLEN, "/proc/%d/ns/net", pid);
+	if (ret < 0 || ret >= MAXPATHLEN) {
+		WARN("Failed to pin netns file for pid %d", pid);
+		return -1;
+	}
+
+	fd = open(path, O_RDONLY | O_CLOEXEC);
+	if (fd < 0) {
+		WARN("Failed to pin netns file %s for pid %d: %s",
+		     path, pid, strerror(errno));
+		return -1;
+	}
+	return fd;
+}
+
int __lxc_start(const char *name, struct lxc_conf *conf,
- struct lxc_operations* ops, void *data, const char *lxcpath)
+ struct lxc_operations* ops, void *data, const char *lxcpath,
+ bool backgrounded)
{
struct lxc_handler *handler;
int err = -1;
int status;
+ int netnsfd = -1;
handler = lxc_init(name, conf, lxcpath);
if (!handler) {
}
handler->ops = ops;
handler->data = data;
+ handler->backgrounded = backgrounded;
if (must_drop_cap_sys_boot(handler->conf)) {
#if HAVE_SYS_CAPABILITY_H
- DEBUG("Dropping cap_sys_boot\n");
+ DEBUG("Dropping cap_sys_boot");
#else
- DEBUG("Can't drop cap_sys_boot as capabilities aren't supported\n");
+ DEBUG("Can't drop cap_sys_boot as capabilities aren't supported");
#endif
} else {
- DEBUG("Not dropping cap_sys_boot or watching utmp\n");
+ DEBUG("Not dropping cap_sys_boot or watching utmp");
handler->conf->need_utmp_watch = 0;
}
+ if (!attach_block_device(handler->conf)) {
+ ERROR("Failure attaching block device");
+ goto out_fini_nonet;
+ }
+
+ if (geteuid() == 0 && !lxc_list_empty(&conf->id_map)) {
+ /* if the backing store is a device, mount it here and now */
+ if (rootfs_is_blockdev(conf)) {
+ if (unshare(CLONE_NEWNS) < 0) {
+ ERROR("Error unsharing mounts");
+ goto out_fini_nonet;
+ }
+ remount_all_slave();
+ if (do_rootfs_setup(conf, name, lxcpath) < 0) {
+ ERROR("Error setting up rootfs mount as root before spawn");
+ goto out_fini_nonet;
+ }
+ INFO("Set up container rootfs as host root");
+ }
+ }
+
err = lxc_spawn(handler);
if (err) {
ERROR("failed to spawn '%s'", name);
- goto out_fini_nonet;
+ goto out_detach_blockdev;
}
+ handler->conf->reboot = 0;
+
+ netnsfd = get_netns_fd(handler->pid);
+
err = lxc_poll(name, handler);
if (err) {
ERROR("mainloop exited with an error");
+ if (netnsfd >= 0)
+ close(netnsfd);
goto out_abort;
}
DEBUG("Container rebooting");
handler->conf->reboot = 1;
break;
+ case SIGSYS: /* seccomp */
+ DEBUG("Container violated its seccomp policy");
+ break;
default:
- DEBUG("unknown exit status for init: %d\n", WTERMSIG(status));
+ DEBUG("unknown exit status for init: %d", WTERMSIG(status));
break;
}
}
- lxc_rename_phys_nics_on_shutdown(handler->conf);
+ DEBUG("Pushing physical nics back to host namespace");
+ lxc_rename_phys_nics_on_shutdown(netnsfd, handler->conf);
+
+ DEBUG("Tearing down virtual network devices used by container");
+ lxc_delete_network(handler);
+
+ if (netnsfd >= 0)
+ close(netnsfd);
if (handler->pinfd >= 0) {
- process_lock();
close(handler->pinfd);
- process_unlock();
handler->pinfd = -1;
}
+ lxc_monitor_send_exit_code(name, status, handler->lxcpath);
err = lxc_error_set_and_log(handler->pid, status);
out_fini:
lxc_delete_network(handler);
+out_detach_blockdev:
+ detach_block_device(handler->conf);
+
out_fini_nonet:
lxc_fini(name, handler);
return err;
};
/*
 * Entry point that starts a container: wraps argv in a struct start_args,
 * sets conf->need_utmp_watch to 1 and hands everything to __lxc_start()
 * together with start_ops, returning its result.
 * On the added ('+') side the function takes a 'backgrounded' flag that is
 * forwarded to __lxc_start(), and the lxc_check_inherited() call that used
 * to run first is removed from here.
 */
int lxc_start(const char *name, char *const argv[], struct lxc_conf *conf,
- const char *lxcpath)
+ const char *lxcpath, bool backgrounded)
{
struct start_args start_arg = {
.argv = argv,
};
- if (lxc_check_inherited(conf, -1))
- return -1;
-
conf->need_utmp_watch = 1;
- return __lxc_start(name, conf, &start_ops, &start_arg, lxcpath);
+ return __lxc_start(name, conf, &start_ops, &start_arg, lxcpath, backgrounded);
}
+
+/*
+ * Tear down the on-disk state of container @name (called from lxc_fini()
+ * for ephemeral containers that are not rebooting).
+ *
+ * Steps, each of which aborts the teardown on failure:
+ *   1. if the config has both rootfs.path and rootfs.mount, destroy the
+ *      rootfs via do_destroy_container();
+ *   2. build "<lxcpath>/<name>";
+ *   3. best effort: take the container disk lock and call
+ *      mod_all_rdeps(c, false); a lock failure is only logged;
+ *   4. remove the container directory, inside the user namespace when
+ *      running unprivileged.
+ */
+static void lxc_destroy_container_on_signal(struct lxc_handler *handler,
+					    const char *name)
+{
+	char destroy[MAXPATHLEN];
+	int ret;
+	struct lxc_container *c;
+
+	if (handler->conf && handler->conf->rootfs.path && handler->conf->rootfs.mount) {
+		if (!do_destroy_container(handler->conf)) {
+			ERROR("Error destroying rootfs for %s", name);
+			return;
+		}
+		/* only claim success when a rootfs was actually destroyed */
+		INFO("Destroyed rootfs for %s", name);
+	}
+
+	ret = snprintf(destroy, MAXPATHLEN, "%s/%s", handler->lxcpath, name);
+	if (ret < 0 || ret >= MAXPATHLEN) {
+		ERROR("Error printing path for %s", name);
+		ERROR("Error destroying directory for %s", name);
+		return;
+	}
+
+	c = lxc_container_new(name, handler->lxcpath);
+	if (c) {
+		if (container_disk_lock(c)) {
+			INFO("Could not update lxc_snapshots file");
+		} else {
+			mod_all_rdeps(c, false);
+			container_disk_unlock(c);
+		}
+		lxc_container_put(c);
+	}
+
+	if (am_unpriv())
+		ret = userns_exec_1(handler->conf, lxc_rmdir_onedev_wrapper, destroy);
+	else
+		ret = lxc_rmdir_onedev(destroy, NULL);
+
+	if (ret < 0) {
+		ERROR("Error destroying directory for %s", name);
+		return;
+	}
+	INFO("Destroyed directory for %s", name);
+}
+
+/*
+ * Callback adapter with an int (*)(void *) shape: @data is the path string
+ * to remove; forwards it to lxc_rmdir_onedev() with a NULL second argument
+ * and returns that call's result.
+ */
+static int lxc_rmdir_onedev_wrapper(void *data)
+{
+	char *path = data;
+
+	return lxc_rmdir_onedev(path, NULL);
+}
+
+/*
+ * Destroy the container's backing store described by @conf.
+ * Privileged: returns bdev_destroy(conf) directly.
+ * Unprivileged: runs bdev_destroy_wrapper through userns_exec_1() and
+ * returns false only when that call reports a negative result.
+ */
+static bool do_destroy_container(struct lxc_conf *conf)
+{
+	if (!am_unpriv())
+		return bdev_destroy(conf);
+
+	return userns_exec_1(conf, bdev_destroy_wrapper, conf) >= 0;
+}
+