git.proxmox.com Git - mirror_lxc.git/blobdiff - src/lxc/start.c
conf: explicitly remove veth device from host
[mirror_lxc.git] / src / lxc / start.c
index ad8867eb5102efc5d792998f7bbf39a09849f92a..cc48962449c7bb94346c6d9f22dc3c5871f50974 100644 (file)
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#define _GNU_SOURCE
 #include "config.h"
 
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
+#include <alloca.h>
 #include <dirent.h>
 #include <errno.h>
-#include <unistd.h>
-#include <signal.h>
 #include <fcntl.h>
 #include <grp.h>
 #include <poll.h>
-#include <sys/param.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
 #include <sys/file.h>
 #include <sys/mount.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <sys/socket.h>
+#include <sys/param.h>
 #include <sys/prctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
 #include <sys/types.h>
-#include <sys/wait.h>
 #include <sys/un.h>
-#include <sys/syscall.h>
+#include <sys/wait.h>
 
 #if HAVE_SYS_CAPABILITY_H
 #include <sys/capability.h>
 #endif
 
-#if !HAVE_DECL_PR_CAPBSET_DROP
+#ifndef HAVE_DECL_PR_CAPBSET_DROP
 #define PR_CAPBSET_DROP 24
 #endif
 
-#include "start.h"
-#include "conf.h"
-#include "log.h"
+#ifndef HAVE_DECL_PR_SET_NO_NEW_PRIVS
+#define PR_SET_NO_NEW_PRIVS 38
+#endif
+
+#ifndef HAVE_DECL_PR_GET_NO_NEW_PRIVS
+#define PR_GET_NO_NEW_PRIVS 39
+#endif
+
+#include "af_unix.h"
+#include "bdev.h"
+#include "caps.h"
 #include "cgroup.h"
+#include "commands.h"
+#include "conf.h"
+#include "console.h"
 #include "error.h"
-#include "af_unix.h"
-#include "mainloop.h"
-#include "utils.h"
+#include "log.h"
+#include "lxclock.h"
+#include "lxcseccomp.h"
 #include "lxcutmp.h"
+#include "mainloop.h"
 #include "monitor.h"
-#include "commands.h"
-#include "console.h"
-#include "sync.h"
 #include "namespace.h"
-#include "lxcseccomp.h"
-#include "caps.h"
-#include "bdev.h"
+#include "start.h"
+#include "sync.h"
+#include "utils.h"
 #include "lsm/lsm.h"
 
 lxc_log_define(lxc_start, lxc);
@@ -80,20 +90,29 @@ const struct ns_info ns_info[LXC_NS_MAX] = {
        [LXC_NS_UTS] = {"uts", CLONE_NEWUTS},
        [LXC_NS_IPC] = {"ipc", CLONE_NEWIPC},
        [LXC_NS_USER] = {"user", CLONE_NEWUSER},
-       [LXC_NS_NET] = {"net", CLONE_NEWNET}
+       [LXC_NS_NET] = {"net", CLONE_NEWNET},
+       [LXC_NS_CGROUP] = {"cgroup", CLONE_NEWCGROUP}
 };
 
+extern void mod_all_rdeps(struct lxc_container *c, bool inc);
+static bool do_destroy_container(struct lxc_conf *conf);
+static int lxc_rmdir_onedev_wrapper(void *data);
+static void lxc_destroy_container_on_signal(struct lxc_handler *handler,
+                                           const char *name);
+
 static void print_top_failing_dir(const char *path)
 {
        size_t len = strlen(path);
-       char *copy = alloca(len+1), *p, *e, saved;
+       char *copy = alloca(len + 1), *p, *e, saved;
        strcpy(copy, path);
 
        p = copy;
        e = copy + len;
        while (p < e) {
-               while (p < e && *p == '/') p++;
-               while (p < e && *p != '/') p++;
+               while (p < e && *p == '/')
+                       p++;
+               while (p < e && *p != '/')
+                       p++;
                saved = *p;
                *p = '\0';
                if (access(copy, X_OK)) {
@@ -117,35 +136,52 @@ static void close_ns(int ns_fd[LXC_NS_MAX]) {
        }
 }
 
-static int preserve_ns(int ns_fd[LXC_NS_MAX], int clone_flags) {
-       int i, saved_errno;
+/*
+ * preserve_ns: open /proc/@pid/ns/@ns for each namespace specified
+ * in clone_flags and store the fds in ns_fd (-1 for namespaces not requested).
+ * Return true on success, false on failure.  On failure, *errmsg holds an
+ * allocated message the caller must free, or NULL if allocation failed.
+ */
+static bool preserve_ns(int ns_fd[LXC_NS_MAX], int clone_flags, pid_t pid,
+                       char **errmsg)
+{
+       int i, ret;
        char path[MAXPATHLEN];
 
        for (i = 0; i < LXC_NS_MAX; i++)
                ns_fd[i] = -1;
 
-       if (access("/proc/self/ns", X_OK)) {
-               WARN("Kernel does not support attach; preserve_ns ignored");
-               return 0;
+       snprintf(path, MAXPATHLEN, "/proc/%d/ns", pid);
+       if (access(path, X_OK)) {
+               if (asprintf(errmsg, "Kernel does not support setns.") == -1)
+                       *errmsg = NULL;
+               return false;
        }
 
        for (i = 0; i < LXC_NS_MAX; i++) {
                if ((clone_flags & ns_info[i].clone_flag) == 0)
                        continue;
-               snprintf(path, MAXPATHLEN, "/proc/self/ns/%s", ns_info[i].proc_name);
+               snprintf(path, MAXPATHLEN, "/proc/%d/ns/%s", pid,
+                        ns_info[i].proc_name);
                ns_fd[i] = open(path, O_RDONLY | O_CLOEXEC);
                if (ns_fd[i] < 0)
                        goto error;
        }
 
-       return 0;
+       return true;
 
 error:
-       saved_errno = errno;
+       if (errno == ENOENT) {
+               ret = asprintf(errmsg, "Kernel does not support setns for %s",
+                       ns_info[i].proc_name);
+       } else {
+               ret = asprintf(errmsg, "Failed to open %s: %s",
+                       path, strerror(errno));
+       }
+       if (ret == -1)
+               *errmsg = NULL;
        close_ns(ns_fd);
-       errno = saved_errno;
-       SYSERROR("failed to open '%s'", path);
-       return -1;
+       return false;
 }
 
 static int attach_ns(const int ns_fd[LXC_NS_MAX]) {
@@ -181,7 +217,7 @@ static int match_fd(int fd)
  */
 int lxc_check_inherited(struct lxc_conf *conf, bool closeall, int fd_to_ignore)
 {
-       struct dirent dirent, *direntp;
+       struct dirent *direntp;
        int fd, fddir;
        DIR *dir;
 
@@ -197,7 +233,7 @@ restart:
 
        fddir = dirfd(dir);
 
-       while (!readdir_r(dir, &dirent, &direntp)) {
+       while ((direntp = readdir(dir))) {
                if (!direntp)
                        break;
 
@@ -227,6 +263,13 @@ restart:
                WARN("inherited fd %d", fd);
        }
 
+       /*
+        * Only enable syslog at this point. Enabling it earlier would let the
+        * logging calls above open a new fd, which would make the
+        * check_inherited function enter an infinite loop.
+        */
+       lxc_log_enable_syslog();
+
        closedir(dir); /* cannot fail */
        return 0;
 }
@@ -265,7 +308,7 @@ static int setup_signal_fd(sigset_t *oldmask)
 }
 
 static int signal_handler(int fd, uint32_t events, void *data,
-                          struct lxc_epoll_descr *descr)
+                         struct lxc_epoll_descr *descr)
 {
        struct signalfd_siginfo siginfo;
        siginfo_t info;
@@ -338,7 +381,7 @@ int lxc_poll(const char *name, struct lxc_handler *handler)
                goto out_mainloop_open;
        }
 
-       if (lxc_console_mainloop_add(&descr, handler)) {
+       if (lxc_console_mainloop_add(&descr, handler->conf)) {
                ERROR("failed to add console handler to mainloop");
                goto out_mainloop_open;
        }
@@ -370,6 +413,7 @@ out_sigfd:
 
 struct lxc_handler *lxc_init(const char *name, struct lxc_conf *conf, const char *lxcpath)
 {
+       int i;
        struct lxc_handler *handler;
 
        handler = malloc(sizeof(*handler));
@@ -383,6 +427,9 @@ struct lxc_handler *lxc_init(const char *name, struct lxc_conf *conf, const char
        handler->lxcpath = lxcpath;
        handler->pinfd = -1;
 
+       for (i = 0; i < LXC_NS_MAX; i++)
+               handler->nsfd[i] = -1;
+
        lsm_init();
 
        handler->name = strdup(name);
@@ -424,6 +471,9 @@ struct lxc_handler *lxc_init(const char *name, struct lxc_conf *conf, const char
        if (conf->console.log_path && setenv("LXC_CONSOLE_LOGPATH", conf->console.log_path, 1)) {
                SYSERROR("failed to set environment variable for console log");
        }
+       if (setenv("LXC_CGNS_AWARE", "1", 1)) {
+               SYSERROR("failed to set LXC_CGNS_AWARE environment variable");
+       }
        /* End of environment variable setup for hooks */
 
        if (run_lxc_hooks(name, "pre-start", conf, handler->lxcpath, NULL)) {
@@ -473,14 +523,63 @@ out_free:
 
 void lxc_fini(const char *name, struct lxc_handler *handler)
 {
+       int i, rc;
+       pid_t self = getpid();
+       char *namespaces[LXC_NS_MAX+1];
+       size_t namespace_count = 0;
+
        /* The STOPPING state is there for future cleanup code
         * which can take awhile
         */
        lxc_set_state(name, handler, STOPPING);
+
+       for (i = 0; i < LXC_NS_MAX; i++) {
+               if (handler->nsfd[i] != -1) {
+                       rc = asprintf(&namespaces[namespace_count], "%s:/proc/%d/fd/%d",
+                                     ns_info[i].proc_name, self, handler->nsfd[i]);
+                       if (rc == -1) {
+                               SYSERROR("failed to allocate memory");
+                               break;
+                       }
+                       ++namespace_count;
+               }
+       }
+       namespaces[namespace_count] = NULL;
+
+       if (handler->conf->reboot && setenv("LXC_TARGET", "reboot", 1)) {
+               SYSERROR("failed to set environment variable for stop target");
+       }
+       if (!handler->conf->reboot && setenv("LXC_TARGET", "stop", 1)) {
+               SYSERROR("failed to set environment variable for stop target");
+       }
+
+       if (run_lxc_hooks(name, "stop", handler->conf, handler->lxcpath, namespaces))
+               ERROR("failed to run stop hooks for container '%s'.", name);
+
+       while (namespace_count--)
+               free(namespaces[namespace_count]);
+       for (i = 0; i < LXC_NS_MAX; i++) {
+               if (handler->nsfd[i] != -1) {
+                       close(handler->nsfd[i]);
+                       handler->nsfd[i] = -1;
+               }
+       }
+
+       if (handler->netnsfd >= 0) {
+               close(handler->netnsfd);
+               handler->netnsfd = -1;
+       }
+
        lxc_set_state(name, handler, STOPPED);
 
-       if (run_lxc_hooks(name, "post-stop", handler->conf, handler->lxcpath, NULL))
+       if (run_lxc_hooks(name, "post-stop", handler->conf, handler->lxcpath, NULL)) {
                ERROR("failed to run post-stop hooks for container '%s'.", name);
+               if (handler->conf->reboot) {
+                       WARN("Container will be stopped instead of rebooted.");
+                       handler->conf->reboot = 0;
+                       setenv("LXC_TARGET", "stop", 1);
+               }
+       }
 
        /* reset mask set by setup_signal_fd */
        if (sigprocmask(SIG_SETMASK, &handler->oldmask, NULL))
@@ -495,6 +594,10 @@ void lxc_fini(const char *name, struct lxc_handler *handler)
                close(handler->ttysock[0]);
                close(handler->ttysock[1]);
        }
+
+       if (handler->conf->ephemeral == 1 && handler->conf->reboot != 1)
+               lxc_destroy_container_on_signal(handler, name);
+
        cgroup_destroy(handler);
        free(handler);
 }
@@ -519,10 +622,10 @@ void lxc_abort(const char *name, struct lxc_handler *handler)
  */
 static int container_reboot_supported(void *arg)
 {
-        int *cmd = arg;
+       int *cmd = arg;
        int ret;
 
-        ret = reboot(*cmd);
+       ret = reboot(*cmd);
        if (ret == -1 && errno == EINVAL)
                return 1;
        return 0;
@@ -532,10 +635,10 @@ static int must_drop_cap_sys_boot(struct lxc_conf *conf)
 {
        FILE *f;
        int ret, cmd, v, flags;
-        long stack_size = 4096;
-        void *stack = alloca(stack_size);
-        int status;
-        pid_t pid;
+       long stack_size = 4096;
+       void *stack = alloca(stack_size);
+       int status;
+       pid_t pid;
 
        f = fopen("/proc/sys/kernel/ctrl-alt-del", "r");
        if (!f) {
@@ -629,13 +732,15 @@ static int do_start(void *data)
 {
        struct lxc_list *iterator;
        struct lxc_handler *handler = data;
+       int devnull_fd = -1, ret;
+       char path[PATH_MAX];
 
        if (sigprocmask(SIG_SETMASK, &handler->oldmask, NULL)) {
                SYSERROR("failed to set sigprocmask");
                return -1;
        }
 
-        /* This prctl must be before the synchro, so if the parent
+       /* This prctl must be before the synchro, so if the parent
         * dies before we set the parent death signal, we will detect
         * its death with the synchro right after, otherwise we have
         * a window where the parent can exit before we set the pdeath
@@ -653,6 +758,22 @@ static int do_start(void *data)
                close(handler->pinfd);
        }
 
+       if (lxc_sync_wait_parent(handler, LXC_SYNC_STARTUP))
+               return -1;
+
+       /* Unshare CLONE_NEWNET after CLONE_NEWUSER  - see
+         https://github.com/lxc/lxd/issues/1978 */
+       if ((handler->clone_flags & (CLONE_NEWNET | CLONE_NEWUSER)) ==
+                       (CLONE_NEWNET | CLONE_NEWUSER)) {
+               ret = unshare(CLONE_NEWNET);
+               if (ret < 0) {
+                       SYSERROR("Error unsharing network namespace");
+                       goto out_warn_father;
+               } else {
+                       INFO("Unshared NET namespace.");
+               }
+       }
+
        /* Tell the parent task it can begin to configure the
         * container and wait for it to finish
         */
@@ -664,15 +785,25 @@ static int do_start(void *data)
 
        /*
         * if we are in a new user namespace, become root there to have
-        * privilege over our namespace
+        * privilege over our namespace. When using lxc-execute we default to root,
+        * but this can be overridden using the lxc.init_uid and lxc.init_gid
+        * configuration options.
         */
        if (!lxc_list_empty(&handler->conf->id_map)) {
-               NOTICE("switching to gid/uid 0 in new user namespace");
-               if (setgid(0)) {
+               gid_t new_gid = 0;
+               if (handler->conf->is_execute && handler->conf->init_gid)
+                       new_gid = handler->conf->init_gid;
+
+               uid_t new_uid = 0;
+               if (handler->conf->is_execute && handler->conf->init_uid)
+                       new_uid = handler->conf->init_uid;
+
+               NOTICE("switching to gid/uid %d/%d in new user namespace", new_gid, new_uid);
+               if (setgid(new_gid)) {
                        SYSERROR("setgid");
                        goto out_warn_father;
                }
-               if (setuid(0)) {
+               if (setuid(new_uid)) {
                        SYSERROR("setuid");
                        goto out_warn_father;
                }
@@ -697,6 +828,30 @@ static int do_start(void *data)
        }
        #endif
 
+       ret = snprintf(path, sizeof(path), "%s/dev/null", handler->conf->rootfs.mount);
+       if (ret < 0 || ret >= sizeof(path)) {
+               SYSERROR("sprintf'd too many chars");
+               goto out_warn_father;
+       }
+
+       /* In order to checkpoint restore, we need to have everything in the
+        * same mount namespace. However, some containers may not have a
+        * reasonable /dev (in particular, they may not have /dev/null), so we
+        * can't set init's std fds to /dev/null by opening it from inside the
+        * container.
+        *
+        * If that's the case, fall back to using the host's /dev/null. This
+        * means that migration won't work, but at least we won't spew output
+        * where it isn't wanted.
+        */
+       if (handler->backgrounded && !handler->conf->autodev && access(path, F_OK) < 0) {
+               devnull_fd = open_devnull();
+
+               if (devnull_fd < 0)
+                       goto out_warn_father;
+               WARN("using host's /dev/null for container init's std fds, migraiton won't work");
+       }
+
        /* Setup the container, ip, names, utsname, ... */
        if (lxc_setup(handler)) {
                ERROR("failed to setup the container");
@@ -705,19 +860,49 @@ static int do_start(void *data)
 
        /* ask father to setup cgroups and wait for him to finish */
        if (lxc_sync_barrier_parent(handler, LXC_SYNC_CGROUP))
-               return -1;
+               goto out_error;
+
+       /* Unshare cgroup namespace after we have setup our cgroups. If we do it
+        * earlier we end up with a wrong view of /proc/self/cgroup. For
+        * example, assume we unshare(CLONE_NEWCGROUP) first, and then create
+        * the cgroup for the container, say /sys/fs/cgroup/cpuset/lxc/c, then
+        * /proc/self/cgroup would show us:
+        *
+        *      8:cpuset:/lxc/c
+        *
+        * whereas it should actually show
+        *
+        *      8:cpuset:/
+        */
+       if (cgns_supported()) {
+               if (unshare(CLONE_NEWCGROUP) < 0) {
+                       INFO("Failed to unshare CLONE_NEWCGROUP.");
+                       goto out_warn_father;
+               }
+               INFO("Unshared CLONE_NEWCGROUP.");
+       }
 
        /* Set the label to change to when we exec(2) the container's init */
        if (lsm_process_label_set(NULL, handler->conf, 1, 1) < 0)
                goto out_warn_father;
 
+       /* Set PR_SET_NO_NEW_PRIVS after we changed the lsm label. If we set it
+        * first, we are no longer allowed to change the label. */
+       if (handler->conf->no_new_privs) {
+               if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
+                       SYSERROR("Could not set PR_SET_NO_NEW_PRIVS to block execve() gainable privileges.");
+                       goto out_warn_father;
+               }
+               DEBUG("Set PR_SET_NO_NEW_PRIVS to block execve() gainable privileges.");
+       }
+
        /* Some init's such as busybox will set sane tty settings on stdin,
         * stdout, stderr which it thinks is the console. We already set them
         * the way we wanted on the real terminal, and we want init to do its
         * setup on its console ie. the pty allocated in lxc_console_create()
         * so make sure that that pty is stdin,stdout,stderr.
         */
-       if (lxc_console_set_stdfds(handler) < 0)
+       if (lxc_console_set_stdfds(handler->conf->console.slave) < 0)
                goto out_warn_father;
 
        /* If we mounted a temporary proc, then unmount it now */
@@ -762,23 +947,46 @@ static int do_start(void *data)
 
        close(handler->sigfd);
 
-       if (handler->backgrounded && null_stdfds() < 0)
+       if (devnull_fd < 0) {
+               devnull_fd = open_devnull();
+
+               if (devnull_fd < 0)
+                       goto out_warn_father;
+       }
+
+       if (handler->backgrounded && set_stdfds(devnull_fd))
                goto out_warn_father;
 
+       if (devnull_fd >= 0) {
+               close(devnull_fd);
+               devnull_fd = -1;
+       }
+
+       setsid();
+
        /* after this call, we are in error because this
         * ops should not return as it execs */
        handler->ops->start(handler, handler->data);
 
 out_warn_father:
-       /* we want the parent to know something went wrong, so any
-        * value other than what it expects is ok. */
-       lxc_sync_wake_parent(handler, LXC_SYNC_POST_CONFIGURE);
+       /* we want the parent to know something went wrong, so we return a special
+        * error code. */
+       lxc_sync_wake_parent(handler, LXC_SYNC_ERROR);
+
+out_error:
+       if (devnull_fd >= 0)
+               close(devnull_fd);
+
        return -1;
 }
 
 static int save_phys_nics(struct lxc_conf *conf)
 {
        struct lxc_list *iterator;
+       int am_root = (getuid() == 0);
+
+       if (!am_root)
+               return 0;
 
        lxc_list_for_each(iterator, &conf->network) {
                struct lxc_netdev *netdev = iterator->elem;
@@ -849,29 +1057,35 @@ static int recv_ttys_from_child(struct lxc_handler *handler)
 void resolve_clone_flags(struct lxc_handler *handler)
 {
        handler->clone_flags = CLONE_NEWPID | CLONE_NEWNS;
+       INFO("Adding CLONE_NEWPID to clone flags.");
+       INFO("Adding CLONE_NEWNS to clone flags.");
 
        if (!lxc_list_empty(&handler->conf->id_map)) {
-               INFO("Cloning a new user namespace");
+               INFO("Adding CLONE_NEWUSER to clone flags.");
                handler->clone_flags |= CLONE_NEWUSER;
        }
 
        if (handler->conf->inherit_ns_fd[LXC_NS_NET] == -1) {
-               if (!lxc_requests_empty_network(handler))
+               if (!lxc_requests_empty_network(handler)) {
+                       INFO("Adding CLONE_NEWNET to clone flags.");
                        handler->clone_flags |= CLONE_NEWNET;
+               }
        } else {
-               INFO("Inheriting a net namespace");
+               INFO("Inheriting a NET namespace.");
        }
 
        if (handler->conf->inherit_ns_fd[LXC_NS_IPC] == -1) {
+               INFO("Adding CLONE_NEWIPC to clone flags.");
                handler->clone_flags |= CLONE_NEWIPC;
        } else {
-               INFO("Inheriting an IPC namespace");
+               INFO("Inheriting an IPC namespace.");
        }
 
        if (handler->conf->inherit_ns_fd[LXC_NS_UTS] == -1) {
+               INFO("Adding CLONE_NEWUTS to clone flags.");
                handler->clone_flags |= CLONE_NEWUTS;
        } else {
-               INFO("Inheriting a UTS namespace");
+               INFO("Inheriting a UTS namespace.");
        }
 }
 
@@ -879,9 +1093,10 @@ static int lxc_spawn(struct lxc_handler *handler)
 {
        int failed_before_rename = 0;
        const char *name = handler->name;
+       char *errmsg = NULL;
        bool cgroups_connected = false;
        int saved_ns_fd[LXC_NS_MAX];
-       int preserve_mask = 0, i;
+       int preserve_mask = 0, i, flags;
        int netpipepair[2], nveths;
 
        netpipe = -1;
@@ -953,8 +1168,12 @@ static int lxc_spawn(struct lxc_handler *handler)
                        INFO("failed to pin the container's rootfs");
        }
 
-       if (preserve_ns(saved_ns_fd, preserve_mask) < 0)
+       if (!preserve_ns(saved_ns_fd, preserve_mask, getpid(), &errmsg)) {
+               SYSERROR("Failed to preserve requested namespaces: %s",
+                       errmsg ? errmsg : "(Out of memory)");
+               free(errmsg);
                goto out_delete_net;
+       }
        if (attach_ns(handler->conf->inherit_ns_fd) < 0)
                goto out_delete_net;
 
@@ -968,10 +1187,21 @@ static int lxc_spawn(struct lxc_handler *handler)
        }
 
        /* Create a process in a new set of namespaces */
-       handler->pid = lxc_clone(do_start, handler, handler->clone_flags);
+       flags = handler->clone_flags;
+       if (handler->clone_flags & CLONE_NEWUSER)
+               flags &= ~CLONE_NEWNET;
+       handler->pid = lxc_clone(do_start, handler, flags);
        if (handler->pid < 0) {
-               SYSERROR("failed to fork into a new namespace");
+               SYSERROR("Failed to fork into a set of new namespaces.");
                goto out_delete_net;
+       } else {
+               INFO("Cloned a set of new namespaces.");
+       }
+
+       if (!preserve_ns(handler->nsfd, handler->clone_flags | preserve_mask, handler->pid, &errmsg)) {
+               INFO("Failed to store namespace references for stop hook: %s",
+                       errmsg ? errmsg : "(Out of memory)");
+               free(errmsg);
        }
 
        if (attach_ns(saved_ns_fd))
@@ -979,8 +1209,25 @@ static int lxc_spawn(struct lxc_handler *handler)
 
        lxc_sync_fini_child(handler);
 
-       if (lxc_sync_wait_child(handler, LXC_SYNC_CONFIGURE))
+       /* map the container uids - the container became an invalid
+        * userid the moment it was cloned with CLONE_NEWUSER - this
+        * call doesn't change anything immediately, but allows the
+        * container to setuid(0) (0 being mapped to something else on
+        * the host) later to become a valid uid again */
+       if (lxc_map_ids(&handler->conf->id_map, handler->pid)) {
+               ERROR("failed to set up id mapping");
+               goto out_delete_net;
+       }
+
+       if (lxc_sync_wake_child(handler, LXC_SYNC_STARTUP)) {
+               failed_before_rename = 1;
+               goto out_delete_net;
+       }
+
+       if (lxc_sync_wait_child(handler, LXC_SYNC_CONFIGURE)) {
                failed_before_rename = 1;
+               goto out_delete_net;
+       }
 
        if (!cgroup_create_legacy(handler)) {
                ERROR("failed to setup the legacy cgroups for %s", name);
@@ -1002,7 +1249,8 @@ static int lxc_spawn(struct lxc_handler *handler)
 
        /* Create the network configuration */
        if (handler->clone_flags & CLONE_NEWNET) {
-               if (lxc_assign_network(&handler->conf->network, handler->pid)) {
+               if (lxc_assign_network(handler->lxcpath, handler->name,
+                                       &handler->conf->network, handler->pid)) {
                        ERROR("failed to create the configured network");
                        goto out_delete_net;
                }
@@ -1025,16 +1273,6 @@ static int lxc_spawn(struct lxc_handler *handler)
                close(netpipepair[1]);
        }
 
-       /* map the container uids - the container became an invalid
-        * userid the moment it was cloned with CLONE_NEWUSER - this
-        * call doesn't change anything immediately, but allows the
-        * container to setuid(0) (0 being mapped to something else on
-        * the host) later to become a valid uid again */
-       if (lxc_map_ids(&handler->conf->id_map, handler->pid)) {
-               ERROR("failed to set up id mapping");
-               goto out_delete_net;
-       }
-
        /* Tell the child to continue its initialization.  we'll get
         * LXC_SYNC_CGROUP when it is ready for us to setup cgroups
         */
@@ -1078,6 +1316,7 @@ static int lxc_spawn(struct lxc_handler *handler)
        }
 
        lxc_sync_fini(handler);
+       handler->netnsfd = lxc_preserve_ns(handler->pid, "net");
 
        return 0;
 
@@ -1097,26 +1336,6 @@ out_abort:
        return -1;
 }
 
-int get_netns_fd(int pid)
-{
-       char path[MAXPATHLEN];
-       int ret, fd;
-
-       ret = snprintf(path, MAXPATHLEN, "/proc/%d/ns/net", pid);
-       if (ret < 0 || ret >= MAXPATHLEN) {
-               WARN("Failed to pin netns file for pid %d", pid);
-               return -1;
-       }
-
-       fd = open(path, O_RDONLY);
-       if (fd < 0) {
-               WARN("Failed to pin netns file %s for pid %d: %s",
-                               path, pid, strerror(errno));
-               return -1;
-       }
-       return fd;
-}
-
 int __lxc_start(const char *name, struct lxc_conf *conf,
                struct lxc_operations* ops, void *data, const char *lxcpath,
                bool backgrounded)
@@ -1124,7 +1343,6 @@ int __lxc_start(const char *name, struct lxc_conf *conf,
        struct lxc_handler *handler;
        int err = -1;
        int status;
-       int netnsfd = -1;
 
        handler = lxc_init(name, conf, lxcpath);
        if (!handler) {
@@ -1134,6 +1352,7 @@ int __lxc_start(const char *name, struct lxc_conf *conf,
        handler->ops = ops;
        handler->data = data;
        handler->backgrounded = backgrounded;
+       handler->netnsfd = -1;
 
        if (must_drop_cap_sys_boot(handler->conf)) {
                #if HAVE_SYS_CAPABILITY_H
@@ -1155,8 +1374,10 @@ int __lxc_start(const char *name, struct lxc_conf *conf,
                /* if the backing store is a device, mount it here and now */
                if (rootfs_is_blockdev(conf)) {
                        if (unshare(CLONE_NEWNS) < 0) {
-                               ERROR("Error unsharing mounts");
+                               ERROR("Error unsharing MOUNT namespace.");
                                goto out_fini_nonet;
+                       } else {
+                               INFO("Unshared MOUNT namespace.");
                        }
                        remount_all_slave();
                        if (do_rootfs_setup(conf, name, lxcpath) < 0) {
@@ -1175,13 +1396,13 @@ int __lxc_start(const char *name, struct lxc_conf *conf,
 
        handler->conf->reboot = 0;
 
-       netnsfd = get_netns_fd(handler->pid);
-
        err = lxc_poll(name, handler);
        if (err) {
                ERROR("mainloop exited with an error");
-               if (netnsfd >= 0)
-                       close(netnsfd);
+               if (handler->netnsfd >= 0) {
+                       close(handler->netnsfd);
+                       handler->netnsfd = -1;
+               }
                goto out_abort;
        }
 
@@ -1194,7 +1415,7 @@ int __lxc_start(const char *name, struct lxc_conf *conf,
         * lxc-execute which simply exited.  In any case, treat
         * it as a 'halt'
         */
-        if (WIFSIGNALED(status)) {
+       if (WIFSIGNALED(status)) {
                switch(WTERMSIG(status)) {
                case SIGINT: /* halt */
                        DEBUG("Container halting");
@@ -1210,11 +1431,13 @@ int __lxc_start(const char *name, struct lxc_conf *conf,
                        DEBUG("unknown exit status for init: %d", WTERMSIG(status));
                        break;
                }
-        }
+       }
+
+       DEBUG("Pushing physical nics back to host namespace");
+       lxc_restore_phys_nics_to_netns(handler->netnsfd, handler->conf);
 
-       lxc_rename_phys_nics_on_shutdown(netnsfd, handler->conf);
-       if (netnsfd >= 0)
-               close(netnsfd);
+       DEBUG("Tearing down virtual network devices used by container");
+       lxc_delete_network(handler);
 
        if (handler->pinfd >= 0) {
                close(handler->pinfd);
@@ -1276,3 +1499,65 @@ int lxc_start(const char *name, char *const argv[], struct lxc_conf *conf,
        conf->need_utmp_watch = 1;
        return __lxc_start(name, conf, &start_ops, &start_arg, lxcpath, backgrounded);
 }
+
+/* Remove an ephemeral container: rootfs first, then its lxcpath directory. */
+static void lxc_destroy_container_on_signal(struct lxc_handler *handler,
+                                           const char *name)
+{
+       char destroy[MAXPATHLEN];
+       int ret;
+       struct lxc_container *c;
+
+       /* Only report the rootfs as destroyed when one was actually removed. */
+       if (handler->conf->rootfs.path && handler->conf->rootfs.mount) {
+               if (!do_destroy_container(handler->conf)) {
+                       ERROR("Error destroying rootfs for %s", name);
+                       return;
+               }
+               INFO("Destroyed rootfs for %s", name);
+       }
+
+       ret = snprintf(destroy, MAXPATHLEN, "%s/%s", handler->lxcpath, name);
+       if (ret < 0 || ret >= MAXPATHLEN) {
+               ERROR("Error printing path for %s", name);
+               ERROR("Error destroying directory for %s", name);
+               return;
+       }
+
+       c = lxc_container_new(name, handler->lxcpath);
+       if (c) {
+               if (container_disk_lock(c)) {
+                       INFO("Could not update lxc_snapshots file");
+               } else {
+                       mod_all_rdeps(c, false);
+                       container_disk_unlock(c);
+               }
+               lxc_container_put(c);
+       }
+
+       /* When unprivileged, run the removal through userns_exec_1(). */
+       if (am_unpriv())
+               ret = userns_exec_1(handler->conf, lxc_rmdir_onedev_wrapper, destroy);
+       else
+               ret = lxc_rmdir_onedev(destroy, NULL);
+       if (ret < 0) {
+               ERROR("Error destroying directory for %s", name);
+               return;
+       }
+       INFO("Destroyed directory for %s", name);
+}
+
+/* userns_exec_1() callback: @data is the path given to lxc_rmdir_onedev(). */
+static int lxc_rmdir_onedev_wrapper(void *data)
+{
+       return lxc_rmdir_onedev((char *)data, NULL);
+}
+
+/* Run bdev_destroy() on @conf, via userns_exec_1() when unprivileged. */
+static bool do_destroy_container(struct lxc_conf *conf)
+{
+       if (am_unpriv())
+               return userns_exec_1(conf, bdev_destroy_wrapper, conf) >= 0;
+
+       return bdev_destroy(conf);
+}