lxccontainer: remove check from goto target

[mirror_lxc.git] / src / lxc / start.c
diff --git a/src/lxc/start.c b/src/lxc/start.c

index 8359d356a7540d9a604313e2453cfaa1df807921..ab035507e6a132dacb903ddfb4df42eba77cdccd 100644 (file)
--- a/src/lxc/start.c
+++ b/src/lxc/start.c
@@ -23,9 +23,9 @@
   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
   */
  
-#define _GNU_SOURCE
-#include "config.h"
-
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE 1
+#endif
  #include <alloca.h>
  #include <dirent.h>
  #include <errno.h>
@@ -37,7 +37,6 @@
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
-#include <unistd.h>
  #include <sys/file.h>
  #include <sys/mount.h>
  #include <sys/param.h>
@@ -48,22 +47,7 @@
  #include <sys/types.h>
  #include <sys/un.h>
  #include <sys/wait.h>
-
-#if HAVE_LIBCAP
-#include <sys/capability.h>
-#endif
-
-#if !HAVE_DECL_PR_CAPBSET_DROP
-#define PR_CAPBSET_DROP 24
-#endif
-
-#if !HAVE_DECL_PR_SET_NO_NEW_PRIVS
-#define PR_SET_NO_NEW_PRIVS 38
-#endif
-
-#if !HAVE_DECL_PR_GET_NO_NEW_PRIVS
-#define PR_GET_NO_NEW_PRIVS 39
-#endif
+#include <unistd.h>
  
  #include "af_unix.h"
  #include "caps.h"
@@ -71,30 +55,39 @@
  #include "commands.h"
  #include "commands_utils.h"
  #include "conf.h"
+#include "config.h"
  #include "confile_utils.h"
  #include "error.h"
+#include "file_utils.h"
  #include "list.h"
-#include "lsm/lsm.h"
  #include "log.h"
+#include "lsm/lsm.h"
  #include "lxccontainer.h"
  #include "lxclock.h"
  #include "lxcseccomp.h"
+#include "macro.h"
  #include "mainloop.h"
  #include "monitor.h"
  #include "namespace.h"
  #include "network.h"
+#include "raw_syscalls.h"
  #include "start.h"
  #include "storage/storage.h"
  #include "storage/storage_utils.h"
  #include "sync.h"
+#include "syscall_wrappers.h"
  #include "terminal.h"
  #include "utils.h"
  
+#if HAVE_LIBCAP
+#include <sys/capability.h>
+#endif
+
  #ifndef HAVE_STRLCPY
  #include "include/strlcpy.h"
  #endif
  
-lxc_log_define(lxc_start, lxc);
+lxc_log_define(start, lxc);
  
  extern void mod_all_rdeps(struct lxc_container *c, bool inc);
  static bool do_destroy_container(struct lxc_handler *handler);
@@ -187,8 +180,6 @@ static bool lxc_try_preserve_namespaces(struct lxc_handler *handler,
  
                 fd = lxc_try_preserve_ns(pid, ns_info[i].proc_name);
                 if (fd < 0) {
-                       handler->nsfd[i] = -EBADF;
-
                         /* Do not fail to start container on kernels that do
                          * not support interacting with namespaces through
                          * /proc.
@@ -208,9 +199,9 @@ static bool lxc_try_preserve_namespaces(struct lxc_handler *handler,
         return true;
  }
  
-static int match_fd(int fd)
+static inline bool match_stdfds(int fd)
  {
-       return (fd == 0 || fd == 1 || fd == 2);
+       return (fd == STDIN_FILENO || fd == STDOUT_FILENO || fd == STDERR_FILENO);
  }
  
  int lxc_check_inherited(struct lxc_conf *conf, bool closeall,
@@ -277,7 +268,7 @@ restart:
                 if (current_config && fd == current_config->logfd)
                         continue;
  
-               if (match_fd(fd))
+               if (match_stdfds(fd))
                         continue;
  
                 if (closeall) {
@@ -301,16 +292,16 @@ restart:
  
  static int setup_signal_fd(sigset_t *oldmask)
  {
-       int ret, sig;
+       int ret;
         sigset_t mask;
-       int signals[] = {SIGBUS, SIGILL, SIGSEGV, SIGWINCH};
+       const int signals[] = {SIGBUS, SIGILL, SIGSEGV, SIGWINCH};
  
         /* Block everything except serious error signals. */
         ret = sigfillset(&mask);
         if (ret < 0)
                 return -EBADF;
  
-       for (sig = 0; sig < (sizeof(signals) / sizeof(signals[0])); sig++) {
+       for (int sig = 0; sig < (sizeof(signals) / sizeof(signals[0])); sig++) {
                 ret = sigdelset(&mask, signals[sig]);
                 if (ret < 0)
                         return -EBADF;
@@ -383,13 +374,15 @@ static int signal_handler(int fd, uint32_t events, void *data,
         if (siginfo.ssi_signo == SIGHUP) {
                 kill(hdlr->pid, SIGTERM);
                 INFO("Killing %d since terminal hung up", hdlr->pid);
-               return hdlr->init_died ? LXC_MAINLOOP_CLOSE : LXC_MAINLOOP_CONTINUE;
+               return hdlr->init_died ? LXC_MAINLOOP_CLOSE
+                                      : LXC_MAINLOOP_CONTINUE;
         }
  
         if (siginfo.ssi_signo != SIGCHLD) {
                 kill(hdlr->pid, siginfo.ssi_signo);
                 INFO("Forwarded signal %d to pid %d", siginfo.ssi_signo, hdlr->pid);
-               return hdlr->init_died ? LXC_MAINLOOP_CLOSE : LXC_MAINLOOP_CONTINUE;
+               return hdlr->init_died ? LXC_MAINLOOP_CLOSE
+                                      : LXC_MAINLOOP_CONTINUE;
         }
  
         /* More robustness, protect ourself from a SIGCHLD sent
@@ -398,18 +391,24 @@ static int signal_handler(int fd, uint32_t events, void *data,
         if (siginfo.ssi_pid != hdlr->pid) {
                 NOTICE("Received %d from pid %d instead of container init %d",
                        siginfo.ssi_signo, siginfo.ssi_pid, hdlr->pid);
-               return hdlr->init_died ? LXC_MAINLOOP_CLOSE : LXC_MAINLOOP_CONTINUE;
+               return hdlr->init_died ? LXC_MAINLOOP_CLOSE
+                                      : LXC_MAINLOOP_CONTINUE;
         }
  
         if (siginfo.ssi_code == CLD_STOPPED) {
                 INFO("Container init process was stopped");
-               return hdlr->init_died ? LXC_MAINLOOP_CLOSE : LXC_MAINLOOP_CONTINUE;
-       } else if (siginfo.ssi_code == CLD_CONTINUED) {
+               return hdlr->init_died ? LXC_MAINLOOP_CLOSE
+                                      : LXC_MAINLOOP_CONTINUE;
+       }
+
+       if (siginfo.ssi_code == CLD_CONTINUED) {
                 INFO("Container init process was continued");
-               return hdlr->init_died ? LXC_MAINLOOP_CLOSE : LXC_MAINLOOP_CONTINUE;
+               return hdlr->init_died ? LXC_MAINLOOP_CLOSE
+                                      : LXC_MAINLOOP_CONTINUE;
         }
  
         DEBUG("Container init process %d exited", hdlr->pid);
+
         return LXC_MAINLOOP_CLOSE;
  }
  
@@ -450,17 +449,9 @@ int lxc_serve_state_clients(const char *name, struct lxc_handler *handler,
                 TRACE("Sending state %s to state client %d",
                       lxc_state2str(state), client->clientfd);
  
-       again:
-               ret = send(client->clientfd, &msg, sizeof(msg), 0);
-               if (ret <= 0) {
-                       if (errno == EINTR) {
-                               TRACE("Caught EINTR; retrying");
-                               goto again;
-                       }
-
-                       ERROR("%s - Failed to send message to client",
-                             strerror(errno));
-               }
+               ret = lxc_send_nointr(client->clientfd, &msg, sizeof(msg), MSG_NOSIGNAL);
+               if (ret <= 0)
+                       SYSERROR("Failed to send message to client");
  
                 /* kick client from list */
                 lxc_list_del(cur);
@@ -478,7 +469,7 @@ static int lxc_serve_state_socket_pair(const char *name,
  {
         ssize_t ret;
  
-       if (!handler->backgrounded ||
+       if (!handler->daemonize ||
              handler->state_socket_pair[1] < 0 ||
             state == STARTING)
                 return 0;
@@ -490,11 +481,17 @@ static int lxc_serve_state_socket_pair(const char *name,
  again:
         ret = lxc_abstract_unix_send_credential(handler->state_socket_pair[1],
                                                 &(int){state}, sizeof(int));
-       if (ret != sizeof(int)) {
+       if (ret < 0) {
+               SYSERROR("Failed to send state to %d", handler->state_socket_pair[1]);
+
                 if (errno == EINTR)
                         goto again;
-               SYSERROR("Failed to send state to %d",
-                        handler->state_socket_pair[1]);
+
+               return -1;
+       }
+
+       if (ret != sizeof(int)) {
+               ERROR("Message too long : %d", handler->state_socket_pair[1]);
                 return -1;
         }
  
@@ -649,7 +646,7 @@ void lxc_free_handler(struct lxc_handler *handler)
  
         if (handler->conf && handler->conf->reboot == REBOOT_NONE)
                 if (handler->conf->maincmd_fd >= 0)
-                       close(handler->conf->maincmd_fd);
+                       lxc_abstract_unix_close(handler->conf->maincmd_fd);
  
         if (handler->state_socket_pair[0] >= 0)
                 close(handler->state_socket_pair[0]);
@@ -657,6 +654,9 @@ void lxc_free_handler(struct lxc_handler *handler)
         if (handler->state_socket_pair[1] >= 0)
                 close(handler->state_socket_pair[1]);
  
+       if (handler->cgroup_ops)
+               cgroup_exit(handler->cgroup_ops);
+
         handler->conf = NULL;
         free(handler);
         handler = NULL;
@@ -671,6 +671,7 @@ struct lxc_handler *lxc_init_handler(const char *name, struct lxc_conf *conf,
         handler = malloc(sizeof(*handler));
         if (!handler)
                 return NULL;
+
         memset(handler, 0, sizeof(*handler));
  
         /* Note that am_guest_unpriv() checks the effective uid. We
@@ -704,6 +705,7 @@ struct lxc_handler *lxc_init_handler(const char *name, struct lxc_conf *conf,
                         ERROR("Failed to create anonymous pair of unix sockets");
                         goto on_error;
                 }
+
                 TRACE("Created anonymous pair {%d,%d} of unix sockets",
                       handler->state_socket_pair[0],
                       handler->state_socket_pair[1]);
@@ -716,6 +718,7 @@ struct lxc_handler *lxc_init_handler(const char *name, struct lxc_conf *conf,
                         goto on_error;
                 }
         }
+
         TRACE("Unix domain socket %d for command server is ready",
               handler->conf->maincmd_fd);
  
@@ -733,6 +736,8 @@ int lxc_init(const char *name, struct lxc_handler *handler)
         const char *loglevel;
         struct lxc_conf *conf = handler->conf;
  
+       handler->monitor_pid = lxc_raw_getpid();
+
         lsm_init();
         TRACE("Initialized LSM");
  
@@ -843,28 +848,45 @@ int lxc_init(const char *name, struct lxc_handler *handler)
         ret = lxc_terminal_map_ids(conf, &conf->console);
         if (ret < 0) {
                 ERROR("Failed to chown console");
-               goto out_restore_sigmask;
+               goto out_delete_terminal;
         }
         TRACE("Chowned console");
  
-       handler->cgroup_ops = cgroup_init(handler);
+       handler->cgroup_ops = cgroup_init(handler->conf);
         if (!handler->cgroup_ops) {
                 ERROR("Failed to initialize cgroup driver");
-               goto out_restore_sigmask;
+               goto out_delete_terminal;
         }
         TRACE("Initialized cgroup driver");
  
+       ret = lsm_process_prepare(conf, handler->lxcpath);
+       if (ret < 0) {
+               ERROR("Failed to initialize LSM");
+               goto out_destroy_cgroups;
+       }
+       TRACE("Initialized LSM");
+
         INFO("Container \"%s\" is initialized", name);
         return 0;
  
+out_destroy_cgroups:
+       handler->cgroup_ops->payload_destroy(handler->cgroup_ops, handler);
+       handler->cgroup_ops->monitor_destroy(handler->cgroup_ops, handler);
+
+out_delete_terminal:
+       lxc_terminal_delete(&handler->conf->console);
+
  out_restore_sigmask:
         (void)pthread_sigmask(SIG_SETMASK, &handler->oldmask, NULL);
+
  out_delete_tty:
         lxc_delete_tty(&conf->ttys);
+
  out_aborting:
         (void)lxc_set_state(name, handler, ABORTING);
+
  out_close_maincmd_fd:
-       close(conf->maincmd_fd);
+       lxc_abstract_unix_close(conf->maincmd_fd);
         conf->maincmd_fd = -1;
         return -1;
  }
@@ -941,8 +963,10 @@ void lxc_fini(const char *name, struct lxc_handler *handler)
         while (namespace_count--)
                 free(namespaces[namespace_count]);
  
-       cgroup_ops->destroy(cgroup_ops, handler);
-       cgroup_exit(cgroup_ops);
+       lsm_process_cleanup(handler->conf, handler->lxcpath);
+
+       cgroup_ops->payload_destroy(cgroup_ops, handler);
+       cgroup_ops->monitor_destroy(cgroup_ops, handler);
  
         if (handler->conf->reboot == REBOOT_NONE) {
                 /* For all new state clients simply close the command socket.
@@ -951,7 +975,7 @@ void lxc_fini(const char *name, struct lxc_handler *handler)
                  * the command socket causing a new process to get ECONNREFUSED
                  * because we haven't yet closed the command socket.
                  */
-               close(handler->conf->maincmd_fd);
+               lxc_abstract_unix_close(handler->conf->maincmd_fd);
                 handler->conf->maincmd_fd = -1;
                 TRACE("Closed command socket");
  
@@ -1036,10 +1060,11 @@ static int do_start(void *data)
  {
         int ret;
         char path[PATH_MAX];
-       bool have_cap_setgid;
         uid_t new_uid;
         gid_t new_gid;
         struct lxc_list *iterator;
+       uid_t nsuid = 0;
+       gid_t nsgid = 0;
         int devnull_fd = -1;
         struct lxc_handler *handler = data;
  
@@ -1051,7 +1076,7 @@ static int do_start(void *data)
          * exit before we set the pdeath signal leading to a unsupervized
          * container.
          */
-       ret = lxc_set_death_signal(SIGKILL);
+       ret = lxc_set_death_signal(SIGKILL, 0);
         if (ret < 0) {
                 SYSERROR("Failed to set PR_SET_PDEATHSIG to SIGKILL");
                 goto out_warn_father;
@@ -1059,7 +1084,7 @@ static int do_start(void *data)
  
         ret = lxc_ambient_caps_up();
         if (ret < 0) {
-               SYSERROR("Failed to raise ambient capabilities");
+               ERROR("Failed to raise ambient capabilities");
                 goto out_warn_father;
         }
  
@@ -1107,30 +1132,29 @@ static int do_start(void *data)
          * privilege over our namespace.
          */
         if (!lxc_list_empty(&handler->conf->id_map)) {
-               uid_t nsuid = (handler->conf->root_nsuid_map != NULL)
-                                 ? 0
-                                 : handler->conf->init_uid;
-               gid_t nsgid = (handler->conf->root_nsgid_map != NULL)
-                                 ? 0
-                                 : handler->conf->init_gid;
-
-               ret = lxc_switch_uid_gid(nsuid, nsgid);
-               if (ret < 0)
+               if (!handler->conf->root_nsuid_map)
+                       nsuid = handler->conf->init_uid;
+
+               if (!handler->conf->root_nsgid_map)
+                       nsgid = handler->conf->init_gid;
+
+               if (!lxc_switch_uid_gid(nsuid, nsgid))
                         goto out_warn_father;
  
                 /* Drop groups only after we switched to a valid gid in the new
                  * user namespace.
                  */
-               ret = lxc_setgroups(0, NULL);
-               if (ret < 0 && (handler->am_root || errno != EPERM))
+               if (!lxc_setgroups(0, NULL) &&
+                   (handler->am_root || errno != EPERM))
                         goto out_warn_father;
  
-               ret = prctl(PR_SET_DUMPABLE, 1, 0, 0, 0);
+               ret = prctl(PR_SET_DUMPABLE, prctl_arg(1), prctl_arg(0),
+                           prctl_arg(0), prctl_arg(0));
                 if (ret < 0)
                         goto out_warn_father;
  
                 /* set{g,u}id() clears deathsignal */
-               ret = lxc_set_death_signal(SIGKILL);
+               ret = lxc_set_death_signal(SIGKILL, 0);
                 if (ret < 0) {
                         SYSERROR("Failed to set PR_SET_PDEATHSIG to SIGKILL");
                         goto out_warn_father;
@@ -1158,7 +1182,7 @@ static int do_start(void *data)
          * means that migration won't work, but at least we won't spew output
          * where it isn't wanted.
          */
-       if (handler->backgrounded && !handler->conf->autodev) {
+       if (handler->daemonize && !handler->conf->autodev) {
                 ret = access(path, F_OK);
                 if (ret != 0) {
                         devnull_fd = open_devnull();
@@ -1220,7 +1244,7 @@ static int do_start(void *data)
         }
  
         /* Set the label to change to when we exec(2) the container's init. */
-       ret = lsm_process_label_set(NULL, handler->conf, 1, 1);
+       ret = lsm_process_label_set(NULL, handler->conf, true);
         if (ret < 0)
                 goto out_warn_father;
  
@@ -1228,7 +1252,8 @@ static int do_start(void *data)
          * before we aren't allowed anymore.
          */
         if (handler->conf->no_new_privs) {
-               ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+               ret = prctl(PR_SET_NO_NEW_PRIVS, prctl_arg(1), prctl_arg(0),
+                           prctl_arg(0), prctl_arg(0));
                 if (ret < 0) {
                         SYSERROR("Could not set PR_SET_NO_NEW_PRIVS to block "
                                  "execve() gainable privileges");
@@ -1245,7 +1270,7 @@ static int do_start(void *data)
          * make sure that that pty is stdin,stdout,stderr.
          */
          if (handler->conf->console.slave >= 0) {
-                if (handler->backgrounded || !handler->conf->is_execute)
+                if (handler->daemonize || !handler->conf->is_execute)
                          ret = set_stdfds(handler->conf->console.slave);
                  else
                          ret = lxc_terminal_set_stdfds(handler->conf->console.slave);
@@ -1272,13 +1297,13 @@ static int do_start(void *data)
  
         close(handler->sigfd);
  
-       if (devnull_fd < 0) {
-               devnull_fd = open_devnull();
-               if (devnull_fd < 0)
-                       goto out_warn_father;
-       }
+       if (handler->conf->console.slave < 0 && handler->daemonize) {
+               if (devnull_fd < 0) {
+                       devnull_fd = open_devnull();
+                       if (devnull_fd < 0)
+                               goto out_warn_father;
+               }
  
-       if (handler->conf->console.slave < 0 && handler->backgrounded) {
                 ret = set_stdfds(devnull_fd);
                 if (ret < 0) {
                         ERROR("Failed to redirect std{in,out,err} to \"/dev/null\"");
@@ -1343,31 +1368,42 @@ static int do_start(void *data)
         new_uid = handler->conf->init_uid;
         new_gid = handler->conf->init_gid;
  
-       /* If we are in a new user namespace we already dropped all groups when
-       *  we switched to root in the new user namespace further above. Only
-       *  drop groups if we can, so ensure that we have necessary privilege.
-        */
-       #if HAVE_LIBCAP
-       have_cap_setgid = lxc_proc_cap_is_set(CAP_SETGID, CAP_EFFECTIVE);
-       #else
-       have_cap_setgid = false;
-       #endif
-       if (lxc_list_empty(&handler->conf->id_map) && have_cap_setgid) {
-               ret = lxc_setgroups(0, NULL);
-               if (ret < 0)
-                       goto out_warn_father;
-       }
+       /* Avoid unnecessary syscalls. */
+       if (new_uid == nsuid)
+               new_uid = LXC_INVALID_UID;
  
-       ret = lxc_switch_uid_gid(new_uid, new_gid);
-       if (ret < 0)
+       if (new_gid == nsgid)
+               new_gid = LXC_INVALID_GID;
+
+       if (!lxc_switch_uid_gid(new_uid, new_gid))
                 goto out_warn_father;
  
+       /* If we are in a new user namespace we already dropped all groups when
+        * we switched to root in the new user namespace further above. Only
+        * drop groups if we can, so ensure that we have necessary privilege.
+        */
+       if (lxc_list_empty(&handler->conf->id_map))
+               #if HAVE_LIBCAP
+               if (lxc_proc_cap_is_set(CAP_SETGID, CAP_EFFECTIVE))
+               #endif
+                       if (!lxc_setgroups(0, NULL))
+                               goto out_warn_father;
+
         ret = lxc_ambient_caps_down();
         if (ret < 0) {
-               SYSERROR("Failed to clear ambient capabilities");
+               ERROR("Failed to clear ambient capabilities");
                 goto out_warn_father;
         }
  
+       if (handler->conf->monitor_signal_pdeath != SIGKILL) {
+               ret = lxc_set_death_signal(handler->conf->monitor_signal_pdeath, 0);
+               if (ret < 0) {
+                       SYSERROR("Failed to set PR_SET_PDEATHSIG to %d",
+                                handler->conf->monitor_signal_pdeath);
+                       goto out_warn_father;
+               }
+       }
+
         /* After this call, we are in error because this ops should not return
          * as it execs.
          */
@@ -1416,9 +1452,9 @@ static int lxc_recv_ttys_from_child(struct lxc_handler *handler)
                 TRACE("Received pty with master fd %d and slave fd %d from "
                       "parent", tty->master, tty->slave);
         }
+
         if (ret < 0)
-               ERROR("Failed to receive %zu ttys from child: %s", ttys->max,
-                     strerror(errno));
+               SYSERROR("Failed to receive %zu ttys from child", ttys->max);
         else
                 TRACE("Received %zu ttys from child", ttys->max);
  
@@ -1505,6 +1541,75 @@ static inline int do_share_ns(void *arg)
         return 0;
  }
  
+static int lxc_setup_shmount(struct lxc_conf *conf)
+{
+       size_t len_cont;
+       char *full_cont_path;
+       int ret = -1;
+
+       /* Build "<rootfs.mount>/<shmount.path_cont>", the shmount path under the container root; the extra 1 is the '/' separator. */
+       len_cont = strlen(conf->rootfs.mount) + 1 + strlen(conf->shmount.path_cont);
+       /* +1 for the terminating '\0'; allocation failure is reported as -ENOMEM. */
+       full_cont_path = malloc(len_cont + 1);
+       if (!full_cont_path) {
+               SYSERROR("Not enough memory");
+               return -ENOMEM;
+       }
+
+       ret = snprintf(full_cont_path, len_cont + 1, "%s/%s",
+                      conf->rootfs.mount, conf->shmount.path_cont);
+       if (ret < 0 || ret >= len_cont + 1) {
+               SYSERROR("Failed to create filename");
+               free(full_cont_path);
+               return -1;
+       }
+
+       /* If the host path is already a shared mountpoint, reuse it as is and return 0 without creating or mounting anything. */
+       if (is_shared_mountpoint(conf->shmount.path_host)) {
+               INFO("Path \"%s\" is already MS_SHARED. Reusing",
+                    conf->shmount.path_host);
+               free(full_cont_path);
+               return 0;
+       }
+
+       /* Create the host and container mount paths with mode 0711; a failure with errno == EEXIST is not treated as an error. */
+       ret = mkdir_p(conf->shmount.path_host, 0711);
+       if (ret < 0 && errno != EEXIST) {
+               SYSERROR("Failed to create directory \"%s\"",
+                        conf->shmount.path_host);
+               free(full_cont_path);
+               return ret;
+       }
+
+       ret = mkdir_p(full_cont_path, 0711);
+       if (ret < 0 && errno != EEXIST) {
+               SYSERROR("Failed to create directory \"%s\"", full_cont_path);
+               free(full_cont_path);
+               return ret;
+       }
+
+       /* Prepare the host mountpoint: mount a tmpfs (size=100k, mode=0711) on it, then remount it onto itself with MS_REC | MS_SHARED. */
+       ret = mount("tmpfs", conf->shmount.path_host, "tmpfs", 0,
+                   "size=100k,mode=0711");
+       if (ret < 0) {
+               SYSERROR("Failed to mount \"%s\"", conf->shmount.path_host);
+               free(full_cont_path);
+               return ret;
+       }
+
+       ret = mount(conf->shmount.path_host, conf->shmount.path_host, "none",
+                   MS_REC | MS_SHARED, "");
+       if (ret < 0) {
+               SYSERROR("Failed to make shared \"%s\"", conf->shmount.path_host);
+               free(full_cont_path);
+               return ret;
+       }
+
+       INFO("Setup shared mount point \"%s\"", conf->shmount.path_host);
+       free(full_cont_path);
+       return 0;
+}
+
  /* lxc_spawn() performs crucial setup tasks and clone()s the new process which
   * exec()s the requested container binary.
   * Note that lxc_spawn() runs in the parent namespaces. Any operations performed
@@ -1544,15 +1649,22 @@ static int lxc_spawn(struct lxc_handler *handler)
  
         ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0,
                          handler->data_sock);
-       if (ret < 0) {
-               lxc_sync_fini(handler);
-               return -1;
-       }
+       if (ret < 0)
+               goto out_sync_fini;
  
         ret = resolve_clone_flags(handler);
-       if (ret < 0) {
-               lxc_sync_fini(handler);
-               return -1;
+       if (ret < 0)
+               goto out_sync_fini;
+
+       if (conf->shmount.path_host) {
+               if (!conf->shmount.path_cont)
+                       goto out_sync_fini;
+
+               ret = lxc_setup_shmount(conf);
+               if (ret < 0) {
+                       ERROR("Failed to setup shared mount point");
+                       goto out_sync_fini;
+               }
         }
  
         if (handler->ns_clone_flags & CLONE_NEWNET) {
@@ -1566,8 +1678,7 @@ static int lxc_spawn(struct lxc_handler *handler)
                         ret = lxc_find_gateway_addresses(handler);
                         if (ret < 0) {
                                 ERROR("Failed to find gateway addresses");
-                               lxc_sync_fini(handler);
-                               return -1;
+                               goto out_sync_fini;
                         }
  
                         /* That should be done before the clone because we will
@@ -1576,13 +1687,12 @@ static int lxc_spawn(struct lxc_handler *handler)
                         ret = lxc_create_network_priv(handler);
                         if (ret < 0) {
                                 ERROR("Failed to create the network");
-                               lxc_sync_fini(handler);
-                               return -1;
+                               goto out_delete_net;
                         }
                 }
         }
  
-       if (!cgroup_ops->create(cgroup_ops, handler)) {
+       if (!cgroup_ops->payload_create(cgroup_ops, handler)) {
                 ERROR("Failed creating cgroups");
                 goto out_delete_net;
         }
@@ -1676,7 +1786,7 @@ static int lxc_spawn(struct lxc_handler *handler)
                 goto out_delete_net;
         }
  
-       if (!cgroup_ops->enter(cgroup_ops, handler->pid))
+       if (!cgroup_ops->payload_enter(cgroup_ops, handler->pid))
                 goto out_delete_net;
  
         if (!cgroup_ops->chown(cgroup_ops, handler->conf))
@@ -1686,12 +1796,18 @@ static int lxc_spawn(struct lxc_handler *handler)
         ret = lxc_try_preserve_ns(handler->pid, "net");
         if (ret < 0) {
                 if (ret != -EOPNOTSUPP) {
-                       ERROR("%s - Failed to preserve net namespace", strerror(errno));
+                       SYSERROR("Failed to preserve net namespace");
                         goto out_delete_net;
                 }
         } else {
                 handler->nsfd[LXC_NS_NET] = ret;
                 DEBUG("Preserved net namespace via fd %d", ret);
+
+               ret = lxc_netns_set_nsid(handler->nsfd[LXC_NS_NET]);
+               if (ret < 0)
+                       SYSWARN("Failed to allocate new network namespace id");
+               else
+                       TRACE("Allocated new network namespace id");
         }
  
         /* Create the network configuration. */
@@ -1755,8 +1871,7 @@ static int lxc_spawn(struct lxc_handler *handler)
                 ret = lxc_try_preserve_ns(handler->pid, "cgroup");
                 if (ret < 0) {
                         if (ret != -EOPNOTSUPP) {
-                               ERROR("%s - Failed to preserve cgroup namespace",
-                                     strerror(errno));
+                               SYSERROR("Failed to preserve cgroup namespace");
                                 goto out_delete_net;
                         }
                 } else {
@@ -1831,6 +1946,8 @@ out_delete_net:
  
  out_abort:
         lxc_abort(name, handler);
+
+out_sync_fini:
         lxc_sync_fini(handler);
         if (handler->pinfd >= 0) {
                 close(handler->pinfd);
@@ -1842,10 +1959,11 @@ out_abort:
  
  int __lxc_start(const char *name, struct lxc_handler *handler,
                 struct lxc_operations* ops, void *data, const char *lxcpath,
-               bool backgrounded, int *error_num)
+               bool daemonize, int *error_num)
  {
         int ret, status;
         struct lxc_conf *conf = handler->conf;
+       struct cgroup_ops *cgroup_ops;
  
         ret = lxc_init(name, handler);
         if (ret < 0) {
@@ -1854,13 +1972,24 @@ int __lxc_start(const char *name, struct lxc_handler *handler,
         }
         handler->ops = ops;
         handler->data = data;
-       handler->backgrounded = backgrounded;
+       handler->daemonize = daemonize;
+       cgroup_ops = handler->cgroup_ops;
  
         if (!attach_block_device(handler->conf)) {
                 ERROR("Failed to attach block device");
                 goto out_fini_nonet;
         }
  
+       if (!cgroup_ops->monitor_create(cgroup_ops, handler)) {
+               ERROR("Failed to create monitor cgroup");
+               goto out_fini_nonet;
+       }
+
+       if (!cgroup_ops->monitor_enter(cgroup_ops, handler->monitor_pid)) {
+               ERROR("Failed to enter monitor cgroup");
+               goto out_fini_nonet;
+       }
+
         if (geteuid() == 0 && !lxc_list_empty(&conf->id_map)) {
                 /* If the backing store is a device, mount it here and now. */
                 if (rootfs_is_blockdev(conf)) {
@@ -1872,7 +2001,7 @@ int __lxc_start(const char *name, struct lxc_handler *handler,
                         INFO("Unshared CLONE_NEWNS");
  
                         remount_all_slave();
-                       ret = do_rootfs_setup(conf, name, lxcpath);
+                       ret = lxc_setup_rootfs_prepare_root(conf, name, lxcpath);
                         if (ret < 0) {
                                 ERROR("Error setting up rootfs mount as root before spawn");
                                 goto out_fini_nonet;
@@ -1990,20 +2119,20 @@ static struct lxc_operations start_ops = {
  };
  
  int lxc_start(const char *name, char *const argv[], struct lxc_handler *handler,
-             const char *lxcpath, bool backgrounded, int *error_num)
+             const char *lxcpath, bool daemonize, int *error_num)
  {
         struct start_args start_arg = {
                 .argv = argv,
         };
  
         TRACE("Doing lxc_start");
-       return __lxc_start(name, handler, &start_ops, &start_arg, lxcpath, backgrounded, error_num);
+       return __lxc_start(name, handler, &start_ops, &start_arg, lxcpath, daemonize, error_num);
  }
  
  static void lxc_destroy_container_on_signal(struct lxc_handler *handler,
                                             const char *name)
  {
-       char destroy[MAXPATHLEN];
+       char destroy[PATH_MAX];
         struct lxc_container *c;
         int ret = 0;
         bool bret = true;
@@ -2017,8 +2146,8 @@ static void lxc_destroy_container_on_signal(struct lxc_handler *handler,
         }
         INFO("Destroyed rootfs for container \"%s\"", name);
  
-       ret = snprintf(destroy, MAXPATHLEN, "%s/%s", handler->lxcpath, name);
-       if (ret < 0 || ret >= MAXPATHLEN) {
+       ret = snprintf(destroy, PATH_MAX, "%s/%s", handler->lxcpath, name);
+       if (ret < 0 || ret >= PATH_MAX) {
                 ERROR("Error destroying directory for container \"%s\"", name);
                 return;
         }