]> git.proxmox.com Git - mirror_lxc.git/blobdiff - src/lxc/lxccontainer.c
fix potential out of bounds pointer deref
[mirror_lxc.git] / src / lxc / lxccontainer.c
index 1e257c0b9cc95f5df239021140922142e073e7a2..aaa9f2aebe5616a9b20a671cf2555c7b037e4740 100644 (file)
  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
+#define _GNU_SOURCE
+#include <pthread.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <sched.h>
+#include "config.h"
 #include "lxc.h"
 #include "state.h"
 #include "lxccontainer.h"
 #include "conf.h"
-#include "config.h"
 #include "confile.h"
+#include "console.h"
 #include "cgroup.h"
 #include "commands.h"
+#include "version.h"
 #include "log.h"
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/wait.h>
-#include <errno.h>
+#include "bdev.h"
+#include "utils.h"
 #include <lxc/utils.h>
+#include <lxc/monitor.h>
+#include <sched.h>
+#include <arpa/inet.h>
+#include <ifaddrs.h>
 
 lxc_log_define(lxc_container, lxc);
 
+static bool file_exists(char *f)
+{
+       struct stat statbuf;
+
+       return stat(f, &statbuf) == 0;
+}
+
+/*
+ * A few functions to help detect when a container creation failed.
+ * If a container creation was killed partway through, then trying
+ * to actually start that container could harm the host.  We detect
+ * this by creating a 'partial' file under the container directory,
+ * and keeping an advisory lock.  When container creation completes,
+ * we remove that file.  When we load or try to start a container, if
+ * we find that file, without a flock, we remove the container.
+ */
+int ongoing_create(struct lxc_container *c)
+{
+       int len = strlen(c->config_path) + strlen(c->name) + 10;
+       char *path = alloca(len);
+       int fd, ret;
+       struct flock lk;
+
+       ret = snprintf(path, len, "%s/%s/partial", c->config_path, c->name);
+       if (ret < 0 || ret >= len) {
+               ERROR("Error writing partial pathname");
+               return -1;
+       }
+
+       if (!file_exists(path))
+               return 0;
+       if (process_lock())
+               return -1;
+       if ((fd = open(path, O_RDWR)) < 0) {
+               // give benefit of the doubt
+               SYSERROR("Error opening partial file");
+               process_unlock();
+               return 0;
+       }
+       lk.l_type = F_WRLCK;
+       lk.l_whence = SEEK_SET;
+       lk.l_start = 0;
+       lk.l_len = 0;
+       lk.l_pid = -1;
+       if (fcntl(fd, F_GETLK, &lk) == 0 && lk.l_pid != -1) {
+               // create is still ongoing
+               close(fd);
+               process_unlock();
+               return 1;
+       }
+       // create completed but partial is still there.
+       close(fd);
+       process_unlock();
+       return 2;
+}
+
+int create_partial(struct lxc_container *c)
+{
+       // $lxcpath + '/' + $name + '/partial' + \0
+       int len = strlen(c->config_path) + strlen(c->name) + 10;
+       char *path = alloca(len);
+       int fd, ret;
+       struct flock lk;
+
+       ret = snprintf(path, len, "%s/%s/partial", c->config_path, c->name);
+       if (ret < 0 || ret >= len) {
+               ERROR("Error writing partial pathname");
+               return -1;
+       }
+       if (process_lock())
+               return -1;
+       if ((fd=open(path, O_RDWR | O_CREAT | O_EXCL, 0755)) < 0) {
+               SYSERROR("Erorr creating partial file");
+               process_unlock();
+               return -1;
+       }
+       lk.l_type = F_WRLCK;
+       lk.l_whence = SEEK_SET;
+       lk.l_start = 0;
+       lk.l_len = 0;
+       if (fcntl(fd, F_SETLKW, &lk) < 0) {
+               SYSERROR("Error locking partial file %s", path);
+               close(fd);
+               process_unlock();
+               return -1;
+       }
+       process_unlock();
+
+       return fd;
+}
+
+void remove_partial(struct lxc_container *c, int fd)
+{
+       // $lxcpath + '/' + $name + '/partial' + \0
+       int len = strlen(c->config_path) + strlen(c->name) + 10;
+       char *path = alloca(len);
+       int ret;
+
+       close(fd);
+       ret = snprintf(path, len, "%s/%s/partial", c->config_path, c->name);
+       if (ret < 0 || ret >= len) {
+               ERROR("Error writing partial pathname");
+               return;
+       }
+       if (process_lock())
+               return;
+       if (unlink(path) < 0)
+               SYSERROR("Error unlink partial file %s", path);
+       process_unlock();
+}
+
 /* LOCKING
- * c->privlock protects the struct lxc_container from multiple threads.
- * c->slock protects the on-disk container data
+ * 1. container_mem_lock(c) protects the struct lxc_container from multiple threads.
+ * 2. container_disk_lock(c) protects the on-disk container data - in particular the
+ *    container configuration file.
+ *    The container_disk_lock also takes the container_mem_lock.
+ * 3. thread_mutex protects process data (ex: fd table) from multiple threads.
  * NOTHING mutexes two independent programs with their own struct
  * lxc_container for the same c->name, between API calls.  For instance,
  * c->config_read(); c->start();  Between those calls, data on disk
@@ -47,7 +173,7 @@ lxc_log_define(lxc_container, lxc);
  * due to hung callers.  So I prefer to keep the locks only within our own
  * functions, not across functions.
  *
- * If you're going to fork while holding a lxccontainer, increment
+ * If you're going to clone while holding a lxccontainer, increment
  * c->numthreads (under privlock) before forking.  When deleting,
  * decrement numthreads under privlock, then if it hits 0 you can delete.
  * Do not ever use a lxccontainer whose numthreads you did not bump.
@@ -67,12 +193,11 @@ static void lxc_container_free(struct lxc_container *c)
                c->error_string = NULL;
        }
        if (c->slock) {
-               sem_close(c->slock);
+               lxc_putlock(c->slock);
                c->slock = NULL;
        }
        if (c->privlock) {
-               sem_destroy(c->privlock);
-               free(c->privlock);
+               lxc_putlock(c->privlock);
                c->privlock = NULL;
        }
        if (c->name) {
@@ -90,12 +215,40 @@ static void lxc_container_free(struct lxc_container *c)
        free(c);
 }
 
+/*
+ * Consider the following case:
+freer                         |    racing get()er
+==================================================================
+lxc_container_put()           |   lxc_container_get()
+\ lxclock(c->privlock)        |   c->numthreads < 1? (no)
+\ c->numthreads = 0           |   \ lxclock(c->privlock) -> waits
+\ lxcunlock()                 |   \
+\ lxc_container_free()        |   \ lxclock() returns
+                              |   \ c->numthreads < 1 -> return 0
+\ \ (free stuff)              |
+\ \ sem_destroy(privlock)     |
+
+ * When the get()er checks numthreads the first time, one of the following
+ * is true:
+ * 1. freer has set numthreads = 0.  get() returns 0
+ * 2. freer is between lxclock and setting numthreads to 0.  get()er will
+ *    sem_wait on privlock, get lxclock after freer() drops it, then see
+ *    numthreads is 0 and exit without touching lxclock again..
+ * 3. freer has not yet locked privlock.  If get()er runs first, then put()er
+ *    will see --numthreads = 1 and not call lxc_container_free().
+*/
+
 int lxc_container_get(struct lxc_container *c)
 {
        if (!c)
                return 0;
 
-       if (lxclock(c->privlock, 0))
+       // if someone else has already started freeing the container, don't
+       // try to take the lock, which may be invalid
+       if (c->numthreads < 1)
+               return 0;
+
+       if (container_mem_lock(c))
                return 0;
        if (c->numthreads < 1) {
                // bail without trying to unlock, bc the privlock is now probably
@@ -103,7 +256,7 @@ int lxc_container_get(struct lxc_container *c)
                return 0;
        }
        c->numthreads++;
-       lxcunlock(c->privlock);
+       container_mem_unlock(c);
        return 1;
 }
 
@@ -111,24 +264,17 @@ int lxc_container_put(struct lxc_container *c)
 {
        if (!c)
                return -1;
-       if (lxclock(c->privlock, 0))
+       if (container_mem_lock(c))
                return -1;
        if (--c->numthreads < 1) {
-               lxcunlock(c->privlock);
+               container_mem_unlock(c);
                lxc_container_free(c);
                return 1;
        }
-       lxcunlock(c->privlock);
+       container_mem_unlock(c);
        return 0;
 }
 
-static bool file_exists(char *f)
-{
-       struct stat statbuf;
-
-       return stat(f, &statbuf) == 0;
-}
-
 static bool lxcapi_is_defined(struct lxc_container *c)
 {
        struct stat statbuf;
@@ -138,7 +284,7 @@ static bool lxcapi_is_defined(struct lxc_container *c)
        if (!c)
                return false;
 
-       if (lxclock(c->privlock, 0))
+       if (container_mem_lock(c))
                return false;
        if (!c->configfile)
                goto out;
@@ -148,27 +294,21 @@ static bool lxcapi_is_defined(struct lxc_container *c)
        ret = true;
 
 out:
-       lxcunlock(c->privlock);
+       container_mem_unlock(c);
        return ret;
 }
 
 static const char *lxcapi_state(struct lxc_container *c)
 {
-       const char *ret;
        lxc_state_t s;
 
        if (!c)
                return NULL;
-       if (lxclock(c->slock, 0))
-               return NULL;
        s = lxc_getstate(c->name, c->config_path);
-       ret = lxc_state2str(s);
-       lxcunlock(c->slock);
-
-       return ret;
+       return lxc_state2str(s);
 }
 
-static bool is_stopped_nolock(struct lxc_container *c)
+static bool is_stopped(struct lxc_container *c)
 {
        lxc_state_t s;
        s = lxc_getstate(c->name, c->config_path);
@@ -193,10 +333,7 @@ static bool lxcapi_freeze(struct lxc_container *c)
        if (!c)
                return false;
 
-       if (lxclock(c->slock, 0))
-               return false;
-       ret = lxc_freeze(c->name);
-       lxcunlock(c->slock);
+       ret = lxc_freeze(c->name, c->config_path);
        if (ret)
                return false;
        return true;
@@ -208,26 +345,34 @@ static bool lxcapi_unfreeze(struct lxc_container *c)
        if (!c)
                return false;
 
-       if (lxclock(c->slock, 0))
-               return false;
-       ret = lxc_unfreeze(c->name);
-       lxcunlock(c->slock);
+       ret = lxc_unfreeze(c->name, c->config_path);
        if (ret)
                return false;
        return true;
 }
 
-static pid_t lxcapi_init_pid(struct lxc_container *c)
+static int lxcapi_console_getfd(struct lxc_container *c, int *ttynum, int *masterfd)
 {
-       pid_t ret;
+       int ttyfd;
        if (!c)
                return -1;
 
-       if (lxclock(c->slock, 0))
+       ttyfd = lxc_console_getfd(c, ttynum, masterfd);
+       return ttyfd;
+}
+
+static int lxcapi_console(struct lxc_container *c, int ttynum, int stdinfd,
+                         int stdoutfd, int stderrfd, int escape)
+{
+       return lxc_console(c, ttynum, stdinfd, stdoutfd, stderrfd, escape);
+}
+
+static pid_t lxcapi_init_pid(struct lxc_container *c)
+{
+       if (!c)
                return -1;
-       ret = get_init_pid(c->name, c->config_path);
-       lxcunlock(c->slock);
-       return ret;
+
+       return lxc_cmd_get_init_pid(c->name, c->config_path);
 }
 
 static bool load_config_locked(struct lxc_container *c, const char *fname)
@@ -241,7 +386,8 @@ static bool load_config_locked(struct lxc_container *c, const char *fname)
 
 static bool lxcapi_load_config(struct lxc_container *c, const char *alt_file)
 {
-       bool ret = false;
+       bool ret = false, need_disklock = false;
+       int lret;
        const char *fname;
        if (!c)
                return false;
@@ -251,10 +397,27 @@ static bool lxcapi_load_config(struct lxc_container *c, const char *alt_file)
                fname = alt_file;
        if (!fname)
                return false;
-       if (lxclock(c->slock, 0))
+       /*
+        * If we're reading something other than the container's config,
+        * we only need to lock the in-memory container.  If loading the
+        * container's config file, take the disk lock.
+        */
+       if (strcmp(fname, c->configfile) == 0)
+               need_disklock = true;
+
+       if (need_disklock)
+               lret = container_disk_lock(c);
+       else
+               lret = container_mem_lock(c);
+       if (lret)
                return false;
+
        ret = load_config_locked(c, fname);
-       lxcunlock(c->slock);
+
+       if (need_disklock)
+               container_disk_unlock(c);
+       else
+               container_mem_unlock(c);
        return ret;
 }
 
@@ -262,7 +425,12 @@ static void lxcapi_want_daemonize(struct lxc_container *c)
 {
        if (!c)
                return;
+       if (container_mem_lock(c)) {
+               ERROR("Error getting mem lock");
+               return;
+       }
        c->daemonize = 1;
+       container_mem_unlock(c);
 }
 
 static bool lxcapi_wait(struct lxc_container *c, const char *state, int timeout)
@@ -313,15 +481,28 @@ static bool lxcapi_start(struct lxc_container *c, int useinit, char * const argv
        if (!c->lxc_conf)
                return false;
 
+       if ((ret = ongoing_create(c)) < 0) {
+               ERROR("Error checking for incomplete creation");
+               return false;
+       }
+       if (ret == 2) {
+               ERROR("Error: %s creation was not completed", c->name);
+               c->destroy(c);
+               return false;
+       } else if (ret == 1) {
+               ERROR("Error: creation of %s is ongoing", c->name);
+               return false;
+       }
+
        /* is this app meant to be run through lxcinit, as in lxc-execute? */
        if (useinit && !argv)
                return false;
 
-       if (lxclock(c->privlock, 0))
+       if (container_mem_lock(c))
                return false;
        conf = c->lxc_conf;
        daemonize = c->daemonize;
-       lxcunlock(c->privlock);
+       container_mem_unlock(c);
 
        if (useinit) {
                ret = lxc_execute(c->name, argv, 1, conf, c->config_path);
@@ -340,13 +521,22 @@ static bool lxcapi_start(struct lxc_container *c, int useinit, char * const argv
        if (daemonize) {
                if (!lxc_container_get(c))
                        return false;
+               lxc_monitord_spawn(c->config_path);
+
+               if (process_lock())
+                       return false;
                pid_t pid = fork();
                if (pid < 0) {
                        lxc_container_put(c);
+                       process_unlock();
                        return false;
                }
-               if (pid != 0)
-                       return wait_on_daemonized_start(c);
+               if (pid != 0) {
+                       ret = wait_on_daemonized_start(c);
+                       process_unlock();
+                       return ret;
+               }
+               process_unlock();
                /* second fork to be reparented by init */
                pid = fork();
                if (pid < 0) {
@@ -363,27 +553,12 @@ static bool lxcapi_start(struct lxc_container *c, int useinit, char * const argv
                close(0);
                close(1);
                close(2);
-               open("/dev/null", O_RDONLY);
+               open("/dev/zero", O_RDONLY);
                open("/dev/null", O_RDWR);
                open("/dev/null", O_RDWR);
                setsid();
        }
 
-       if (clearenv()) {
-               SYSERROR("failed to clear environment");
-               /* don't error out though */
-       }
-
-       if (putenv("container=lxc")) {
-               fprintf(stderr, "failed to set environment variable");
-               if (daemonize) {
-                       lxc_container_put(c);
-                       exit(1);
-               } else {
-                       return false;
-               }
-       }
-
 reboot:
        conf->reboot = 0;
        ret = lxc_start(c->name, argv, conf, c->config_path);
@@ -425,8 +600,10 @@ static bool lxcapi_startl(struct lxc_container *c, int useinit, ...)
                        break;
                n_inargs++;
                temp = realloc(inargs, n_inargs * sizeof(*inargs));
-               if (!temp)
+               if (!temp) {
+                       va_end(ap);
                        goto out;
+               }
                inargs = temp;
                inargs[n_inargs - 1] = strdup(arg);  // not sure if it's safe not to copy
        }
@@ -464,7 +641,7 @@ static bool lxcapi_stop(struct lxc_container *c)
        if (!c)
                return false;
 
-       ret = lxc_stop(c->name, c->config_path);
+       ret = lxc_cmd_stop(c->name, c->config_path);
 
        return ret == 0;
 }
@@ -508,19 +685,59 @@ static bool create_container_dir(struct lxc_container *c)
        return ret == 0;
 }
 
+static const char *lxcapi_get_config_path(struct lxc_container *c);
+static bool lxcapi_set_config_item(struct lxc_container *c, const char *key, const char *v);
+
+/*
+ * do_bdev_create: thin wrapper around bdev_create().  Like bdev_create(),
+ * it returns a mounted bdev on success, NULL on error.
+ */
+static struct bdev *do_bdev_create(struct lxc_container *c, const char *type,
+                        struct bdev_specs *specs)
+{
+       char *dest;
+       const char *lxcpath = lxcapi_get_config_path(c);
+       size_t len;
+       struct bdev *bdev;
+       int ret;
+
+       /* lxcpath/lxcname/rootfs */
+       len = strlen(c->name) + strlen(lxcpath) + 9;
+       dest = alloca(len);
+       ret = snprintf(dest, len, "%s/%s/rootfs", lxcpath, c->name);
+       if (ret < 0 || ret >= len)
+               return NULL;
+
+       bdev = bdev_create(dest, type, c->name, specs);
+       if (!bdev)
+               return NULL;
+       lxcapi_set_config_item(c, "lxc.rootfs", bdev->src);
+       return bdev;
+}
+
+static bool lxcapi_destroy(struct lxc_container *c);
 /*
- * backing stores not (yet) supported
- * for ->create, argv contains the arguments to pass to the template,
- * terminated by NULL.  If no arguments, you can just pass NULL.
+ * lxcapi_create:
+ * create a container with the given parameters.
+ * @c: container to be created.  It has the lxcpath, name, and a starting
+ *     configuration already set
+ * @t: the template to execute to instantiate the root filesystem and
+ *     adjust the configuration.
+ * @bdevtype: backing store type to use.  If NULL, dir will be used.
+ * @specs: additional parameters for the backing store, i.e. LVM vg to
+ *         use.
+ *
+ * @argv: the arguments to pass to the template, terminated by NULL.  If no
+ * arguments, you can just pass NULL.
  */
-static bool lxcapi_create(struct lxc_container *c, char *t, char *const argv[])
+static bool lxcapi_create(struct lxc_container *c, const char *t,
+               const char *bdevtype, struct bdev_specs *specs,
+               char *const argv[])
 {
        bool bret = false;
        pid_t pid;
-       int ret, status;
-       char *tpath = NULL;
-       int len, nargs = 0;
-       char **newargv;
+       char *tpath = NULL, **newargv;
+       int partial_fd, ret, len, nargs = 0;
 
        if (!c)
                return false;
@@ -537,22 +754,66 @@ static bool lxcapi_create(struct lxc_container *c, char *t, char *const argv[])
                goto out;
        }
 
-       if (!create_container_dir(c))
-               goto out;
-
        if (!c->save_config(c, NULL)) {
                ERROR("failed to save starting configuration for %s\n", c->name);
                goto out;
        }
 
-       /* we're going to fork.  but since we'll wait for our child, we
-          don't need to lxc_container_get */
+       /* container is already created if we have a config and rootfs.path is accessible */
+       if (lxcapi_is_defined(c) && c->lxc_conf && c->lxc_conf->rootfs.path && access(c->lxc_conf->rootfs.path, F_OK) == 0)
+               goto out;
 
-       if (lxclock(c->slock, 0)) {
-               ERROR("failed to grab global container lock for %s\n", c->name);
+       /* Mark that this container is being created */
+       if ((partial_fd = create_partial(c)) < 0)
                goto out;
+
+       /* no need to get disk lock bc we have the partial locked */
+
+       /*
+        * Create the backing store
+        * Note we can't do this in the same task as we use to execute the
+        * template because of the way zfs works.
+        * After you 'zfs create', zfs mounts the fs only in the initial
+        * namespace.
+        */
+       pid = fork();
+       if (pid < 0) {
+               SYSERROR("failed to fork task for container creation template\n");
+               goto out_unlock;
+       }
+
+       if (pid == 0) { // child
+               struct bdev *bdev = NULL;
+
+               if (!(bdev = do_bdev_create(c, bdevtype, specs))) {
+                       ERROR("Error creating backing store type %s for %s",
+                               bdevtype ? bdevtype : "(none)", c->name);
+                       exit(1);
+               }
+
+               /* save config file again to store the new rootfs location */
+               if (!c->save_config(c, NULL)) {
+                       ERROR("failed to save starting configuration for %s\n", c->name);
+                       // parent task won't see bdev in config so we delete it
+                       bdev->ops->umount(bdev);
+                       bdev->ops->destroy(bdev);
+                       exit(1);
+               }
+               exit(0);
        }
+       if (wait_for_pid(pid) != 0)
+               goto out;
+
+       /* reload config to get the rootfs */
+       if (c->lxc_conf)
+               lxc_conf_free(c->lxc_conf);
+       c->lxc_conf = NULL;
+       if (!load_config_locked(c, c->configfile))
+               goto out;
 
+       /*
+        * now execute the template
+        */
        pid = fork();
        if (pid < 0) {
                SYSERROR("failed to fork task for container creation template\n");
@@ -560,27 +821,46 @@ static bool lxcapi_create(struct lxc_container *c, char *t, char *const argv[])
        }
 
        if (pid == 0) { // child
-               char *patharg, *namearg;
+               char *patharg, *namearg, *rootfsarg, *src;
+               struct bdev *bdev = NULL;
                int i;
 
-               close(0);
-               close(1);
-               close(2);
-               open("/dev/null", O_RDONLY);
-               open("/dev/null", O_RDWR);
-               open("/dev/null", O_RDWR);
+               if (unshare(CLONE_NEWNS) < 0) {
+                       ERROR("error unsharing mounts");
+                       exit(1);
+               }
+
+               src = c->lxc_conf->rootfs.path;
+               /*
+                * for an overlayfs create, what the user wants is the template to fill
+                * in what will become the readonly lower layer.  So don't mount for
+                * the template
+                */
+               if (strncmp(src, "overlayfs:", 10) == 0) {
+                       src = overlayfs_getlower(src+10);
+               }
+               bdev = bdev_init(src, c->lxc_conf->rootfs.mount, NULL);
+               if (!bdev) {
+                       ERROR("Error opening rootfs");
+                       exit(1);
+               }
+
+               if (bdev->ops->mount(bdev) < 0) {
+                       ERROR("Error mounting rootfs");
+                       exit(1);
+               }
 
                /*
                 * create our new array, pre-pend the template name and
                 * base args
                 */
                if (argv)
-                       for (; argv[nargs]; nargs++) ;
-               nargs += 3;  // template, path and name args
+                       for (nargs = 0; argv[nargs]; nargs++) ;
+               nargs += 4;  // template, path, rootfs and name args
                newargv = malloc(nargs * sizeof(*newargv));
                if (!newargv)
                        exit(1);
-               newargv[0] = t;
+               newargv[0] = (char *)t;
 
                len = strlen(c->config_path) + strlen(c->name) + strlen("--path=") + 2;
                patharg = malloc(len);
@@ -599,10 +879,19 @@ static bool lxcapi_create(struct lxc_container *c, char *t, char *const argv[])
                        exit(1);
                newargv[2] = namearg;
 
+               len = strlen("--rootfs=") + 1 + strlen(bdev->dest);
+               rootfsarg = malloc(len);
+               if (!rootfsarg)
+                       exit(1);
+               ret = snprintf(rootfsarg, len, "--rootfs=%s", bdev->dest);
+               if (ret < 0 || ret >= len)
+                       exit(1);
+               newargv[3] = rootfsarg;
+
                /* add passed-in args */
                if (argv)
-                       for (i = 3; i < nargs; i++)
-                               newargv[i] = argv[i-3];
+                       for (i = 4; i < nargs; i++)
+                               newargv[i] = argv[i-4];
 
                /* add trailing NULL */
                nargs++;
@@ -612,31 +901,13 @@ static bool lxcapi_create(struct lxc_container *c, char *t, char *const argv[])
                newargv[nargs - 1] = NULL;
 
                /* execute */
-               ret = execv(tpath, newargv);
+               execv(tpath, newargv);
                SYSERROR("failed to execute template %s", tpath);
                exit(1);
        }
 
-again:
-       ret = waitpid(pid, &status, 0);
-       if (ret == -1) {
-               if (errno == -EINTR)
-                       goto again;
-               SYSERROR("waitpid failed");
-               goto out_unlock;
-       }
-       if (ret != pid)
-               goto again;
-       if (!WIFEXITED(status))  { // did not exit normally
-               // we could set an error code and string inside the
-               // container_struct here if we like
-               ERROR("container creation template exited abnormally\n");
-               goto out_unlock;
-       }
-
-       if (WEXITSTATUS(status) != 0) {
-               ERROR("container creation template for %s exited with %d\n",
-                     c->name, WEXITSTATUS(status));
+       if (wait_for_pid(pid) != 0) {
+               ERROR("container creation template for %s failed\n", c->name);
                goto out_unlock;
        }
 
@@ -648,13 +919,33 @@ again:
        bret = load_config_locked(c, c->configfile);
 
 out_unlock:
-       lxcunlock(c->slock);
+       if (partial_fd >= 0)
+               remove_partial(c, partial_fd);
 out:
        if (tpath)
                free(tpath);
+       if (!bret && c)
+               lxcapi_destroy(c);
        return bret;
 }
 
+static bool lxcapi_reboot(struct lxc_container *c)
+{
+       pid_t pid;
+
+       if (!c)
+               return false;
+       if (!c->is_running(c))
+               return false;
+       pid = c->init_pid(c);
+       if (pid <= 0)
+               return false;
+       if (kill(pid, SIGINT) < 0)
+               return false;
+       return true;
+
+}
+
 static bool lxcapi_shutdown(struct lxc_container *c, int timeout)
 {
        bool retv;
@@ -679,7 +970,8 @@ static bool lxcapi_shutdown(struct lxc_container *c, int timeout)
        return retv;
 }
 
-static bool lxcapi_createl(struct lxc_container *c, char *t, ...)
+static bool lxcapi_createl(struct lxc_container *c, const char *t,
+               const char *bdevtype, struct bdev_specs *specs, ...)
 {
        bool bret = false;
        char **args = NULL, **temp;
@@ -693,7 +985,7 @@ static bool lxcapi_createl(struct lxc_container *c, char *t, ...)
         * since we're going to wait for create to finish, I don't think we
         * need to get a copy of the arguments.
         */
-       va_start(ap, t);
+       va_start(ap, specs);
        while (1) {
                char *arg;
                arg = va_arg(ap, char *);
@@ -701,15 +993,18 @@ static bool lxcapi_createl(struct lxc_container *c, char *t, ...)
                        break;
                nargs++;
                temp = realloc(args, (nargs+1) * sizeof(*args));
-               if (!temp)
+               if (!temp) {
+                       va_end(ap);
                        goto out;
+               }
                args = temp;
                args[nargs - 1] = arg;
        }
        va_end(ap);
-       args[nargs] = NULL;
+       if (args)
+               args[nargs] = NULL;
 
-       bret = c->create(c, t, args);
+       bret = c->create(c, t, bdevtype, specs, args);
 
 out:
        if (args)
@@ -723,25 +1018,135 @@ static bool lxcapi_clear_config_item(struct lxc_container *c, const char *key)
 
        if (!c || !c->lxc_conf)
                return false;
-       if (lxclock(c->privlock, 0)) {
+       if (container_mem_lock(c))
                return false;
-       }
        ret = lxc_clear_config_item(c->lxc_conf, key);
-       lxcunlock(c->privlock);
+       container_mem_unlock(c);
        return ret == 0;
 }
 
+char** lxcapi_get_ips(struct lxc_container *c, char* interface, char* family, int scope)
+{
+       int count = 0;
+       struct ifaddrs *interfaceArray = NULL, *tempIfAddr = NULL;
+       char addressOutputBuffer[INET6_ADDRSTRLEN];
+       void *tempAddrPtr = NULL;
+       char **addresses = NULL, **temp;
+       char *address = NULL;
+       char new_netns_path[MAXPATHLEN];
+       int old_netns = -1, new_netns = -1, ret = 0;
+
+       if (!c->is_running(c))
+               goto out;
+
+       /* Save reference to old netns */
+       old_netns = open("/proc/self/ns/net", O_RDONLY);
+       if (old_netns < 0) {
+               SYSERROR("failed to open /proc/self/ns/net");
+               goto out;
+       }
+
+       /* Switch to new netns */
+       ret = snprintf(new_netns_path, MAXPATHLEN, "/proc/%d/ns/net", c->init_pid(c));
+       if (ret < 0 || ret >= MAXPATHLEN)
+               goto out;
+
+       new_netns = open(new_netns_path, O_RDONLY);
+       if (new_netns < 0) {
+               SYSERROR("failed to open %s", new_netns_path);
+               goto out;
+       }
+
+       if (setns(new_netns, CLONE_NEWNET)) {
+               SYSERROR("failed to setns");
+               goto out;
+       }
+
+       /* Grab the list of interfaces */
+       if (getifaddrs(&interfaceArray)) {
+               SYSERROR("failed to get interfaces list");
+               goto out;
+       }
+
+       /* Iterate through the interfaces */
+       for (tempIfAddr = interfaceArray; tempIfAddr != NULL; tempIfAddr = tempIfAddr->ifa_next) {
+               if(tempIfAddr->ifa_addr->sa_family == AF_INET) {
+                       if (family && strcmp(family, "inet"))
+                               continue;
+                       tempAddrPtr = &((struct sockaddr_in *)tempIfAddr->ifa_addr)->sin_addr;
+               }
+               else {
+                       if (family && strcmp(family, "inet6"))
+                               continue;
+
+                       if (((struct sockaddr_in6 *)tempIfAddr->ifa_addr)->sin6_scope_id != scope)
+                               continue;
+
+                       tempAddrPtr = &((struct sockaddr_in6 *)tempIfAddr->ifa_addr)->sin6_addr;
+               }
+
+               if (interface && strcmp(interface, tempIfAddr->ifa_name))
+                       continue;
+               else if (!interface && strcmp("lo", tempIfAddr->ifa_name) == 0)
+                       continue;
+
+               address = (char *)inet_ntop(tempIfAddr->ifa_addr->sa_family,
+                                          tempAddrPtr,
+                                          addressOutputBuffer,
+                                          sizeof(addressOutputBuffer));
+               if (!address)
+                       continue;
+
+               count += 1;
+               temp = realloc(addresses, count * sizeof(*addresses));
+               if (!temp) {
+                       count--;
+                       goto out;
+               }
+               addresses = temp;
+               addresses[count - 1] = strdup(address);
+       }
+
+out:
+       if(interfaceArray)
+               freeifaddrs(interfaceArray);
+
+       /* Switch back to original netns */
+       if (old_netns >= 0 && setns(old_netns, CLONE_NEWNET))
+               SYSERROR("failed to setns");
+       if (new_netns >= 0)
+               close(new_netns);
+       if (old_netns >= 0)
+               close(old_netns);
+
+       /* Append NULL to the array */
+       if (count) {
+               count++;
+               temp = realloc(addresses, count * sizeof(*addresses));
+               if (!temp) {
+                       int i;
+                       for (i = 0; i < count-1; i++)
+                               free(addresses[i]);
+                       free(addresses);
+                       return NULL;
+               }
+               addresses = temp;
+               addresses[count - 1] = NULL;
+       }
+
+       return addresses;
+}
+
 static int lxcapi_get_config_item(struct lxc_container *c, const char *key, char *retv, int inlen)
 {
        int ret;
 
        if (!c || !c->lxc_conf)
                return -1;
-       if (lxclock(c->privlock, 0)) {
+       if (container_mem_lock(c))
                return -1;
-       }
        ret = lxc_get_config_item(c->lxc_conf, key, retv, inlen);
-       lxcunlock(c->privlock);
+       container_mem_unlock(c);
        return ret;
 }
 
@@ -756,77 +1161,107 @@ static int lxcapi_get_keys(struct lxc_container *c, const char *key, char *retv,
         */
        if (!c || !c->lxc_conf)
                return -1;
-       if (lxclock(c->privlock, 0))
+       if (container_mem_lock(c))
                return -1;
        int ret = -1;
        if (strncmp(key, "lxc.network.", 12) == 0)
                ret =  lxc_list_nicconfigs(c->lxc_conf, key, retv, inlen);
-       lxcunlock(c->privlock);
+       container_mem_unlock(c);
        return ret;
 }
 
-
-/* default config file - should probably come through autoconf */
-#define LXC_DEFAULT_CONFIG "/etc/lxc/default.conf"
 static bool lxcapi_save_config(struct lxc_container *c, const char *alt_file)
 {
+       FILE *fout;
+       bool ret = false, need_disklock = false;
+       int lret;
+
        if (!alt_file)
                alt_file = c->configfile;
        if (!alt_file)
                return false;  // should we write to stdout if no file is specified?
-       if (!c->lxc_conf)
+
+       // If we haven't yet loaded a config, load the stock config
+       if (!c->lxc_conf) {
                if (!c->load_config(c, LXC_DEFAULT_CONFIG)) {
                        ERROR("Error loading default configuration file %s while saving %s\n", LXC_DEFAULT_CONFIG, c->name);
                        return false;
                }
+       }
 
-       FILE *fout = fopen(alt_file, "w");
-       if (!fout)
+       if (!create_container_dir(c))
                return false;
-       if (lxclock(c->privlock, 0)) {
-               fclose(fout);
+
+       /*
+        * If we're writing to the container's config file, take the
+        * disk lock.  Otherwise just take the memlock to protect the
+        * struct lxc_container while we're traversing it.
+        */
+       if (strcmp(c->configfile, alt_file) == 0)
+               need_disklock = true;
+
+       if (need_disklock)
+               lret = container_disk_lock(c);
+       else
+               lret = container_mem_lock(c);
+
+       if (lret)
                return false;
-       }
+
+       fout = fopen(alt_file, "w");
+       if (!fout)
+               goto out;
        write_config(fout, c->lxc_conf);
        fclose(fout);
-       lxcunlock(c->privlock);
-       return true;
+       ret = true;
+
+out:
+       if (need_disklock)
+               container_disk_unlock(c);
+       else
+               container_mem_unlock(c);
+       return ret;
 }
 
+// do we want the api to support --force, or leave that to the caller?
 static bool lxcapi_destroy(struct lxc_container *c)
 {
-       pid_t pid;
-       int ret, status;
+       struct bdev *r = NULL;
+       bool ret = false;
 
-       if (!c)
+       if (!c || !lxcapi_is_defined(c))
                return false;
 
-       pid = fork();
-       if (pid < 0)
+       if (container_disk_lock(c))
                return false;
-       if (pid == 0) { // child
-               ret = execlp("lxc-destroy", "lxc-destroy", "-n", c->name, NULL);
-               perror("execl");
-               exit(1);
+
+       if (!is_stopped(c)) {
+               // we should queue some sort of error - in c->error_string?
+               ERROR("container %s is not stopped", c->name);
+               goto out;
        }
 
-again:
-       ret = waitpid(pid, &status, 0);
-       if (ret == -1) {
-               if (errno == -EINTR)
-                       goto again;
-               perror("waitpid");
-               return false;
+       if (c->lxc_conf && c->lxc_conf->rootfs.path && c->lxc_conf->rootfs.mount)
+               r = bdev_init(c->lxc_conf->rootfs.path, c->lxc_conf->rootfs.mount, NULL);
+       if (r) {
+               if (r->ops->destroy(r) < 0) {
+                       ERROR("Error destroying rootfs for %s", c->name);
+                       goto out;
+               }
        }
-       if (ret != pid)
-               goto again;
-       if (!WIFEXITED(status))  { // did not exit normally
-               // we could set an error code and string inside the
-               // container_struct here if we like
-               return false;
+
+       const char *p1 = lxcapi_get_config_path(c);
+       char *path = alloca(strlen(p1) + strlen(c->name) + 2);
+       sprintf(path, "%s/%s", p1, c->name);
+       if (lxc_rmdir_onedev(path) < 0) {
+               ERROR("Error destroying container directory for %s", c->name);
+               goto out;
        }
+       ret = true;
 
-       return WEXITSTATUS(status) == 0;
+out:
+       container_disk_unlock(c);
+       return ret;
 }
 
 static bool lxcapi_set_config_item(struct lxc_container *c, const char *key, const char *v)
@@ -838,7 +1273,7 @@ static bool lxcapi_set_config_item(struct lxc_container *c, const char *key, con
        if (!c)
                return false;
 
-       if (lxclock(c->privlock, 0))
+       if (container_mem_lock(c))
                return false;
 
        if (!c->lxc_conf)
@@ -853,7 +1288,7 @@ static bool lxcapi_set_config_item(struct lxc_container *c, const char *key, con
                b = true;
 
 err:
-       lxcunlock(c->privlock);
+       container_mem_unlock(c);
        return b;
 }
 
@@ -914,7 +1349,7 @@ static bool lxcapi_set_config_path(struct lxc_container *c, const char *path)
        if (!c)
                return b;
 
-       if (lxclock(c->privlock, 0))
+       if (container_mem_lock(c))
                return b;
 
        p = strdup(path);
@@ -940,7 +1375,7 @@ static bool lxcapi_set_config_path(struct lxc_container *c, const char *path)
 err:
        if (oldpath)
                free(oldpath);
-       lxcunlock(c->privlock);
+       container_mem_unlock(c);
        return b;
 }
 
@@ -948,42 +1383,38 @@ err:
 static bool lxcapi_set_cgroup_item(struct lxc_container *c, const char *subsys, const char *value)
 {
        int ret;
-       bool b = false;
 
        if (!c)
                return false;
 
-       if (lxclock(c->privlock, 0))
+       if (is_stopped(c))
                return false;
 
-       if (is_stopped_nolock(c))
-               goto err;
+       if (container_disk_lock(c))
+               return false;
 
-       ret = lxc_cgroup_set(c->name, subsys, value);
-       if (!ret)
-               b = true;
-err:
-       lxcunlock(c->privlock);
-       return b;
+       ret = lxc_cgroup_set(c->name, subsys, value, c->config_path);
+
+       container_disk_unlock(c);
+       return ret == 0;
 }
 
 static int lxcapi_get_cgroup_item(struct lxc_container *c, const char *subsys, char *retv, int inlen)
 {
-       int ret = -1;
+       int ret;
 
        if (!c || !c->lxc_conf)
                return -1;
 
-       if (lxclock(c->privlock, 0))
+       if (is_stopped(c))
                return -1;
 
-       if (is_stopped_nolock(c))
-               goto out;
+       if (container_disk_lock(c))
+               return -1;
 
-       ret = lxc_cgroup_get(c->name, subsys, retv, inlen);
+       ret = lxc_cgroup_get(c->name, subsys, retv, inlen, c->config_path);
 
-out:
-       lxcunlock(c->privlock);
+       container_disk_unlock(c);
        return ret;
 }
 
@@ -992,6 +1423,560 @@ const char *lxc_get_default_config_path(void)
        return default_lxc_path();
 }
 
+const char *lxc_get_default_lvm_vg(void)
+{
+       return default_lvm_vg();
+}
+
+const char *lxc_get_default_zfs_root(void)
+{
+       return default_zfs_root();
+}
+
+const char *lxc_get_version(void)
+{
+       return lxc_version();
+}
+
+static int copy_file(char *old, char *new)
+{
+       int in, out;
+       ssize_t len, ret;
+       char buf[8096];
+       struct stat sbuf;
+
+       if (file_exists(new)) {
+               ERROR("copy destination %s exists", new);
+               return -1;
+       }
+       ret = stat(old, &sbuf);
+       if (ret < 0) {
+               SYSERROR("stat'ing %s", old);
+               return -1;
+       }
+
+       in = open(old, O_RDONLY);
+       if (in < 0) {
+               SYSERROR("opening original file %s", old);
+               return -1;
+       }
+       out = open(new, O_CREAT | O_EXCL | O_WRONLY, 0644);
+       if (out < 0) {
+               SYSERROR("opening new file %s", new);
+               close(in);
+               return -1;
+       }
+
+       while (1) {
+               len = read(in, buf, 8096);
+               if (len < 0) {
+                       SYSERROR("reading old file %s", old);
+                       goto err;
+               }
+               if (len == 0)
+                       break;
+               ret = write(out, buf, len);
+               if (ret < len) {  // should we retry?
+                       SYSERROR("write to new file %s was interrupted", new);
+                       goto err;
+               }
+       }
+       close(in);
+       close(out);
+
+       // we set mode, but not owner/group
+       ret = chmod(new, sbuf.st_mode);
+       if (ret) {
+               SYSERROR("setting mode on %s", new);
+               return -1;
+       }
+
+       return 0;
+
+err:
+       close(in);
+       close(out);
+       return -1;
+}
+
+/*
+ * we're being passed result of two strstrs(x, y).  We want to write
+ * all data up to the first found string, or to end of the string if
+ * neither string was found.
+ * This function will return the earliest found string if any, or else
+ * NULL
+ */
+static const char *lowest_nonnull(const char *p1, const char *p2)
+{
+       if (!p1)
+               return p2;
+       if (!p2)
+               return p1;
+       return p1 < p2 ? p1 : p2;
+}
+
+static int is_word_sep(char c)
+{
+       switch(c) {
+       case '\0':
+       case '\n':
+       case '\r':
+       case '\t':
+       case ' ':
+       case '=':
+       case '.':
+       case ',':
+       case '_':
+       case '-':
+       case '/':
+               return 1;
+       default: return 0;
+       }
+}
+
+static const char *find_first_wholeword(const char *p0, const char *word)
+{
+       const char *p = p0;
+
+       if (!p)
+               return NULL;
+
+       while ((p = strstr(p, word)) != NULL) {
+               if ((p == p0 || is_word_sep(*(p-1))) &&
+                   is_word_sep(p[strlen(word)]))
+                       return p;
+               p++;
+       }
+       return NULL;
+}
+
+static int update_name_and_paths(const char *path, struct lxc_container *oldc,
+               const char *newname, const char *newpath)
+{
+       FILE *f;
+       long flen;
+       char *contents;
+       const char *p0, *p1, *p2, *end;
+       const char *oldpath = oldc->get_config_path(oldc);
+       const char *oldname = oldc->name;
+
+       f = fopen(path, "r");
+       if (!f) {
+               SYSERROR("opening old config");
+               return -1;
+       }
+       if (fseek(f, 0, SEEK_END) < 0) {
+               SYSERROR("seeking to end of old config");
+               fclose(f);
+               return -1;
+       }
+       flen = ftell(f);
+       if (flen < 0) {
+               fclose(f);
+               SYSERROR("telling size of old config");
+               return -1;
+       }
+       if (fseek(f, 0, SEEK_SET) < 0) {
+               fclose(f);
+               SYSERROR("rewinding old config");
+               return -1;
+       }
+       contents = malloc(flen+1);
+       if (!contents) {
+               SYSERROR("out of memory");
+               fclose(f);
+               return -1;
+       }
+       if (fread(contents, 1, flen, f) != flen) {
+               free(contents);
+               fclose(f);
+               SYSERROR("reading old config");
+               return -1;
+       }
+       contents[flen] = '\0';
+       if (fclose(f) < 0) {
+               free(contents);
+               SYSERROR("closing old config");
+               return -1;
+       }
+
+       f = fopen(path, "w");
+       if (!f) {
+               SYSERROR("reopening config");
+               free(contents);
+               return -1;
+       }
+
+       p0 = contents;
+       end = contents + flen;
+       while (1) {
+               p1 = find_first_wholeword(p0, oldpath);
+               p2 = find_first_wholeword(p0, oldname);
+               if (!p1 && !p2) {
+                       // write the rest and be done
+                       if (fwrite(p0, 1, (end-p0), f) != (end-p0)) {
+                               SYSERROR("writing new config");
+                               free(contents);
+                               fclose(f);
+                               return -1;
+                       }
+                       free(contents);
+                       fclose(f);
+                       // success
+                       return 0;
+               } else {
+                       const char *p = lowest_nonnull(p1, p2);
+                       const char *new = (p == p2) ? newname : newpath;
+                       if (fwrite(p0, 1, (p-p0), f) != (p-p0)) {
+                               SYSERROR("writing new config");
+                               free(contents);
+                               fclose(f);
+                               return -1;
+                       }
+                       p0 = p;
+                       // now write the newpath or newname
+                       if (fwrite(new, 1, strlen(new), f) != strlen(new)) {
+                               SYSERROR("writing new name or path in new config");
+                               free(contents);
+                               fclose(f);
+                               return -1;
+                       }
+                       p0 += (p == p2) ? strlen(oldname) : strlen(oldpath);
+               }
+       }
+}
+
+static int copyhooks(struct lxc_container *oldc, struct lxc_container *c)
+{
+       int i;
+       int ret;
+       struct lxc_list *it;
+
+       for (i=0; i<NUM_LXC_HOOKS; i++) {
+               lxc_list_for_each(it, &c->lxc_conf->hooks[i]) {
+                       char *hookname = it->elem;
+                       char *fname = rindex(hookname, '/');
+                       char tmppath[MAXPATHLEN];
+                       if (!fname) // relative path - we don't support, but maybe we should
+                               return 0;
+                       // copy the script, and change the entry in confile
+                       ret = snprintf(tmppath, MAXPATHLEN, "%s/%s/%s",
+                                       c->config_path, c->name, fname+1);
+                       if (ret < 0 || ret >= MAXPATHLEN)
+                               return -1;
+                       ret = copy_file(it->elem, tmppath);
+                       if (ret < 0)
+                               return -1;
+                       free(it->elem);
+                       it->elem = strdup(tmppath);
+                       if (!it->elem) {
+                               ERROR("out of memory copying hook path");
+                               return -1;
+                       }
+                       update_name_and_paths(it->elem, oldc, c->name, c->get_config_path(c));
+               }
+       }
+
+       c->save_config(c, NULL);
+       return 0;
+}
+
+static void new_hwaddr(char *hwaddr)
+{
+       FILE *f = fopen("/dev/urandom", "r");
+       if (f) {
+               unsigned int seed;
+               int ret = fread(&seed, sizeof(seed), 1, f);
+               if (ret != 1)
+                       seed = time(NULL);
+               fclose(f);
+               srand(seed);
+       } else
+               srand(time(NULL));
+       snprintf(hwaddr, 18, "00:16:3e:%02x:%02x:%02x",
+                       rand() % 255, rand() % 255, rand() % 255);
+}
+
+static void network_new_hwaddrs(struct lxc_container *c)
+{
+       struct lxc_list *it;
+
+       lxc_list_for_each(it, &c->lxc_conf->network) {
+               struct lxc_netdev *n = it->elem;
+               if (n->hwaddr)
+                       new_hwaddr(n->hwaddr);
+       }
+}
+
+static int copy_fstab(struct lxc_container *oldc, struct lxc_container *c)
+{
+       char newpath[MAXPATHLEN];
+       char *oldpath = oldc->lxc_conf->fstab;
+       int ret;
+
+       if (!oldpath)
+               return 0;
+
+       char *p = rindex(oldpath, '/');
+       if (!p)
+               return -1;
+       ret = snprintf(newpath, MAXPATHLEN, "%s/%s%s",
+                       c->config_path, c->name, p);
+       if (ret < 0 || ret >= MAXPATHLEN) {
+               ERROR("error printing new path for %s", oldpath);
+               return -1;
+       }
+       if (file_exists(newpath)) {
+               ERROR("error: fstab file %s exists", newpath);
+               return -1;
+       }
+
+       if (copy_file(oldpath, newpath) < 0) {
+               ERROR("error: copying %s to %s", oldpath, newpath);
+               return -1;
+       }
+       free(c->lxc_conf->fstab);
+       c->lxc_conf->fstab = strdup(newpath);
+       if (!c->lxc_conf->fstab) {
+               ERROR("error: allocating pathname");
+               return -1;
+       }
+
+       return 0;
+}
+
+static int copy_storage(struct lxc_container *c0, struct lxc_container *c,
+               const char *newtype, int flags, const char *bdevdata, unsigned long newsize)
+{
+       struct bdev *bdev;
+
+       bdev = bdev_copy(c0->lxc_conf->rootfs.path, c0->name, c->name,
+                       c0->config_path, c->config_path, newtype, !!(flags & LXC_CLONE_SNAPSHOT),
+                       bdevdata, newsize);
+       if (!bdev) {
+               ERROR("error copying storage");
+               return -1;
+       }
+       free(c->lxc_conf->rootfs.path);
+       c->lxc_conf->rootfs.path = strdup(bdev->src);
+       bdev_put(bdev);
+       if (!c->lxc_conf->rootfs.path)
+               return -1;
+       // here we could also update all lxc.mount.entries or even
+       // items in the lxc.mount fstab list.  As discussed on m-l,
+       // we could do either any source paths starting with the
+       // lxcpath/oldname, or simply anythign which is not a virtual
+       // fs or a bind mount.
+       return 0;
+}
+
+static int clone_update_rootfs(struct lxc_container *c, int flags, char **hookargs)
+{
+       int ret = -1;
+       char path[MAXPATHLEN];
+       struct bdev *bdev;
+       FILE *fout;
+       pid_t pid;
+       struct lxc_conf *conf = c->lxc_conf;
+
+       /* update hostname in rootfs */
+       /* we're going to mount, so run in a clean namespace to simplify cleanup */
+
+       pid = fork();
+       if (pid < 0)
+               return -1;
+       if (pid > 0)
+               return wait_for_pid(pid);
+
+       if (unshare(CLONE_NEWNS) < 0) {
+               ERROR("error unsharing mounts");
+               exit(1);
+       }
+       bdev = bdev_init(c->lxc_conf->rootfs.path, c->lxc_conf->rootfs.mount, NULL);
+       if (!bdev)
+               exit(1);
+       if (bdev->ops->mount(bdev) < 0)
+               exit(1);
+
+       if (!lxc_list_empty(&conf->hooks[LXCHOOK_CLONE])) {
+               /* Start of environment variable setup for hooks */
+               if (setenv("LXC_NAME", c->name, 1)) {
+                       SYSERROR("failed to set environment variable for container name");
+               }
+               if (setenv("LXC_CONFIG_FILE", conf->rcfile, 1)) {
+                       SYSERROR("failed to set environment variable for config path");
+               }
+               if (setenv("LXC_ROOTFS_MOUNT", conf->rootfs.mount, 1)) {
+                       SYSERROR("failed to set environment variable for rootfs mount");
+               }
+               if (setenv("LXC_ROOTFS_PATH", conf->rootfs.path, 1)) {
+                       SYSERROR("failed to set environment variable for rootfs mount");
+               }
+
+               if (run_lxc_hooks(c->name, "clone", conf, hookargs)) {
+                       ERROR("Error executing clone hook for %s", c->name);
+                       exit(1);
+               }
+       }
+
+       if (!(flags & LXC_CLONE_KEEPNAME)) {
+               ret = snprintf(path, MAXPATHLEN, "%s/etc/hostname", bdev->dest);
+               if (ret < 0 || ret >= MAXPATHLEN)
+                       exit(1);
+               if (!(fout = fopen(path, "w"))) {
+                       SYSERROR("unable to open %s: ignoring\n", path);
+                       exit(0);
+               }
+               if (fprintf(fout, "%s", c->name) < 0)
+                       exit(1);
+               if (fclose(fout) < 0)
+                       exit(1);
+       }
+       exit(0);
+}
+
+/*
+ * We want to support:
+sudo lxc-clone -o o1 -n n1 -s -L|-fssize fssize -v|--vgname vgname \
+        -p|--lvprefix lvprefix -t|--fstype fstype  -B backingstore
+
+-s [ implies overlayfs]
+-s -B overlayfs
+-s -B aufs
+
+only rootfs gets converted (copied/snapshotted) on clone.
+*/
+
+static int create_file_dirname(char *path)
+{
+       char *p = rindex(path, '/');
+       int ret;
+
+       if (!p)
+               return -1;
+       *p = '\0';
+       ret = mkdir(path, 0755);
+       if (ret && errno != EEXIST)
+               SYSERROR("creating container path %s\n", path);
+       *p = '/';
+       return ret;
+}
+
+struct lxc_container *lxcapi_clone(struct lxc_container *c, const char *newname,
+               const char *lxcpath, int flags,
+               const char *bdevtype, const char *bdevdata, unsigned long newsize,
+               char **hookargs)
+{
+       struct lxc_container *c2 = NULL;
+       char newpath[MAXPATHLEN];
+       int ret, storage_copied = 0;
+       const char *n, *l;
+       FILE *fout;
+
+       if (!c || !c->is_defined(c))
+               return NULL;
+
+       if (container_mem_lock(c))
+               return NULL;
+
+       if (!is_stopped(c)) {
+               ERROR("error: Original container (%s) is running", c->name);
+               goto out;
+       }
+
+       // Make sure the container doesn't yet exist.
+       n = newname ? newname : c->name;
+       l = lxcpath ? lxcpath : c->get_config_path(c);
+       ret = snprintf(newpath, MAXPATHLEN, "%s/%s/config", l, n);
+       if (ret < 0  || ret >= MAXPATHLEN) {
+               SYSERROR("clone: failed making config pathname");
+               goto out;
+       }
+       if (file_exists(newpath)) {
+               ERROR("error: clone: %s exists", newpath);
+               goto out;
+       }
+
+       if (create_file_dirname(newpath) < 0) {
+               ERROR("Error creating container dir for %s", newpath);
+               goto out;
+       }
+
+       // copy the configuration, tweak it as needed,
+       fout = fopen(newpath, "w");
+       if (!fout) {
+               SYSERROR("open %s", newpath);
+               goto out;
+       }
+       write_config(fout, c->lxc_conf);
+       fclose(fout);
+
+       if (update_name_and_paths(newpath, c, n, l) < 0) {
+               ERROR("Error updating name in cloned config");
+               goto out;
+       }
+
+       sprintf(newpath, "%s/%s/rootfs", l, n);
+       if (mkdir(newpath, 0755) < 0) {
+               SYSERROR("error creating %s", newpath);
+               goto out;
+       }
+
+       c2 = lxc_container_new(n, l);
+       if (!c2) {
+               ERROR("clone: failed to create new container (%s %s)", n, l);
+               goto out;
+       }
+
+       // copy hooks if requested
+       if (flags & LXC_CLONE_COPYHOOKS) {
+               ret = copyhooks(c, c2);
+               if (ret < 0) {
+                       ERROR("error copying hooks");
+                       goto out;
+               }
+       }
+
+       if (copy_fstab(c, c2) < 0) {
+               ERROR("error copying fstab");
+               goto out;
+       }
+
+       // update macaddrs
+       if (!(flags & LXC_CLONE_KEEPMACADDR))
+               network_new_hwaddrs(c2);
+
+       // copy/snapshot rootfs's
+       ret = copy_storage(c, c2, bdevtype, flags, bdevdata, newsize);
+       if (ret < 0)
+               goto out;
+
+       // We've now successfully created c2's storage, so clear it out if we
+       // fail after this
+       storage_copied = 1;
+
+       if (!c2->save_config(c2, NULL))
+               goto out;
+
+       if (clone_update_rootfs(c2, flags, hookargs) < 0)
+               goto out;
+
+       // TODO: update c's lxc.snapshot = count
+       container_mem_unlock(c);
+       return c2;
+
+out:
+       container_mem_unlock(c);
+       if (c2) {
+               if (!storage_copied)
+                       c2->lxc_conf->rootfs.path = NULL;
+               c2->destroy(c2);
+               lxc_container_put(c2);
+       }
+
+       return NULL;
+}
+
 struct lxc_container *lxc_container_new(const char *name, const char *configpath)
 {
        struct lxc_container *c;
@@ -1021,14 +2006,12 @@ struct lxc_container *lxc_container_new(const char *name, const char *configpath
        strcpy(c->name, name);
 
        c->numthreads = 1;
-       c->slock = lxc_newlock(name);
-       if (!c->slock) {
+       if (!(c->slock = lxc_newlock(c->config_path, name))) {
                fprintf(stderr, "failed to create lock\n");
                goto err;
        }
 
-       c->privlock = lxc_newlock(NULL);
-       if (!c->privlock) {
+       if (!(c->privlock = lxc_newlock(NULL, NULL))) {
                fprintf(stderr, "failed to alloc privlock\n");
                goto err;
        }
@@ -1041,12 +2024,21 @@ struct lxc_container *lxc_container_new(const char *name, const char *configpath
        if (file_exists(c->configfile))
                lxcapi_load_config(c, NULL);
 
+       if (ongoing_create(c) == 2) {
+               ERROR("Error: %s creation was not completed", c->name);
+               lxcapi_destroy(c);
+               lxc_conf_free(c->lxc_conf);
+               c->lxc_conf = NULL;
+       }
+
        // assign the member functions
        c->is_defined = lxcapi_is_defined;
        c->state = lxcapi_state;
        c->is_running = lxcapi_is_running;
        c->freeze = lxcapi_freeze;
        c->unfreeze = lxcapi_unfreeze;
+       c->console = lxcapi_console;
+       c->console_getfd = lxcapi_console_getfd;
        c->init_pid = lxcapi_init_pid;
        c->load_config = lxcapi_load_config;
        c->want_daemonize = lxcapi_want_daemonize;
@@ -1062,15 +2054,18 @@ struct lxc_container *lxc_container_new(const char *name, const char *configpath
        c->create = lxcapi_create;
        c->createl = lxcapi_createl;
        c->shutdown = lxcapi_shutdown;
+       c->reboot = lxcapi_reboot;
        c->clear_config_item = lxcapi_clear_config_item;
        c->get_config_item = lxcapi_get_config_item;
        c->get_cgroup_item = lxcapi_get_cgroup_item;
        c->set_cgroup_item = lxcapi_set_cgroup_item;
        c->get_config_path = lxcapi_get_config_path;
        c->set_config_path = lxcapi_set_config_path;
+       c->clone = lxcapi_clone;
+       c->get_ips = lxcapi_get_ips;
 
        /* we'll allow the caller to update these later */
-       if (lxc_log_init(NULL, "none", NULL, "lxc_container", 0)) {
+       if (lxc_log_init(NULL, "none", NULL, "lxc_container", 0, c->config_path)) {
                fprintf(stderr, "failed to open log\n");
                goto err;
        }