git.proxmox.com Git - mirror_lxc.git/blobdiff - src/lxc/lxccontainer.c
Add support for checkpoint and restore via CRIU
[mirror_lxc.git] / src / lxc / lxccontainer.c
index aad692f3fdd9f544a183daa00c57cab8cfe78b21..ed6f8de977e84c075b770f7438bf793c4961cabe 100644 (file)
@@ -34,6 +34,8 @@
 #include <arpa/inet.h>
 #include <libgen.h>
 #include <stdint.h>
+#include <grp.h>
+#include <sys/syscall.h>
 
 #include <lxc/lxccontainer.h>
 #include <lxc/version.h>
@@ -53,6 +55,7 @@
 #include "monitor.h"
 #include "namespace.h"
 #include "lxclock.h"
+#include "sync.h"
 
 #if HAVE_IFADDRS_H
 #include <ifaddrs.h>
 
 #define NOT_SUPPORTED_ERROR "the requested function %s is not currently supported with unprivileged containers"
 
-lxc_log_define(lxc_container, lxc);
-
-static bool file_exists(const char *f)
+/* Define faccessat() if missing from the C library */
+#ifndef HAVE_FACCESSAT
+static int faccessat(int __fd, const char *__file, int __type, int __flag)
 {
-       struct stat statbuf;
-
-       return stat(f, &statbuf) == 0;
+#ifdef __NR_faccessat
+return syscall(__NR_faccessat, __fd, __file, __type, __flag);
+#else
+errno = ENOSYS;
+return -1;
+#endif
 }
+#endif
+
+
+lxc_log_define(lxc_container, lxc);
 
 static bool config_file_exists(const char *lxcpath, const char *cname)
 {
@@ -401,9 +411,13 @@ static bool load_config_locked(struct lxc_container *c, const char *fname)
 {
        if (!c->lxc_conf)
                c->lxc_conf = lxc_conf_init();
-       if (c->lxc_conf && !lxc_config_read(fname, c->lxc_conf))
-               return true;
-       return false;
+       if (!c->lxc_conf)
+               return false;
+       if (lxc_config_read(fname, c->lxc_conf, false) != 0)
+               return false;
+       if (!clone_update_unexp_network(c->lxc_conf))
+               return false;
+       return true;
 }
 
 static bool lxcapi_load_config(struct lxc_container *c, const char *alt_file)
@@ -539,7 +553,7 @@ static bool lxcapi_start(struct lxc_container *c, int useinit, char * const argv
        FILE *pid_fp = NULL;
        char *default_args[] = {
                "/sbin/init",
-               '\0',
+               NULL,
        };
 
        /* container exists */
@@ -614,6 +628,7 @@ static bool lxcapi_start(struct lxc_container *c, int useinit, char * const argv
                        SYSERROR("Error chdir()ing to /.");
                        return false;
                }
+               lxc_check_inherited(conf, -1);
                close(0);
                close(1);
                close(2);
@@ -653,6 +668,7 @@ static bool lxcapi_start(struct lxc_container *c, int useinit, char * const argv
 reboot:
        conf->reboot = 0;
        ret = lxc_start(c->name, argv, conf, c->config_path);
+       c->error_num = ret;
 
        if (conf->reboot) {
                INFO("container requested reboot");
@@ -720,6 +736,31 @@ static bool lxcapi_stop(struct lxc_container *c)
        return ret == 0;
 }
 
+/* mkdir @path (0770 under umask 0002, EEXIST tolerated) and, when @conf has
+ * an id map, chown it with chown_mapped_root().  Returns 0, or -1 on error. */
+static int do_create_container_dir(const char *path, struct lxc_conf *conf)
+{
+       char *copy = alloca(strlen(path)+1);
+       mode_t saved_mask = umask(0002);
+       int rc = mkdir(path, 0770);
+       int saved_errno = errno; /* keep mkdir's errno across umask() */
+       umask(saved_mask);
+       errno = saved_errno;
+       if (rc && errno != EEXIST) {
+               SYSERROR("failed to create container path %s", path);
+               return -1;
+       }
+       /* chown_mapped_root() is handed a writable copy of the path */
+       strcpy(copy, path);
+       if (lxc_list_empty(&conf->id_map))
+               return 0;
+       if (chown_mapped_root(copy, conf) != 0) {
+               ERROR("Failed to chown container dir");
+               return -1;
+       }
+       return 0;
+}
+
 /*
  * create the standard expected container dir
  */
@@ -737,13 +778,7 @@ static bool create_container_dir(struct lxc_container *c)
                free(s);
                return false;
        }
-       ret = mkdir(s, 0755);
-       if (ret) {
-               if (errno == EEXIST)
-                       ret = 0;
-               else
-                       SYSERROR("failed to create container path for %s\n", c->name);
-       }
+       ret = do_create_container_dir(s, c->lxc_conf);
        free(s);
        return ret == 0;
 }
@@ -780,7 +815,7 @@ static struct bdev *do_bdev_create(struct lxc_container *c, const char *type,
 
        bdev = bdev_create(dest, type, c->name, specs);
        if (!bdev) {
-               ERROR("Failed to create backing store type %s\n", type);
+               ERROR("Failed to create backing store type %s", type);
                return NULL;
        }
 
@@ -789,9 +824,10 @@ static struct bdev *do_bdev_create(struct lxc_container *c, const char *type,
        /* if we are not root, chown the rootfs dir to root in the
         * target uidmap */
 
-       if (geteuid() != 0) {
+       if (geteuid() != 0 || (c->lxc_conf && !lxc_list_empty(&c->lxc_conf->id_map))) {
                if (chown_mapped_root(bdev->dest, c->lxc_conf) < 0) {
-                       ERROR("Error chowning %s to container root\n", bdev->dest);
+                       ERROR("Error chowning %s to container root", bdev->dest);
+                       suggest_default_idmap();
                        bdev_put(bdev);
                        return NULL;
                }
@@ -826,7 +862,7 @@ static char *get_template_path(const char *t)
                return NULL;
        }
        if (access(tpath, X_OK) < 0) {
-               SYSERROR("bad template: %s\n", t);
+               SYSERROR("bad template: %s", t);
                free(tpath);
                return NULL;
        }
@@ -852,7 +888,7 @@ static bool create_run_template(struct lxc_container *c, char *tpath, bool quiet
 
        pid = fork();
        if (pid < 0) {
-               SYSERROR("failed to fork task for container creation template\n");
+               SYSERROR("failed to fork task for container creation template");
                return false;
        }
 
@@ -875,14 +911,16 @@ static bool create_run_template(struct lxc_container *c, char *tpath, bool quiet
 
                src = c->lxc_conf->rootfs.path;
                /*
-                * for an overlayfs create, what the user wants is the template to fill
+                * for an overlay create, what the user wants is the template to fill
                 * in what will become the readonly lower layer.  So don't mount for
                 * the template
                 */
-               if (strncmp(src, "overlayfs:", 10) == 0) {
-                       src = overlayfs_getlower(src+10);
-               }
-               bdev = bdev_init(src, c->lxc_conf->rootfs.mount, NULL);
+               if (strncmp(src, "overlayfs:", 10) == 0)
+                       src = overlay_getlower(src+10);
+               if (strncmp(src, "aufs:", 5) == 0)
+                       src = overlay_getlower(src+5);
+
+               bdev = bdev_init(c->lxc_conf, src, c->lxc_conf->rootfs.mount, NULL);
                if (!bdev) {
                        ERROR("Error opening rootfs");
                        exit(1);
@@ -894,15 +932,15 @@ static bool create_run_template(struct lxc_container *c, char *tpath, bool quiet
                                exit(1);
                        }
                        if (detect_shared_rootfs()) {
-                               if (mount("", "", NULL, MS_SLAVE|MS_REC, 0)) {
+                               if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL)) {
                                        SYSERROR("Failed to make / rslave to run template");
                                        ERROR("Continuing...");
                                }
                        }
                }
-               if (strcmp(bdev->type, "dir") != 0) {
+               if (strcmp(bdev->type, "dir") && strcmp(bdev->type, "btrfs")) {
                        if (geteuid() != 0) {
-                               ERROR("non-root users can only create directory-backed containers");
+                               ERROR("non-root users can only create btrfs and directory-backed containers");
                                exit(1);
                        }
                        if (bdev->ops->mount(bdev) < 0) {
@@ -973,9 +1011,10 @@ static bool create_run_template(struct lxc_container *c, char *tpath, bool quiet
                 * and we append "--mapped-uid x", where x is the mapped uid
                 * for our geteuid()
                 */
-               if (geteuid() != 0 && !lxc_list_empty(&conf->id_map)) {
+               if (!lxc_list_empty(&conf->id_map)) {
                        int n2args = 1;
                        char txtuid[20];
+                       char txtgid[20];
                        char **n2 = malloc(n2args * sizeof(*n2));
                        struct lxc_list *it;
                        struct id_map *map;
@@ -1003,13 +1042,13 @@ static bool create_run_template(struct lxc_container *c, char *tpath, bool quiet
                                if (ret < 0 || ret >= 200)
                                        exit(1);
                        }
-                       int hostid_mapped = mapped_hostid(geteuid(), conf);
+                       int hostid_mapped = mapped_hostid(geteuid(), conf, ID_TYPE_UID);
                        int extraargs = hostid_mapped >= 0 ? 1 : 3;
                        n2 = realloc(n2, (nargs + n2args + extraargs) * sizeof(char *));
                        if (!n2)
                                exit(1);
                        if (hostid_mapped < 0) {
-                               hostid_mapped = find_unmapped_nsuid(conf);
+                               hostid_mapped = find_unmapped_nsuid(conf, ID_TYPE_UID);
                                n2[n2args++] = "-m";
                                if (hostid_mapped < 0) {
                                        ERROR("Could not find free uid to map");
@@ -1027,22 +1066,49 @@ static bool create_run_template(struct lxc_container *c, char *tpath, bool quiet
                                        exit(1);
                                }
                        }
+                       int hostgid_mapped = mapped_hostid(getegid(), conf, ID_TYPE_GID);
+                       extraargs = hostgid_mapped >= 0 ? 1 : 3;
+                       n2 = realloc(n2, (nargs + n2args + extraargs) * sizeof(char *));
+                       if (!n2)
+                               exit(1);
+                       if (hostgid_mapped < 0) {
+                               hostgid_mapped = find_unmapped_nsuid(conf, ID_TYPE_GID);
+                               n2[n2args++] = "-m";
+                               if (hostgid_mapped < 0) {
+                                       ERROR("Could not find free uid to map");
+                                       exit(1);
+                               }
+                               n2[n2args++] = malloc(200);
+                               if (!n2[n2args-1]) {
+                                       SYSERROR("out of memory");
+                                       exit(1);
+                               }
+                               ret = snprintf(n2[n2args-1], 200, "g:%d:%d:1",
+                                       hostgid_mapped, getegid());
+                               if (ret < 0 || ret >= 200) {
+                                       ERROR("string too long");
+                                       exit(1);
+                               }
+                       }
                        n2[n2args++] = "--";
                        for (i = 0; i < nargs; i++)
                                n2[i + n2args] = newargv[i];
                        n2args += nargs;
                        // Finally add "--mapped-uid $uid" to tell template what to chown
                        // cached images to
-                       n2args += 2;
+                       n2args += 4;
                        n2 = realloc(n2, n2args * sizeof(char *));
                        if (!n2) {
                                SYSERROR("out of memory");
                                exit(1);
                        }
                        // note n2[n2args-1] is NULL
-                       n2[n2args-3] = "--mapped-uid";
+                       n2[n2args-5] = "--mapped-uid";
                        snprintf(txtuid, 20, "%d", hostid_mapped);
-                       n2[n2args-2] = txtuid;
+                       n2[n2args-4] = txtuid;
+                       n2[n2args-3] = "--mapped-gid";
+                       snprintf(txtgid, 20, "%d", hostgid_mapped);
+                       n2[n2args-2] = txtgid;
                        n2[n2args-1] = NULL;
                        free(newargv);
                        newargv = n2;
@@ -1054,7 +1120,7 @@ static bool create_run_template(struct lxc_container *c, char *tpath, bool quiet
        }
 
        if (wait_for_pid(pid) != 0) {
-               ERROR("container creation template for %s failed\n", c->name);
+               ERROR("container creation template for %s failed", c->name);
                return false;
        }
 
@@ -1097,7 +1163,7 @@ static bool prepend_lxc_header(char *path, const char *t, char *const argv[])
 #if HAVE_LIBGNUTLS
        tpath = get_template_path(t);
        if (!tpath) {
-               ERROR("bad template: %s\n", t);
+               ERROR("bad template: %s", t);
                goto out_free_contents;
        }
 
@@ -1131,7 +1197,7 @@ static bool prepend_lxc_header(char *path, const char *t, char *const argv[])
                fprintf(f, "%02x", md_value[i]);
        fprintf(f, "\n");
 #endif
-       fprintf(f, "# For additional config options, please look at lxc.conf(5)\n");
+       fprintf(f, "# For additional config options, please look at lxc.container.conf(5)\n");
        if (fwrite(contents, 1, flen, f) != flen) {
                SYSERROR("Writing original contents");
                free(contents);
@@ -1157,13 +1223,17 @@ out_error:
 
 static void lxcapi_clear_config(struct lxc_container *c)
 {
-       if (c && c->lxc_conf) {
-               lxc_conf_free(c->lxc_conf);
-               c->lxc_conf = NULL;
+       if (c) {
+               if (c->lxc_conf) {
+                       lxc_conf_free(c->lxc_conf);
+                       c->lxc_conf = NULL;
+               }
        }
 }
 
 static bool lxcapi_destroy(struct lxc_container *c);
+static bool container_destroy(struct lxc_container *c);
+static bool get_snappath_dir(struct lxc_container *c, char *snappath);
 /*
  * lxcapi_create:
  * create a container with the given parameters.
@@ -1193,7 +1263,7 @@ static bool lxcapi_create(struct lxc_container *c, const char *t,
        if (t) {
                tpath = get_template_path(t);
                if (!tpath) {
-                       ERROR("bad template: %s\n", t);
+                       ERROR("bad template: %s", t);
                        goto out;
                }
        }
@@ -1212,7 +1282,7 @@ static bool lxcapi_create(struct lxc_container *c, const char *t,
 
        if (!c->lxc_conf) {
                if (!c->load_config(c, lxc_global_config_value("lxc.default_config"))) {
-                       ERROR("Error loading default configuration file %s\n", lxc_global_config_value("lxc.default_config"));
+                       ERROR("Error loading default configuration file %s", lxc_global_config_value("lxc.default_config"));
                        goto free_tpath;
                }
        }
@@ -1253,7 +1323,7 @@ static bool lxcapi_create(struct lxc_container *c, const char *t,
         */
        pid = fork();
        if (pid < 0) {
-               SYSERROR("failed to fork task for container creation template\n");
+               SYSERROR("failed to fork task for container creation template");
                goto out_unlock;
        }
 
@@ -1268,7 +1338,7 @@ static bool lxcapi_create(struct lxc_container *c, const char *t,
 
                /* save config file again to store the new rootfs location */
                if (!c->save_config(c, NULL)) {
-                       ERROR("failed to save starting configuration for %s\n", c->name);
+                       ERROR("failed to save starting configuration for %s", c->name);
                        // parent task won't see bdev in config so we delete it
                        bdev->ops->umount(bdev);
                        bdev->ops->destroy(bdev);
@@ -1305,7 +1375,7 @@ out_unlock:
                remove_partial(c, partial_fd);
 out:
        if (!ret && c)
-               lxcapi_destroy(c);
+               container_destroy(c);
 free_tpath:
        if (tpath)
                free(tpath);
@@ -1338,8 +1408,6 @@ static bool lxcapi_shutdown(struct lxc_container *c, int timeout)
        if (!c)
                return false;
 
-       if (!timeout)
-               timeout = -1;
        if (!c->is_running(c))
                return true;
        pid = c->init_pid(c);
@@ -1349,10 +1417,6 @@ static bool lxcapi_shutdown(struct lxc_container *c, int timeout)
                haltsignal = c->lxc_conf->haltsignal;
        kill(pid, haltsignal);
        retv = c->wait(c, "STOPPED", timeout);
-       if (!retv && timeout > 0) {
-               c->stop(c);
-               retv = c->wait(c, "STOPPED", 0); // 0 means don't wait
-       }
        return retv;
 }
 
@@ -1385,6 +1449,20 @@ out:
        return bret;
 }
 
+/* Clear @key from the unexpanded config, then append an empty entry for it. */
+static void do_clear_unexp_config_line(struct lxc_conf *conf, const char *key)
+{
+       /* NOTE(review): true presumably also clears sub-keys of these - confirm */
+       bool flag = strcmp(key, "lxc.cgroup") == 0 ||
+                   strcmp(key, "lxc.network") == 0 ||
+                   strcmp(key, "lxc.hook") == 0;
+
+       clear_unexp_config_line(conf, key, flag);
+       if (do_append_unexp_config_line(conf, key, ""))
+               return;
+       WARN("Error clearing configuration for %s", key);
+}
+
 static bool lxcapi_clear_config_item(struct lxc_container *c, const char *key)
 {
        int ret;
@@ -1394,6 +1472,8 @@ static bool lxcapi_clear_config_item(struct lxc_container *c, const char *key)
        if (container_mem_lock(c))
                return false;
        ret = lxc_clear_config_item(c->lxc_conf, key);
+       if (!ret)
+               do_clear_unexp_config_line(c->lxc_conf, key);
        container_mem_unlock(c);
        return ret == 0;
 }
@@ -1409,7 +1489,7 @@ static inline bool enter_to_ns(struct lxc_container *c) {
        init_pid = c->init_pid(c);
 
        /* Switch to new userns */
-       if (geteuid() && access("/proc/self/ns/user", F_OK) == 0) {
+       if ((geteuid() != 0 || (c->lxc_conf && !lxc_list_empty(&c->lxc_conf->id_map))) && access("/proc/self/ns/user", F_OK) == 0) {
                ret = snprintf(new_userns_path, MAXPATHLEN, "/proc/%d/ns/user", init_pid);
                if (ret < 0 || ret >= MAXPATHLEN)
                        goto out;
@@ -1535,7 +1615,7 @@ static char** lxcapi_get_interfaces(struct lxc_container *c)
 
        pid = fork();
        if (pid < 0) {
-               SYSERROR("failed to fork task to get interfaces information\n");
+               SYSERROR("failed to fork task to get interfaces information");
                close(pipefd[0]);
                close(pipefd[1]);
                return NULL;
@@ -1622,7 +1702,7 @@ static char** lxcapi_get_ips(struct lxc_container *c, const char* interface, con
 
        pid = fork();
        if (pid < 0) {
-               SYSERROR("failed to fork task to get container ips\n");
+               SYSERROR("failed to fork task to get container ips");
                close(pipefd[0]);
                close(pipefd[1]);
                return NULL;
@@ -1785,7 +1865,7 @@ static bool lxcapi_save_config(struct lxc_container *c, const char *alt_file)
        // If we haven't yet loaded a config, load the stock config
        if (!c->lxc_conf) {
                if (!c->load_config(c, lxc_global_config_value("lxc.default_config"))) {
-                       ERROR("Error loading default configuration file %s while saving %s\n", lxc_global_config_value("lxc.default_config"), c->name);
+                       ERROR("Error loading default configuration file %s while saving %s", lxc_global_config_value("lxc.default_config"), c->name);
                        return false;
                }
        }
@@ -1896,7 +1976,7 @@ static void mod_all_rdeps(struct lxc_container *c, bool inc)
                return;
        while (getline(&lxcpath, &pathlen, f) != -1) {
                if (getline(&lxcname, &namelen, f) == -1) {
-                       ERROR("badly formatted file %s\n", path);
+                       ERROR("badly formatted file %s", path);
                        goto out;
                }
                strip_newline(lxcpath);
@@ -1917,7 +1997,7 @@ out:
        fclose(f);
 }
 
-static bool has_snapshots(struct lxc_container *c)
+static bool has_fs_snapshots(struct lxc_container *c)
 {
        char path[MAXPATHLEN];
        int ret, v;
@@ -1941,16 +2021,74 @@ out:
        return bret;
 }
 
+/* True when the snapshot directory named by get_snappath_dir() can be opened
+ * and holds at least one entry besides "." and ".."; false otherwise. */
+static bool has_snapshots(struct lxc_container *c)
+{
+       char path[MAXPATHLEN];
+       struct dirent *direntp;
+       bool found = false;
+       DIR *dir;
+
+       if (!get_snappath_dir(c, path))
+               return false;
+       dir = opendir(path);
+       if (!dir)
+               return false;
+
+       /* readdir() replaces the deprecated readdir_r(): the DIR stream is local
+        * to this call, and a fixed struct dirent buffer can be too small. */
+       while ((direntp = readdir(dir)) != NULL) {
+               if (!strcmp(direntp->d_name, ".") ||
+                   !strcmp(direntp->d_name, ".."))
+                       continue;
+               found = true;
+               break;
+       }
+       closedir(dir);
+       return found;
+}
+
 static int lxc_rmdir_onedev_wrapper(void *data)
 {
        char *arg = (char *) data;
-       return lxc_rmdir_onedev(arg);
+       return lxc_rmdir_onedev(arg, "snaps");
 }
 
-// do we want the api to support --force, or leave that to the caller?
-static bool lxcapi_destroy(struct lxc_container *c)
+/* Look up the bdev for @conf's rootfs and run its destroy op; 0 or -1. */
+static int do_bdev_destroy(struct lxc_conf *conf)
+{
+       struct bdev *store = bdev_init(conf, conf->rootfs.path,
+                                      conf->rootfs.mount, NULL);
+       int rc;
+
+       if (!store)
+               return -1;
+       rc = store->ops->destroy(store) < 0 ? -1 : 0;
+       /* bdev_put() is called whether or not destroy failed */
+       bdev_put(store);
+       return rc;
+}
+
+/* userns_exec_1() callback: switch to gid/uid 0, then destroy @data's rootfs. */
+static int bdev_destroy_wrapper(void *data)
+{
+       if (setgid(0) < 0) {
+               ERROR("Failed to setgid to 0");
+               return -1;
+       }
+       /* a setgroups() failure is only logged; it does not abort the destroy */
+       if (setgroups(0, NULL) < 0)
+               WARN("Failed to clear groups");
+       if (setuid(0) < 0) {
+               ERROR("Failed to setuid to 0");
+               return -1;
+       }
+       return do_bdev_destroy(data);
+}
+
+static bool container_destroy(struct lxc_container *c)
 {
-       struct bdev *r = NULL;
        bool bret = false;
        int ret;
 
@@ -1966,20 +2104,14 @@ static bool lxcapi_destroy(struct lxc_container *c)
                goto out;
        }
 
-       if (c->lxc_conf && has_snapshots(c)) {
-               ERROR("container %s has dependent snapshots", c->name);
-               goto out;
-       }
-
-       if (!am_unpriv() && c->lxc_conf && c->lxc_conf->rootfs.path && c->lxc_conf->rootfs.mount) {
-               r = bdev_init(c->lxc_conf->rootfs.path, c->lxc_conf->rootfs.mount, NULL);
-               if (r) {
-                       if (r->ops->destroy(r) < 0) {
-                               bdev_put(r);
-                               ERROR("Error destroying rootfs for %s", c->name);
-                               goto out;
-                       }
-                       bdev_put(r);
+       if (c->lxc_conf && c->lxc_conf->rootfs.path && c->lxc_conf->rootfs.mount) {
+               if (am_unpriv())
+                       ret = userns_exec_1(c->lxc_conf, bdev_destroy_wrapper, c->lxc_conf);
+               else
+                       ret = do_bdev_destroy(c->lxc_conf);
+               if (ret < 0) {
+                       ERROR("Error destroying rootfs for %s", c->name);
+                       goto out;
                }
        }
 
@@ -1991,7 +2123,7 @@ static bool lxcapi_destroy(struct lxc_container *c)
        if (am_unpriv())
                ret = userns_exec_1(c->lxc_conf, lxc_rmdir_onedev_wrapper, path);
        else
-               ret = lxc_rmdir_onedev(path);
+               ret = lxc_rmdir_onedev(path, "snaps");
        if (ret < 0) {
                ERROR("Error destroying container directory for %s", c->name);
                goto out;
@@ -2003,6 +2135,36 @@ out:
        return bret;
 }
 
+/* Destroy @c via container_destroy(), refusing (false) when @c is NULL or
+ * undefined, or when has_snapshots()/has_fs_snapshots() report snapshots. */
+static bool lxcapi_destroy(struct lxc_container *c)
+{
+       if (!c || !lxcapi_is_defined(c))
+               return false;
+
+       if (has_snapshots(c)) {
+               ERROR("Container %s has snapshots;  not removing", c->name);
+       } else if (has_fs_snapshots(c)) {
+               ERROR("container %s has snapshots on its rootfs", c->name);
+       } else {
+               return container_destroy(c);
+       }
+       return false;
+}
+
+static bool lxcapi_snapshot_destroy_all(struct lxc_container *c);
+
+/* Delete every snapshot of @c, then destroy @c itself via lxcapi_destroy(). */
+static bool lxcapi_destroy_with_snapshots(struct lxc_container *c)
+{
+       if (c == NULL || !lxcapi_is_defined(c))
+               return false;
+       if (lxcapi_snapshot_destroy_all(c))
+               return lxcapi_destroy(c);
+       ERROR("Error deleting all snapshots");
+       return false;
+}
+
 static bool set_config_item_locked(struct lxc_container *c, const char *key, const char *v)
 {
        struct lxc_config_t *config;
@@ -2014,7 +2176,9 @@ static bool set_config_item_locked(struct lxc_container *c, const char *key, con
        config = lxc_getconfig(key);
        if (!config)
                return false;
-       return (0 == config->cb(key, v, c->lxc_conf));
+       if (config->cb(key, v, c->lxc_conf) != 0)
+               return false;
+       return do_append_unexp_config_line(c->lxc_conf, key, v);
 }
 
 static bool lxcapi_set_config_item(struct lxc_container *c, const char *key, const char *v)
@@ -2270,6 +2434,10 @@ static int copyhooks(struct lxc_container *oldc, struct lxc_container *c)
                }
        }
 
+       if (!clone_update_unexp_hooks(c->lxc_conf)) {
+               ERROR("Error saving new hooks in clone");
+               return -1;
+       }
        c->save_config(c, NULL);
        return 0;
 }
@@ -2311,6 +2479,8 @@ static int copy_fstab(struct lxc_container *oldc, struct lxc_container *c)
        if (!oldpath)
                return 0;
 
+       clear_unexp_config_line(c->lxc_conf, "lxc.mount", false);
+
        char *p = strrchr(oldpath, '/');
        if (!p)
                return -1;
@@ -2335,6 +2505,10 @@ static int copy_fstab(struct lxc_container *oldc, struct lxc_container *c)
                ERROR("error: allocating pathname");
                return -1;
        }
+       if (!do_append_unexp_config_line(c->lxc_conf, "lxc.mount", newpath)) {
+               ERROR("error saving new lxctab");
+               return -1;
+       }
 
        return 0;
 }
@@ -2404,6 +2578,12 @@ static int copy_storage(struct lxc_container *c0, struct lxc_container *c,
                ERROR("Out of memory while setting storage path");
                return -1;
        }
+       // We will simply append a new lxc.rootfs entry to the unexpanded config
+       clear_unexp_config_line(c->lxc_conf, "lxc.rootfs", false);
+       if (!do_append_unexp_config_line(c->lxc_conf, "lxc.rootfs", c->lxc_conf->rootfs.path)) {
+               ERROR("Error saving new rootfs to cloend config");
+               return -1;
+       }
        if (flags & LXC_CLONE_SNAPSHOT)
                copy_rdepends(c, c0);
        if (need_rdep) {
@@ -2447,19 +2627,30 @@ static int clone_update_rootfs(struct clone_update_data *data)
                ERROR("Failed to setuid to 0");
                return -1;
        }
+       if (setgroups(0, NULL) < 0)
+               WARN("Failed to clear groups");
 
        if (unshare(CLONE_NEWNS) < 0)
                return -1;
-       bdev = bdev_init(c->lxc_conf->rootfs.path, c->lxc_conf->rootfs.mount, NULL);
+       bdev = bdev_init(c->lxc_conf, c->lxc_conf->rootfs.path, c->lxc_conf->rootfs.mount, NULL);
        if (!bdev)
                return -1;
        if (strcmp(bdev->type, "dir") != 0) {
                if (unshare(CLONE_NEWNS) < 0) {
                        ERROR("error unsharing mounts");
+                       bdev_put(bdev);
                        return -1;
                }
-               if (bdev->ops->mount(bdev) < 0)
+               if (detect_shared_rootfs()) {
+                       if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL)) {
+                               SYSERROR("Failed to make / rslave");
+                               ERROR("Continuing...");
+                       }
+               }
+               if (bdev->ops->mount(bdev) < 0) {
+                       bdev_put(bdev);
                        return -1;
+               }
        } else { // TODO come up with a better way
                if (bdev->dest)
                        free(bdev->dest);
@@ -2486,18 +2677,21 @@ static int clone_update_rootfs(struct clone_update_data *data)
 
                if (run_lxc_hooks(c->name, "clone", conf, c->get_config_path(c), hookargs)) {
                        ERROR("Error executing clone hook for %s", c->name);
+                       bdev_put(bdev);
                        return -1;
                }
        }
 
        if (!(flags & LXC_CLONE_KEEPNAME)) {
                ret = snprintf(path, MAXPATHLEN, "%s/etc/hostname", bdev->dest);
+               bdev_put(bdev);
+
                if (ret < 0 || ret >= MAXPATHLEN)
                        return -1;
                if (!file_exists(path))
                        return 0;
                if (!(fout = fopen(path, "w"))) {
-                       SYSERROR("unable to open %s: ignoring\n", path);
+                       SYSERROR("unable to open %s: ignoring", path);
                        return 0;
                }
                if (fprintf(fout, "%s", c->name) < 0) {
@@ -2507,6 +2701,9 @@ static int clone_update_rootfs(struct clone_update_data *data)
                if (fclose(fout) < 0)
                        return -1;
        }
+       else
+               bdev_put(bdev);
+
        return 0;
 }
 
@@ -2528,17 +2725,15 @@ sudo lxc-clone -o o1 -n n1 -s -L|-fssize fssize -v|--vgname vgname \
 only rootfs gets converted (copied/snapshotted) on clone.
 */
 
-static int create_file_dirname(char *path)
+static int create_file_dirname(char *path, struct lxc_conf *conf)
 {
        char *p = strrchr(path, '/');
-       int ret;
+       int ret = -1;
 
        if (!p)
                return -1;
        *p = '\0';
-       ret = mkdir(path, 0755);
-       if (ret && errno != EEXIST)
-               SYSERROR("creating container path %s\n", path);
+        ret = do_create_container_dir(path, conf);
        *p = '/';
        return ret;
 }
@@ -2551,7 +2746,7 @@ static struct lxc_container *lxcapi_clone(struct lxc_container *c, const char *n
        struct lxc_container *c2 = NULL;
        char newpath[MAXPATHLEN];
        int ret, storage_copied = 0;
-       const char *n, *l;
+       char *origroot = NULL;
        struct clone_update_data data;
        FILE *fout;
        pid_t pid;
@@ -2568,9 +2763,11 @@ static struct lxc_container *lxcapi_clone(struct lxc_container *c, const char *n
        }
 
        // Make sure the container doesn't yet exist.
-       n = newname ? newname : c->name;
-       l = lxcpath ? lxcpath : c->get_config_path(c);
-       ret = snprintf(newpath, MAXPATHLEN, "%s/%s/config", l, n);
+       if (!newname)
+               newname = c->name;
+       if (!lxcpath)
+               lxcpath = c->get_config_path(c);
+       ret = snprintf(newpath, MAXPATHLEN, "%s/%s/config", lxcpath, newname);
        if (ret < 0 || ret >= MAXPATHLEN) {
                SYSERROR("clone: failed making config pathname");
                goto out;
@@ -2580,13 +2777,17 @@ static struct lxc_container *lxcapi_clone(struct lxc_container *c, const char *n
                goto out;
        }
 
-       ret = create_file_dirname(newpath);
+       ret = create_file_dirname(newpath, c->lxc_conf);
        if (ret < 0 && errno != EEXIST) {
                ERROR("Error creating container dir for %s", newpath);
                goto out;
        }
 
        // copy the configuration, tweak it as needed,
+       if (c->lxc_conf->rootfs.path) {
+               origroot = c->lxc_conf->rootfs.path;
+               c->lxc_conf->rootfs.path = NULL;
+       }
        fout = fopen(newpath, "w");
        if (!fout) {
                SYSERROR("open %s", newpath);
@@ -2594,8 +2795,9 @@ static struct lxc_container *lxcapi_clone(struct lxc_container *c, const char *n
        }
        write_config(fout, c->lxc_conf);
        fclose(fout);
+       c->lxc_conf->rootfs.path = origroot;
 
-       sprintf(newpath, "%s/%s/rootfs", l, n);
+       sprintf(newpath, "%s/%s/rootfs", lxcpath, newname);
        if (mkdir(newpath, 0755) < 0) {
                SYSERROR("error creating %s", newpath);
                goto out;
@@ -2603,17 +2805,25 @@ static struct lxc_container *lxcapi_clone(struct lxc_container *c, const char *n
 
        if (am_unpriv()) {
                if (chown_mapped_root(newpath, c->lxc_conf) < 0) {
-                       ERROR("Error chowning %s to container root\n", newpath);
+                       ERROR("Error chowning %s to container root", newpath);
                        goto out;
                }
        }
 
-       c2 = lxc_container_new(n, l);
+       c2 = lxc_container_new(newname, lxcpath);
        if (!c2) {
-               ERROR("clone: failed to create new container (%s %s)", n, l);
+               ERROR("clone: failed to create new container (%s %s)", newname,
+                               lxcpath);
                goto out;
        }
 
+       // copy/snapshot rootfs's
+       ret = copy_storage(c, c2, bdevtype, flags, bdevdata, newsize);
+       if (ret < 0)
+               goto out;
+
+       clear_unexp_config_line(c2->lxc_conf, "lxc.utsname", false);
+
        // update utsname
        if (!set_config_item_locked(c2, "lxc.utsname", newname)) {
                ERROR("Error setting new hostname");
@@ -2633,13 +2843,13 @@ static struct lxc_container *lxcapi_clone(struct lxc_container *c, const char *n
        }
 
        // update macaddrs
-       if (!(flags & LXC_CLONE_KEEPMACADDR))
+       if (!(flags & LXC_CLONE_KEEPMACADDR)) {
                network_new_hwaddrs(c2);
-
-       // copy/snapshot rootfs's
-       ret = copy_storage(c, c2, bdevtype, flags, bdevdata, newsize);
-       if (ret < 0)
-               goto out;
+               if (!clone_update_unexp_network(c2->lxc_conf)) {
+                       ERROR("Error updating network for clone");
+                       goto out;
+               }
+       }
 
        // We've now successfully created c2's storage, so clear it out if we
        // fail after this
@@ -2691,10 +2901,14 @@ static bool lxcapi_rename(struct lxc_container *c, const char *newname)
        struct bdev *bdev;
        struct lxc_container *newc;
 
-       if (!c || !c->name || !c->config_path)
+       if (!c || !c->name || !c->config_path || !c->lxc_conf)
                return false;
 
-       bdev = bdev_init(c->lxc_conf->rootfs.path, c->lxc_conf->rootfs.mount, NULL);
+       if (has_fs_snapshots(c) || has_snapshots(c)) {
+               ERROR("Renaming a container with snapshots is not supported");
+               return false;
+       }
+       bdev = bdev_init(c->lxc_conf, c->lxc_conf->rootfs.path, c->lxc_conf->rootfs.mount, NULL);
        if (!bdev) {
                ERROR("Failed to find original backing store type");
                return false;
@@ -2710,7 +2924,7 @@ static bool lxcapi_rename(struct lxc_container *c, const char *newname)
        if (newc && lxcapi_is_defined(newc))
                lxc_container_put(newc);
 
-       if (!lxcapi_destroy(c)) {
+       if (!container_destroy(c)) {
                ERROR("Could not destroy existing container %s", c->name);
                return false;
        }
@@ -2760,16 +2974,51 @@ static int get_next_index(const char *lxcpath, char *cname)
        }
 }
 
+/*
+ * Compute the directory holding the snapshots of container @c.
+ *
+ * @snappath must be able to hold MAXPATHLEN bytes.  Two layouts exist:
+ *   old style: <config_path>snaps/<name>   (e.g. /var/lib/lxcsnaps/<name>)
+ *   new style: <config_path>/<name>/snaps  (e.g. /var/lib/lxc/<name>/snaps)
+ * The old style is chosen only when <config_path>snaps already exists.
+ *
+ * Returns false if a path does not fit in MAXPATHLEN, true otherwise.
+ */
+static bool get_snappath_dir(struct lxc_container *c, char *snappath)
+{
+       int len;
+
+       /* Probe for the legacy top-level directory first. */
+       len = snprintf(snappath, MAXPATHLEN, "%ssnaps", c->config_path);
+       if (len < 0 || len >= MAXPATHLEN)
+               return false;
+
+       if (dir_exists(snappath))
+               len = snprintf(snappath, MAXPATHLEN, "%ssnaps/%s", c->config_path, c->name);
+       else
+               len = snprintf(snappath, MAXPATHLEN, "%s/%s/snaps", c->config_path, c->name);
+
+       return len >= 0 && len < MAXPATHLEN;
+}
+
 static int lxcapi_snapshot(struct lxc_container *c, const char *commentfile)
 {
        int i, flags, ret;
        struct lxc_container *c2;
        char snappath[MAXPATHLEN], newname[20];
 
-       // /var/lib/lxc -> /var/lib/lxcsnaps \0
-       ret = snprintf(snappath, MAXPATHLEN, "%ssnaps/%s", c->config_path, c->name);
-       if (ret < 0 || ret >= MAXPATHLEN)
+       if (!c || !lxcapi_is_defined(c))
+               return -1;
+
+       if (!bdev_can_backup(c->lxc_conf)) {
+               ERROR("%s's backing store cannot be backed up.", c->name);
+               ERROR("Your container must use another backing store type.");
+               return -1;
+       }
+
+       if (!get_snappath_dir(c, snappath))
                return -1;
+
        i = get_next_index(snappath, c->name);
 
        if (mkdir_p(snappath, 0755) < 0) {
@@ -2787,9 +3036,16 @@ static int lxcapi_snapshot(struct lxc_container *c, const char *commentfile)
         */
        flags = LXC_CLONE_SNAPSHOT | LXC_CLONE_KEEPMACADDR | LXC_CLONE_KEEPNAME |
                LXC_CLONE_KEEPBDEVTYPE | LXC_CLONE_MAYBE_SNAPSHOT;
+       if (bdev_is_dir(c->lxc_conf, c->lxc_conf->rootfs.path)) {
+               ERROR("Snapshot of directory-backed container requested.");
+               ERROR("Making a copy-clone.  If you do want snapshots, then");
+               ERROR("please create an aufs or overlayfs clone first, snapshot that");
+               ERROR("and keep the original container pristine.");
+               flags &= ~LXC_CLONE_SNAPSHOT | LXC_CLONE_MAYBE_SNAPSHOT;
+       }
        c2 = c->clone(c, newname, snappath, flags, NULL, NULL, 0, NULL);
        if (!c2) {
-               ERROR("clone of %s:%s failed\n", c->config_path, c->name);
+               ERROR("clone of %s:%s failed", c->config_path, c->name);
                return -1;
        }
 
@@ -2810,7 +3066,7 @@ static int lxcapi_snapshot(struct lxc_container *c, const char *commentfile)
        sprintf(dfnam, "%s/%s/ts", snappath, newname);
        f = fopen(dfnam, "w");
        if (!f) {
-               ERROR("Failed to open %s\n", dfnam);
+               ERROR("Failed to open %s", dfnam);
                return -1;
        }
        if (fprintf(f, "%s", buffer) < 0) {
@@ -2896,7 +3152,7 @@ static char *get_timestamp(char* snappath, char *name)
 static int lxcapi_snapshot_list(struct lxc_container *c, struct lxc_snapshot **ret_snaps)
 {
        char snappath[MAXPATHLEN], path2[MAXPATHLEN];
-       int dirlen, count = 0, ret;
+       int count = 0, ret;
        struct dirent dirent, *direntp;
        struct lxc_snapshot *snaps =NULL, *nsnaps;
        DIR *dir;
@@ -2904,9 +3160,7 @@ static int lxcapi_snapshot_list(struct lxc_container *c, struct lxc_snapshot **r
        if (!c || !lxcapi_is_defined(c))
                return -1;
 
-       // snappath is ${lxcpath}snaps/${lxcname}/
-       dirlen = snprintf(snappath, MAXPATHLEN, "%ssnaps/%s", c->config_path, c->name);
-       if (dirlen < 0 || dirlen >= MAXPATHLEN) {
+       if (!get_snappath_dir(c, snappath)) {
                ERROR("path name too long");
                return -1;
        }
@@ -2974,7 +3228,7 @@ out_free:
 static bool lxcapi_snapshot_restore(struct lxc_container *c, const char *snapname, const char *newname)
 {
        char clonelxcpath[MAXPATHLEN];
-       int ret;
+       int flags = 0;
        struct lxc_container *snap, *rest;
        struct bdev *bdev;
        bool b = false;
@@ -2982,7 +3236,12 @@ static bool lxcapi_snapshot_restore(struct lxc_container *c, const char *snapnam
        if (!c || !c->name || !c->config_path)
                return false;
 
-       bdev = bdev_init(c->lxc_conf->rootfs.path, c->lxc_conf->rootfs.mount, NULL);
+       if (has_fs_snapshots(c)) {
+               ERROR("container rootfs has dependent snapshots");
+               return false;
+       }
+
+       bdev = bdev_init(c->lxc_conf, c->lxc_conf->rootfs.path, c->lxc_conf->rootfs.mount, NULL);
        if (!bdev) {
                ERROR("Failed to find original backing store type");
                return false;
@@ -2990,15 +3249,8 @@ static bool lxcapi_snapshot_restore(struct lxc_container *c, const char *snapnam
 
        if (!newname)
                newname = c->name;
-       if (strcmp(c->name, newname) == 0) {
-               if (!lxcapi_destroy(c)) {
-                       ERROR("Could not destroy existing container %s", newname);
-                       bdev_put(bdev);
-                       return false;
-               }
-       }
-       ret = snprintf(clonelxcpath, MAXPATHLEN, "%ssnaps/%s", c->config_path, c->name);
-       if (ret < 0 || ret >= MAXPATHLEN) {
+
+       if (!get_snappath_dir(c, clonelxcpath)) {
                bdev_put(bdev);
                return false;
        }
@@ -3012,7 +3264,19 @@ static bool lxcapi_snapshot_restore(struct lxc_container *c, const char *snapnam
                return false;
        }
 
-       rest = lxcapi_clone(snap, newname, c->config_path, 0, bdev->type, NULL, 0, NULL);
+       if (strcmp(c->name, newname) == 0) {
+               if (!container_destroy(c)) {
+                       ERROR("Could not destroy existing container %s", newname);
+                       lxc_container_put(snap);
+                       bdev_put(bdev);
+                       return false;
+               }
+       }
+
+       if (strcmp(bdev->type, "dir") != 0 && strcmp(bdev->type, "loop") != 0)
+               flags = LXC_CLONE_SNAPSHOT | LXC_CLONE_MAYBE_SNAPSHOT;
+       rest = lxcapi_clone(snap, newname, c->config_path, flags,
+                       bdev->type, NULL, 0, NULL);
        bdev_put(bdev);
        if (rest && lxcapi_is_defined(rest))
                b = true;
@@ -3022,21 +3286,13 @@ static bool lxcapi_snapshot_restore(struct lxc_container *c, const char *snapnam
        return b;
 }
 
-static bool lxcapi_snapshot_destroy(struct lxc_container *c, const char *snapname)
+static bool do_snapshot_destroy(const char *snapname, const char *clonelxcpath)
 {
-       int ret;
-       char clonelxcpath[MAXPATHLEN];
        struct lxc_container *snap = NULL;
-
-       if (!c || !c->name || !c->config_path)
-               return false;
-
-       ret = snprintf(clonelxcpath, MAXPATHLEN, "%ssnaps/%s", c->config_path, c->name);
-       if (ret < 0 || ret >= MAXPATHLEN)
-               goto err;
+       bool bret = false;
 
        snap = lxc_container_new(snapname, clonelxcpath);
-       if (!snap || !lxcapi_is_defined(snap)) {
+       if (!snap) {
                ERROR("Could not find snapshot %s", snapname);
                goto err;
        }
@@ -3045,48 +3301,151 @@ static bool lxcapi_snapshot_destroy(struct lxc_container *c, const char *snapnam
                ERROR("Could not destroy snapshot %s", snapname);
                goto err;
        }
-       lxc_container_put(snap);
+       bret = true;
 
-       return true;
 err:
        if (snap)
                lxc_container_put(snap);
-       return false;
-}
-
-static bool lxcapi_may_control(struct lxc_container *c)
-{
-       return lxc_try_cmd(c->name, c->config_path) == 0;
+       return bret;
 }
 
-static bool add_remove_device_node(struct lxc_container *c, const char *src_path, const char *dest_path, bool add)
+/*
+ * Destroy every snapshot found in the directory @path, then try to remove
+ * @path itself.
+ *
+ * Each directory entry other than "." and ".." is treated as a snapshot name
+ * and passed to do_snapshot_destroy().  A failure for one entry does not stop
+ * the walk.
+ *
+ * Returns false if @path cannot be opened or any snapshot could not be
+ * destroyed.  A failing rmdir() of @path is only logged.
+ */
+static bool remove_all_snapshots(const char *path)
 {
-       int ret;
-       struct stat st;
-       char path[MAXPATHLEN];
-       char value[MAX_BUFFER];
-       char *directory_path = NULL;
-       const char *p;
+       DIR *dir;
+       struct dirent dirent, *direntp;
+       bool bret = true;
 
-       /* make sure container is running */
-       if (!c->is_running(c)) {
-               ERROR("container is not running");
-               goto out;
+       dir = opendir(path);
+       if (!dir) {
+               SYSERROR("opendir on snapshot path %s", path);
+               return false;
+       }
+       while (!readdir_r(dir, &dirent, &direntp)) {
+               /* a NULL entry marks the end of the directory */
+               if (!direntp)
+                       break;
+               if (!strcmp(direntp->d_name, "."))
+                       continue;
+               if (!strcmp(direntp->d_name, ".."))
+                       continue;
+               if (!do_snapshot_destroy(direntp->d_name, path)) {
+                       /* remember the failure but keep going */
+                       bret = false;
+                       continue;
+               }
+       }
+
+       closedir(dir);
+
+       if (rmdir(path))
+               SYSERROR("Error removing directory %s", path);
+
+       return bret;
+}
+
+/*
+ * Destroy the snapshot @snapname of container @c.
+ *
+ * Returns false when an argument is missing, when the snapshot directory
+ * path cannot be built, or when do_snapshot_destroy() fails.
+ */
+static bool lxcapi_snapshot_destroy(struct lxc_container *c, const char *snapname)
+{
+       char snapdir[MAXPATHLEN];
+       bool usable = c && c->name && c->config_path && snapname;
+
+       if (!usable)
+               return false;
+
+       return get_snappath_dir(c, snapdir) &&
+               do_snapshot_destroy(snapname, snapdir);
+}
+
+/*
+ * Destroy all snapshots of container @c.
+ *
+ * Returns false when @c is incomplete, when the snapshot directory path
+ * cannot be built, or when remove_all_snapshots() reports a failure.
+ */
+static bool lxcapi_snapshot_destroy_all(struct lxc_container *c)
+{
+       char snapdir[MAXPATHLEN];
+       bool usable = c && c->name && c->config_path;
+
+       if (!usable)
+               return false;
+
+       return get_snappath_dir(c, snapdir) && remove_all_snapshots(snapdir);
+}
+
+/* True when lxc_try_cmd() succeeds (returns 0) for this container. */
+static bool lxcapi_may_control(struct lxc_container *c)
+{
+       int rc = lxc_try_cmd(c->name, c->config_path);
+
+       return !rc;
+}
+
+/*
+ * Create or remove a device node inside a running container.
+ *
+ * A helper child is forked; it chroots into /proc/<init_pid>/root, removes
+ * any existing entry at @path and, when @add is true, creates the missing
+ * parent directories and the node itself using the mode and device number
+ * from @st.
+ *
+ * The child never returns from this function: it always terminates with
+ * exit(), status 0 on success and 1 on failure.  The parent waits for it and
+ * returns true only when the child succeeded.
+ */
+static bool do_add_remove_node(pid_t init_pid, const char *path, bool add,
+               struct stat *st)
+{
+       char chrootpath[MAXPATHLEN];
+       char *directory_path = NULL;
+       pid_t pid;
+       int ret;
+
+       if ((pid = fork()) < 0) {
+               SYSERROR("failed to fork a child helper");
+               return false;
+       }
+       if (pid) {
+               if (wait_for_pid(pid) != 0) {
+                       ERROR("Failed to create node in guest");
+                       return false;
+               }
+               return true;
        }
 
-       /* use src_path if dest_path is NULL otherwise use dest_path */
-       p = dest_path ? dest_path : src_path;
-
        /* prepare the path */
-       ret = snprintf(path, MAXPATHLEN, "/proc/%d/root/%s", c->init_pid(c), p);
+       ret = snprintf(chrootpath, MAXPATHLEN, "/proc/%d/root", init_pid);
        if (ret < 0 || ret >= MAXPATHLEN)
-               goto out;
-       remove_trailing_slashes(path);
+               /* we are the child: returning would run the caller twice */
+               exit(1);
+
+       if (chroot(chrootpath) < 0)
+               exit(1);
+       if (chdir("/") < 0)
+               exit(1);
+       /* remove path if it exists */
+       if(faccessat(AT_FDCWD, path, F_OK, AT_SYMLINK_NOFOLLOW) == 0) {
+               if (unlink(path) < 0) {
+                       ERROR("unlink failed");
+                       exit(1);
+               }
+       }
+       if (!add)
+               exit(0);
+
+       /* create any missing directories */
+       directory_path = dirname(strdup(path));
+       if (mkdir_p(directory_path, 0755) < 0 && errno != EEXIST) {
+               ERROR("failed to create directory");
+               exit(1);
+       }
+
+       /* create the device node */
+       if (mknod(path, st->st_mode, st->st_rdev) < 0) {
+               ERROR("mknod failed");
+               exit(1);
+       }
+
+       exit(0);
+}
+
+static bool add_remove_device_node(struct lxc_container *c, const char *src_path, const char *dest_path, bool add)
+{
+       int ret;
+       struct stat st;
+       char value[MAX_BUFFER];
+       const char *p;
+
+       /* make sure container is running */
+       if (!c->is_running(c)) {
+               ERROR("container is not running");
+               return false;
+       }
+
+       /* use src_path if dest_path is NULL otherwise use dest_path */
+       p = dest_path ? dest_path : src_path;
 
-       p = add ? src_path : path;
        /* make sure we can access p */
        if(access(p, F_OK) < 0 || stat(p, &st) < 0)
-               goto out;
+               return false;
 
        /* continue if path is character device or block device */
        if (S_ISCHR(st.st_mode))
@@ -3094,55 +3453,29 @@ static bool add_remove_device_node(struct lxc_container *c, const char *src_path
        else if (S_ISBLK(st.st_mode))
                ret = snprintf(value, MAX_BUFFER, "b %d:%d rwm", major(st.st_rdev), minor(st.st_rdev));
        else
-               goto out;
+               return false;
 
        /* check snprintf return code */
        if (ret < 0 || ret >= MAX_BUFFER)
-               goto out;
+               return false;
 
-       directory_path = dirname(strdup(path));
-       /* remove path and directory_path (if empty) */
-       if(access(path, F_OK) == 0) {
-               if (unlink(path) < 0) {
-                       ERROR("unlink failed");
-                       goto out;
-               }
-               if (rmdir(directory_path) < 0 && errno != ENOTEMPTY) {
-                       ERROR("rmdir failed");
-                       goto out;
-               }
-       }
+       if (!do_add_remove_node(c->init_pid(c), p, add, &st))
+               return false;
 
+       /* add or remove device to/from cgroup access list */
        if (add) {
-               /* create the missing directories */
-               if (mkdir_p(directory_path, 0755) < 0) {
-                       ERROR("failed to create directory");
-                       goto out;
-               }
-
-               /* create the device node */
-               if (mknod(path, st.st_mode, st.st_rdev) < 0) {
-                       ERROR("mknod failed");
-                       goto out;
-               }
-
-               /* add device node to device list */
                if (!c->set_cgroup_item(c, "devices.allow", value)) {
                        ERROR("set_cgroup_item failed while adding the device node");
-                       goto out;
+                       return false;
                }
        } else {
-               /* remove device node from device list */
                if (!c->set_cgroup_item(c, "devices.deny", value)) {
                        ERROR("set_cgroup_item failed while removing the device node");
-                       goto out;
+                       return false;
                }
        }
+
        return true;
-out:
-       if (directory_path)
-               free(directory_path);
-       return false;
 }
 
 static bool lxcapi_add_device_node(struct lxc_container *c, const char *src_path, const char *dest_path)
@@ -3163,6 +3496,469 @@ static bool lxcapi_remove_device_node(struct lxc_container *c, const char *src_p
        return add_remove_device_node(c, src_path, dest_path, false);
 }
 
+/* Options describing one criu invocation; consumed by exec_criu(). */
+struct criu_opts {
+       /* The type of criu invocation, one of "dump" or "restore" */
+       char *action;
+
+       /* The directory to pass to criu (-D); the log file is written there too */
+       char *directory;
+
+       /* The container to dump or restore */
+       struct lxc_container *c;
+
+       /* Enable criu verbose mode? */
+       bool verbose;
+
+       /* dump: stop the container or not after dumping? (not read for restore) */
+       bool stop;
+
+       /* restore: the file to write the init process' pid into (not read for dump) */
+       char *pidfile;
+};
+
+/*
+ * Read the first whitespace-delimited word of <directory>/<file><netnr>.
+ *
+ * @out must be 128 bytes long: at most 127 characters plus the terminating
+ * NUL are stored.
+ *
+ * Returns 0 on success, -1 if the path is too long, the file cannot be
+ * opened, or no word could be read.
+ */
+static int read_criu_file(const char *directory, const char *file, int netnr, char *out)
+{
+       char fname[PATH_MAX];
+       FILE *fp;
+       int len, matched;
+
+       len = snprintf(fname, PATH_MAX, "%s/%s%d", directory, file, netnr);
+       if (len < 0 || len >= PATH_MAX) {
+               ERROR("%s: path too long", __func__);
+               return -1;
+       }
+
+       fp = fopen(fname, "r");
+       if (fp == NULL)
+               return -1;
+
+       matched = fscanf(fp, "%127s", out);
+       fclose(fp);
+
+       return matched > 0 ? 0 : -1;
+}
+
+/*
+ * Build the criu command line described by @opts and execv() it.
+ *
+ * On success this function does not return, because the process image is
+ * replaced by criu.  It returns only on failure: unknown action, log path too
+ * long, allocation failure, criu binary not found, missing veth/eth files for
+ * a restore, or a failing execv().  Callers treat a return as an error.
+ */
+static void exec_criu(struct criu_opts *opts)
+{
+       char **argv, log[PATH_MAX];
+       int static_args = 13, argc = 0, i, ret;
+
+       /* The command line always looks like:
+        * criu $(action) --tcp-established --file-locks --link-remap --manage-cgroups \
+        *     --action-script foo.sh -D $(directory) -o $(directory)/$(action).log
+        * +1 for final NULL */
+
+       if (strcmp(opts->action, "dump") == 0) {
+               /* -t pid */
+               static_args += 2;
+
+               /* --leave-running */
+               if (!opts->stop)
+                       static_args++;
+       } else if (strcmp(opts->action, "restore") == 0) {
+               /* --root $(lxc_mount_point) --restore-detached --pidfile $foo */
+               static_args += 5;
+       } else {
+               return;
+       }
+
+       if (opts->verbose)
+               static_args++;
+
+       ret = snprintf(log, PATH_MAX, "%s/%s.log", opts->directory, opts->action);
+       if (ret < 0 || ret >= PATH_MAX) {
+               ERROR("logfile name too long\n");
+               return;
+       }
+
+       argv = malloc(static_args * sizeof(*argv));
+       if (!argv)
+               return;
+
+       /* zero-fill so the cleanup loop at err: always finds a NULL terminator */
+       memset(argv, 0, static_args * sizeof(*argv));
+
+/* Append a heap copy of arg to argv; jump to err if the copy fails. */
+#define DECLARE_ARG(arg)                       \
+       do {                                    \
+               argv[argc++] = strdup(arg);     \
+               if (!argv[argc-1])              \
+                       goto err;               \
+       } while (0)
+
+       argv[argc++] = on_path("criu", NULL);
+       if (!argv[argc-1]) {
+               ERROR("Couldn't find criu binary\n");
+               goto err;
+       }
+
+       DECLARE_ARG(opts->action);
+       DECLARE_ARG("--tcp-established");
+       DECLARE_ARG("--file-locks");
+       DECLARE_ARG("--link-remap");
+       DECLARE_ARG("--manage-cgroups");
+       DECLARE_ARG("--action-script");
+       DECLARE_ARG(LIBEXECDIR "/lxc/lxc-restore-net");
+       DECLARE_ARG("-D");
+       DECLARE_ARG(opts->directory);
+       DECLARE_ARG("-o");
+       DECLARE_ARG(log);
+
+       if (opts->verbose)
+               DECLARE_ARG("-vvvvvv");
+
+       if (strcmp(opts->action, "dump") == 0) {
+               char pid[32];
+
+               if (sprintf(pid, "%d", lxcapi_init_pid(opts->c)) < 0)
+                       goto err;
+
+               DECLARE_ARG("-t");
+               DECLARE_ARG(pid);
+               if (!opts->stop)
+                       DECLARE_ARG("--leave-running");
+       } else if (strcmp(opts->action, "restore") == 0) {
+               int netnr = 0;
+               struct lxc_list *it;
+
+               DECLARE_ARG("--root");
+               DECLARE_ARG(opts->c->lxc_conf->rootfs.mount);
+               DECLARE_ARG("--restore-detached");
+               DECLARE_ARG("--pidfile");
+               DECLARE_ARG(opts->pidfile);
+
+               /* one "--veth-pair <eth>=<veth>" per configured network, using
+                * the names stored in the checkpoint directory */
+               lxc_list_for_each(it, &opts->c->lxc_conf->network) {
+                       char eth[128], veth[128], buf[257];
+                       void *m;
+
+                       if (read_criu_file(opts->directory, "veth", netnr, veth))
+                               goto err;
+                       if (read_criu_file(opts->directory, "eth", netnr, eth))
+                               goto err;
+                       ret = snprintf(buf, 257, "%s=%s", eth, veth);
+                       if (ret < 0 || ret >= 257)
+                               goto err;
+
+                       /* final NULL and --veth-pair eth0:vethASDF */
+                       m = realloc(argv, (argc + 1 + 2) * sizeof(*argv));
+                       if (!m)
+                               goto err;
+                       argv = m;
+
+                       DECLARE_ARG("--veth-pair");
+                       DECLARE_ARG(buf);
+                       /* the grown part is not zeroed, so terminate explicitly */
+                       argv[argc] = NULL;
+
+                       netnr++;
+               }
+       }
+
+#undef DECLARE_ARG
+
+       execv(argv[0], argv);
+err:
+       /* reached on any failure above, including a failed execv() */
+       for (i = 0; argv[i]; i++)
+               free(argv[i]);
+       free(argv);
+}
+
+/*
+ * Verify that the caller and the container configuration meet the
+ * conditions under which we know how to checkpoint/restore with CRIU:
+ *   - the effective user is root,
+ *   - every network is of type VETH or NONE,
+ *   - lxc.console is unset or "none",
+ *   - lxc.tty is 0,
+ *   - a "devices.deny = c 5:1 rwm" cgroup rule is configured.
+ * Logs the first unmet condition and returns false; returns true otherwise.
+ */
+static bool criu_ok(struct lxc_container *c)
+{
+       struct lxc_list *cur;
+       struct lxc_conf *conf;
+
+       if (geteuid() != 0) {
+               ERROR("Must be root to checkpoint\n");
+               return false;
+       }
+
+       conf = c->lxc_conf;
+
+       /* We only know how to restore containers with veth networks. */
+       lxc_list_for_each(cur, &conf->network) {
+               struct lxc_netdev *nd = cur->elem;
+
+               if (nd->type == LXC_NET_VETH || nd->type == LXC_NET_NONE)
+                       continue;
+
+               ERROR("Found network that is not VETH or NONE\n");
+               return false;
+       }
+
+       // The remaining requirements come from http://criu.org/LXC
+       if (conf->console.path && strcmp(conf->console.path, "none") != 0) {
+               ERROR("lxc.console must be none\n");
+               return false;
+       }
+
+       if (conf->tty != 0) {
+               ERROR("lxc.tty must be 0\n");
+               return false;
+       }
+
+       /* Succeed as soon as the required deny rule is seen. */
+       lxc_list_for_each(cur, &conf->cgroup) {
+               struct lxc_cgroup *rule = cur->elem;
+
+               if (strcmp(rule->subsystem, "devices.deny") != 0)
+                       continue;
+               if (strcmp(rule->value, "c 5:1 rwm") == 0)
+                       return true;
+       }
+
+       ERROR("couldn't find devices.deny = c 5:1 rwm");
+       return false;
+}
+
+/*
+ * Checkpoint container @c into @directory with "criu dump".
+ *
+ * Before criu runs, the host-side veth name, the bridge and the in-container
+ * interface name of each network are stored in @directory as veth<N>,
+ * bridge<N> and eth<N>, where N is the index of the network; the restore
+ * path reads them back by the same index.
+ *
+ * @stop:    when false, criu is asked to leave the container running.
+ * @verbose: enable criu's verbose logging.
+ *
+ * Returns true only when criu exited with status 0.
+ */
+static bool lxcapi_checkpoint(struct lxc_container *c, char *directory, bool stop, bool verbose)
+{
+       int netnr, status;
+       struct lxc_list *it;
+       bool error = false;
+       pid_t pid;
+
+       if (!criu_ok(c))
+               return false;
+
+       if (mkdir(directory, 0700) < 0 && errno != EEXIST)
+               return false;
+
+       netnr = 0;
+       lxc_list_for_each(it, &c->lxc_conf->network) {
+               char *veth = NULL, *bridge = NULL, veth_path[PATH_MAX], eth[128];
+               struct lxc_netdev *n = it->elem;
+               int pret;
+
+               pret = snprintf(veth_path, PATH_MAX, "lxc.network.%d.veth.pair", netnr);
+               if (pret < 0 || pret >= PATH_MAX) {
+                       error = true;
+                       goto out;
+               }
+
+               veth = lxcapi_get_running_config_item(c, veth_path);
+               if (!veth) {
+                       /* criu_ok() checks that all interfaces are
+                        * LXC_NET{VETH,NONE}, and VETHs should have this
+                        * config */
+                       assert(n->type == LXC_NET_NONE);
+                       break;
+               }
+
+               pret = snprintf(veth_path, PATH_MAX, "lxc.network.%d.link", netnr);
+               if (pret < 0 || pret >= PATH_MAX) {
+                       error = true;
+                       goto out;
+               }
+
+               bridge = lxcapi_get_running_config_item(c, veth_path);
+               if (!bridge) {
+                       error = true;
+                       goto out;
+               }
+
+               pret = snprintf(veth_path, PATH_MAX, "%s/veth%d", directory, netnr);
+               if (pret < 0 || pret >= PATH_MAX || print_to_file(veth_path, veth) < 0) {
+                       error = true;
+                       goto out;
+               }
+
+               pret = snprintf(veth_path, PATH_MAX, "%s/bridge%d", directory, netnr);
+               if (pret < 0 || pret >= PATH_MAX || print_to_file(veth_path, bridge) < 0) {
+                       error = true;
+                       goto out;
+               }
+
+               if (n->name) {
+                       /* length is checked first, so the copy is always terminated */
+                       if (strlen(n->name) >= 128) {
+                               error = true;
+                               goto out;
+                       }
+                       strncpy(eth, n->name, 128);
+               } else
+                       sprintf(eth, "eth%d", netnr);
+
+               pret = snprintf(veth_path, PATH_MAX, "%s/eth%d", directory, netnr);
+               if (pret < 0 || pret >= PATH_MAX || print_to_file(veth_path, eth) < 0)
+                       error = true;
+
+out:
+               free(veth);
+               free(bridge);
+               if (error)
+                       return false;
+
+               /* Move on to the next network index; without this every
+                * interface would query lxc.network.0.* and overwrite the
+                * files written for the first one. */
+               netnr++;
+       }
+
+       pid = fork();
+       if (pid < 0)
+               return false;
+
+       if (pid == 0) {
+               struct criu_opts os;
+
+               os.action = "dump";
+               os.directory = directory;
+               os.c = c;
+               os.stop = stop;
+               os.verbose = verbose;
+
+               /* exec_criu() returning is an error */
+               exec_criu(&os);
+               exit(1);
+       } else {
+               pid_t w = waitpid(pid, &status, 0);
+               if (w == -1) {
+                       perror("waitpid");
+                       return false;
+               }
+
+               if (WIFEXITED(status)) {
+                       return !WEXITSTATUS(status);
+               }
+
+               return false;
+       }
+}
+
+/*
+ * Restore container @c from the checkpoint in @directory with "criu restore".
+ *
+ * A child is forked which unshares its mount namespace, makes the container
+ * rootfs available at rootfs.mount and then execs criu.  The child never
+ * returns from this function; every failure in it ends in exit(1).
+ *
+ * The parent waits for criu, reads the restored init pid from the pidfile
+ * criu wrote, records the veth pair names saved at checkpoint time, marks
+ * the container RUNNING and then polls it via lxc_poll().
+ *
+ * Returns true when the restore succeeded and lxc_poll() returned 0.
+ */
+static bool lxcapi_restore(struct lxc_container *c, char *directory, bool verbose)
+{
+       pid_t pid;
+       struct lxc_list *it;
+       struct lxc_rootfs *rootfs;
+       char pidfile[L_tmpnam];
+       struct lxc_handler *handler;
+
+       if (!criu_ok(c))
+               return false;
+
+       if (geteuid()) {
+               ERROR("Must be root to restore\n");
+               return false;
+       }
+
+       /* NOTE(review): tmpnam() only generates a name and does not create
+        * the file; consider a safer scheme for the pidfile. */
+       if (!tmpnam(pidfile))
+               return false;
+
+       handler = lxc_init(c->name, c->lxc_conf, c->config_path);
+       if (!handler)
+               return false;
+
+       pid = fork();
+       if (pid < 0)
+               return false;
+
+       if (pid == 0) {
+               struct criu_opts os;
+
+               /* From here on we are the child.  Never "return": that would
+                * hand control back to the caller's code in a second process. */
+               if (unshare(CLONE_NEWNS))
+                       exit(1);
+
+               /* CRIU needs the lxc root bind mounted so that it is the root of some
+                * mount. */
+               rootfs = &c->lxc_conf->rootfs;
+
+               if (rootfs_is_blockdev(c->lxc_conf)) {
+                       if (do_rootfs_setup(c->lxc_conf, c->name, c->config_path) < 0)
+                               exit(1);
+               }
+               else {
+                       if (mkdir(rootfs->mount, 0755) < 0 && errno != EEXIST)
+                               exit(1);
+
+                       if (mount(rootfs->path, rootfs->mount, NULL, MS_BIND, NULL) < 0) {
+                               rmdir(rootfs->mount);
+                               exit(1);
+                       }
+               }
+
+               os.action = "restore";
+               os.directory = directory;
+               os.c = c;
+               os.pidfile = pidfile;
+               os.verbose = verbose;
+
+               /* exec_criu() returning is an error */
+               exec_criu(&os);
+               umount(rootfs->mount);
+               rmdir(rootfs->mount);
+               exit(1);
+       } else {
+               int status, netnr = 0, ret;
+               bool error = false;
+               FILE *f;
+               pid_t w = waitpid(pid, &status, 0);
+
+               if (w == -1) {
+                       perror("waitpid");
+                       return false;
+               }
+
+               /* A criu child that was killed or exited non-zero restored
+                * nothing, so there is no init pid to poll. */
+               if (!WIFEXITED(status) || WEXITSTATUS(status))
+                       return false;
+
+               f = fopen(pidfile, "r");
+               if (!f) {
+                       perror("reading pidfile");
+                       ERROR("couldn't read restore's init pidfile %s\n", pidfile);
+                       return false;
+               }
+
+               ret = fscanf(f, "%d", (int*) &handler->pid);
+               fclose(f);
+               if (ret != 1) {
+                       ERROR("reading restore pid failed");
+                       return false;
+               }
+
+               if (container_mem_lock(c))
+                       return false;
+
+               /* Re-attach the host-side veth names saved by the checkpoint. */
+               lxc_list_for_each(it, &c->lxc_conf->network) {
+                       char eth[128], veth[128];
+                       struct lxc_netdev *netdev = it->elem;
+
+                       if (read_criu_file(directory, "veth", netnr, veth)) {
+                               error = true;
+                               goto out_unlock;
+                       }
+                       if (read_criu_file(directory, "eth", netnr, eth)) {
+                               error = true;
+                               goto out_unlock;
+                       }
+                       netdev->priv.veth_attr.pair = strdup(veth);
+                       if (!netdev->priv.veth_attr.pair) {
+                               error = true;
+                               goto out_unlock;
+                       }
+                       netnr++;
+               }
+out_unlock:
+               container_mem_unlock(c);
+               if (error)
+                       return false;
+
+               if (lxc_set_state(c->name, handler, RUNNING))
+                       return false;
+
+               if (lxc_poll(c->name, handler)) {
+                       lxc_abort(c->name, handler);
+                       return false;
+               }
+       }
+
+       return true;
+}
+
 static int lxcapi_attach_run_waitl(struct lxc_container *c, lxc_attach_options_t *options, const char *program, const char *arg, ...)
 {
        va_list ap;
@@ -3191,6 +3987,9 @@ struct lxc_container *lxc_container_new(const char *name, const char *configpath
 {
        struct lxc_container *c;
 
+       if (!name)
+               return NULL;
+
        c = malloc(sizeof(*c));
        if (!c) {
                fprintf(stderr, "failed to malloc lxc_container\n");
@@ -3204,7 +4003,7 @@ struct lxc_container *lxc_container_new(const char *name, const char *configpath
                c->config_path = strdup(lxc_global_config_value("lxc.lxcpath"));
 
        if (!c->config_path) {
-               fprintf(stderr, "Out of memory");
+               fprintf(stderr, "Out of memory\n");
                goto err;
        }
 
@@ -3232,12 +4031,12 @@ struct lxc_container *lxc_container_new(const char *name, const char *configpath
                goto err;
        }
 
-       if (file_exists(c->configfile))
-               lxcapi_load_config(c, NULL);
+       if (file_exists(c->configfile) && !lxcapi_load_config(c, NULL))
+               goto err;
 
        if (ongoing_create(c) == 2) {
                ERROR("Error: %s creation was not completed", c->name);
-               lxcapi_destroy(c);
+               container_destroy(c);
                lxcapi_clear_config(c);
        }
        c->daemonize = true;
@@ -3262,6 +4061,7 @@ struct lxc_container *lxc_container_new(const char *name, const char *configpath
        c->wait = lxcapi_wait;
        c->set_config_item = lxcapi_set_config_item;
        c->destroy = lxcapi_destroy;
+       c->destroy_with_snapshots = lxcapi_destroy_with_snapshots;
        c->rename = lxcapi_rename;
        c->save_config = lxcapi_save_config;
        c->get_keys = lxcapi_get_keys;
@@ -3287,9 +4087,12 @@ struct lxc_container *lxc_container_new(const char *name, const char *configpath
        c->snapshot_list = lxcapi_snapshot_list;
        c->snapshot_restore = lxcapi_snapshot_restore;
        c->snapshot_destroy = lxcapi_snapshot_destroy;
+       c->snapshot_destroy_all = lxcapi_snapshot_destroy_all;
        c->may_control = lxcapi_may_control;
        c->add_device_node = lxcapi_add_device_node;
        c->remove_device_node = lxcapi_remove_device_node;
+       c->checkpoint = lxcapi_checkpoint;
+       c->restore = lxcapi_restore;
 
        /* we'll allow the caller to update these later */
        if (lxc_log_init(NULL, "none", NULL, "lxc_container", 0, c->config_path)) {