cgfsng: s/25/INTTYPE_TO_STRLEN(pid_t)/g

[mirror_lxc.git] / src / lxc / cgroups / cgfsng.c
diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c

index 5dee010de9e19c4309058595e4b8f3517f991175..7388ad7675525a4a2fc112f9499c42b5b9f4cf71 100644 (file)
--- a/src/lxc/cgroups/cgfsng.c
+++ b/src/lxc/cgroups/cgfsng.c
@@ -55,142 +55,19 @@
  #include "commands.h"
  #include "conf.h"
  #include "log.h"
+#include "macro.h"
  #include "storage/storage.h"
  #include "utils.h"
  
-lxc_log_define(lxc_cgfsng, lxc);
+#ifndef HAVE_STRLCPY
+#include "include/strlcpy.h"
+#endif
  
-static struct cgroup_ops cgfsng_ops;
+#ifndef HAVE_STRLCAT
+#include "include/strlcat.h"
+#endif
  
-/* A descriptor for a mounted hierarchy
- *
- * @controllers
- * - legacy hierarchy
- *   Either NULL, or a null-terminated list of all the co-mounted controllers.
- * - unified hierarchy
- *   Either NULL, or a null-terminated list of all enabled controllers.
- *
- * @mountpoint
- * - The mountpoint we will use.
- * - legacy hierarchy
- *   It will be either /sys/fs/cgroup/controller or
- *   /sys/fs/cgroup/controllerlist.
- * - unified hierarchy
- *   It will either be /sys/fs/cgroup or /sys/fs/cgroup/<mountpoint-name>
- *   depending on whether this is a hybrid cgroup layout (mix of legacy and
- *   unified hierarchies) or a pure unified cgroup layout.
- *
- * @base_cgroup
- * - The cgroup under which the container cgroup path
- *   is created. This will be either the caller's cgroup (if not root), or
- *   init's cgroup (if root).
- *
- * @fullcgpath
- * - The full path to the containers cgroup.
- *
- * @version
- * - legacy hierarchy
- *   If the hierarchy is a legacy hierarchy this will be set to
- *   CGROUP_SUPER_MAGIC.
- * - unified hierarchy
- *   If the hierarchy is a legacy hierarchy this will be set to
- *   CGROUP2_SUPER_MAGIC.
- */
-struct hierarchy {
-       char **controllers;
-       char *mountpoint;
-       char *base_cgroup;
-       char *fullcgpath;
-       int version;
-};
-
-/* The cgroup data which is attached to the lxc_handler.
- *
- * @cgroup_pattern
- * - A copy of lxc.cgroup.pattern.
- *
- * @container_cgroup
- * - If not null, the cgroup which was created for the container. For each
- *   hierarchy, it is created under the @hierarchy->base_cgroup directory.
- *   Relative to the base_cgroup it is the same for all hierarchies.
- *
- * @name
- * - The name of the container.
- *
- * @cgroup_meta
- * - A copy of the container's cgroup information. This overrides
- *   @cgroup_pattern.
- *
- * @cgroup_layout
- * - What cgroup layout the container is running with.
- *   - CGROUP_LAYOUT_UNKNOWN
- *     The cgroup layout could not be determined. This should be treated as an
- *     error condition.
- *   - CGROUP_LAYOUT_LEGACY
- *     The container is running with all controllers mounted into legacy cgroup
- *     hierarchies.
- *   - CGROUP_LAYOUT_HYBRID
- *     The container is running with at least one controller mounted into a
- *     legacy cgroup hierarchy and a mountpoint for the unified hierarchy.  The
- *     unified hierarchy can be empty (no controllers enabled) or non-empty
- *     (controllers enabled).
- *   - CGROUP_LAYOUT_UNIFIED
- *     The container is running on a pure unified cgroup hierarchy. The unified
- *     hierarchy can be empty (no controllers enabled) or non-empty (controllers
- *     enabled).
- */
-struct cgfsng_handler_data {
-       char *cgroup_pattern;
-       char *container_cgroup; /* cgroup we created for the container */
-       char *name; /* container name */
-       /* per-container cgroup information */
-       struct lxc_cgroup cgroup_meta;
-       cgroup_layout_t cgroup_layout;
-};
-
-/* @hierarchies
- * - A NULL-terminated array of struct hierarchy, one per legacy hierarchy. No
- *   duplicates. First sufficient, writeable mounted hierarchy wins.
- */
-struct hierarchy **hierarchies;
-/* Pointer to the unified hierarchy in the null terminated list @hierarchies.
- * This is merely a convenience for hybrid cgroup layouts to easily retrieve the
- * unified hierarchy without iterating throught @hierarchies.
- */
-struct hierarchy *unified;
-/*
- * @cgroup_layout
- * - What cgroup layout the container is running with.
- *   - CGROUP_LAYOUT_UNKNOWN
- *     The cgroup layout could not be determined. This should be treated as an
- *     error condition.
- *   - CGROUP_LAYOUT_LEGACY
- *     The container is running with all controllers mounted into legacy cgroup
- *     hierarchies.
- *   - CGROUP_LAYOUT_HYBRID
- *     The container is running with at least one controller mounted into a
- *     legacy cgroup hierarchy and a mountpoint for the unified hierarchy.  The
- *     unified hierarchy can be empty (no controllers enabled) or non-empty
- *     (controllers enabled).
- *   - CGROUP_LAYOUT_UNIFIED
- *     The container is running on a pure unified cgroup hierarchy. The unified
- *     hierarchy can be empty (no controllers enabled) or non-empty (controllers
- *     enabled).
- */
-cgroup_layout_t cgroup_layout;
-/* What controllers is the container supposed to use. */
-char *cgroup_use;
-
-/* @lxc_cgfsng_debug
- * - Whether to print debug info to stdout for the cgfsng driver.
- */
-static bool lxc_cgfsng_debug;
-
-#define CGFSNG_DEBUG(format, ...)                                              \
-       do {                                                                   \
-               if (lxc_cgfsng_debug)                                          \
-                       printf("cgfsng: " format, ##__VA_ARGS__);              \
-       } while (0)
+lxc_log_define(cgfsng, cgroup);
  
  static void free_string_list(char **clist)
  {
@@ -257,8 +134,9 @@ static char *cg_legacy_must_prefix_named(char *entry)
         len = strlen(entry);
         prefixed = must_alloc(len + 6);
  
-       memcpy(prefixed, "name=", sizeof("name="));
-       memcpy(prefixed + sizeof("name="), entry, len);
+
+       memcpy(prefixed, "name=", STRLITERALLEN("name="));
+       memcpy(prefixed + STRLITERALLEN("name="), entry, len);
         prefixed[len + 5] = '\0';
         return prefixed;
  }
@@ -298,42 +176,39 @@ static void must_append_controller(char **klist, char **nlist, char ***clist,
         (*clist)[newentry] = copy;
  }
  
-static void free_handler_data(struct cgfsng_handler_data *d)
-{
-       free(d->cgroup_pattern);
-       free(d->container_cgroup);
-       free(d->name);
-       if (d->cgroup_meta.dir)
-               free(d->cgroup_meta.dir);
-       if (d->cgroup_meta.controllers)
-               free(d->cgroup_meta.controllers);
-       free(d);
-}
-
  /* Given a handler's cgroup data, return the struct hierarchy for the controller
   * @c, or NULL if there is none.
   */
-struct hierarchy *get_hierarchy(const char *c)
+struct hierarchy *get_hierarchy(struct cgroup_ops *ops, const char *controller) /* @controller is the parameter the header comment still calls @c; NULL asks for the empty unified hierarchy */
  {
         int i;
  
-       if (!hierarchies)
+       errno = ENOENT; /* preset before any lookup; nothing in this function resets it on a successful return */
+
+       if (!ops->hierarchies) {
+               TRACE("There are no useable cgroup controllers");
                 return NULL;
+       }
  
-       for (i = 0; hierarchies[i]; i++) {
-               if (!c) {
+       for (i = 0; ops->hierarchies[i]; i++) { /* ops->hierarchies is walked as a NULL-terminated array */
+               if (!controller) {
                         /* This is the empty unified hierarchy. */
-                       if (hierarchies[i]->controllers &&
-                           !hierarchies[i]->controllers[0])
-                               return hierarchies[i];
+                       if (ops->hierarchies[i]->controllers &&
+                           !ops->hierarchies[i]->controllers[0])
+                               return ops->hierarchies[i]; /* non-NULL controller list whose first entry is NULL */
  
-                       return NULL;
+                       continue; /* keep scanning the remaining hierarchies; the removed line returned NULL after the first entry */
                 }
  
-               if (string_in_list(hierarchies[i]->controllers, controller))
-                       return hierarchies[i];
+               if (string_in_list(ops->hierarchies[i]->controllers, controller))
+                       return ops->hierarchies[i];
         }
  
+       if (controller) /* reached only when no hierarchy matched */
+               WARN("There is no useable %s controller", controller);
+       else
+               WARN("There is no empty unified cgroup hierarchy");
+
         return NULL;
  }
  
@@ -410,14 +285,13 @@ static uint32_t *lxc_cpumask(char *buf, size_t nbits)
         char *token;
         size_t arrlen;
         uint32_t *bitarr;
-       char *saveptr = NULL;
  
         arrlen = BITS_TO_LONGS(nbits);
         bitarr = calloc(arrlen, sizeof(uint32_t));
         if (!bitarr)
                 return NULL;
  
-       for (; (token = strtok_r(buf, ",", &saveptr)); buf = NULL) {
+       lxc_iterate_parts(token, buf, ",") {
                 errno = 0;
                 unsigned end, start;
                 char *range;
@@ -451,14 +325,14 @@ static char *lxc_cpumask_to_cpulist(uint32_t *bitarr, size_t nbits)
         int ret;
         size_t i;
         char **cpulist = NULL;
-       char numstr[LXC_NUMSTRLEN64] = {0};
+       char numstr[INTTYPE_TO_STRLEN(size_t)] = {0};
  
         for (i = 0; i <= nbits; i++) {
                 if (!is_set(i, bitarr))
                         continue;
  
-               ret = snprintf(numstr, LXC_NUMSTRLEN64, "%zu", i);
-               if (ret < 0 || (size_t)ret >= LXC_NUMSTRLEN64) {
+               ret = snprintf(numstr, sizeof(numstr), "%zu", i);
+               if (ret < 0 || (size_t)ret >= sizeof(numstr)) {
                         lxc_free_array((void **)cpulist, free);
                         return NULL;
                 }
@@ -534,7 +408,7 @@ static bool cg_legacy_filter_and_set_cpus(char *path, bool am_initialized)
  
         /* Get maximum number of cpus found in possible cpuset. */
         maxposs = get_max_cpus(posscpus);
-       if (maxposs < 0)
+       if (maxposs < 0 || maxposs >= INT_MAX - 1)
                 goto on_error;
  
         if (!file_exists(__ISOL_CPUS)) {
@@ -579,7 +453,7 @@ static bool cg_legacy_filter_and_set_cpus(char *path, bool am_initialized)
  
         /* Get maximum number of cpus found in isolated cpuset. */
         maxisol = get_max_cpus(isolcpus);
-       if (maxisol < 0)
+       if (maxisol < 0 || maxisol >= INT_MAX - 1)
                 goto on_error;
  
         if (maxposs < maxisol)
@@ -622,7 +496,7 @@ copy_parent:
         *lastslash = oldv;
         free(fpath);
         fpath = must_make_path(path, "cpuset.cpus", NULL);
-       ret = lxc_write_to_file(fpath, cpulist, strlen(cpulist), false);
+       ret = lxc_write_to_file(fpath, cpulist, strlen(cpulist), false, 0666);
         if (ret < 0) {
                 SYSERROR("Failed to write cpu list to \"%s\"", fpath);
                 goto on_error;
@@ -673,7 +547,7 @@ static bool copy_parent_file(char *path, char *file)
  
         *lastslash = oldv;
         fpath = must_make_path(path, file, NULL);
-       ret = lxc_write_to_file(fpath, value, len, false);
+       ret = lxc_write_to_file(fpath, value, len, false, 0666);
         if (ret < 0)
                 SYSERROR("Failed to write \"%s\" to file \"%s\"", value, fpath);
         free(fpath);
@@ -707,7 +581,7 @@ static bool cg_legacy_handle_cpuset_hierarchy(struct hierarchy *h, char *cgname)
         if (slash)
                 *slash = '\0';
  
-       cgpath = must_make_path(h->mountpoint, h->base_cgroup, cgname, NULL);
+       cgpath = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL);
         if (slash)
                 *slash = '/';
  
@@ -720,8 +594,7 @@ static bool cg_legacy_handle_cpuset_hierarchy(struct hierarchy *h, char *cgname)
                 }
         }
  
-       clonechildrenpath =
-           must_make_path(cgpath, "cgroup.clone_children", NULL);
+       clonechildrenpath = must_make_path(cgpath, "cgroup.clone_children", NULL);
         /* unified hierarchy doesn't have clone_children */
         if (!file_exists(clonechildrenpath)) {
                 free(clonechildrenpath);
@@ -762,7 +635,7 @@ static bool cg_legacy_handle_cpuset_hierarchy(struct hierarchy *h, char *cgname)
         }
         free(cgpath);
  
-       ret = lxc_write_to_file(clonechildrenpath, "1", 1, false);
+       ret = lxc_write_to_file(clonechildrenpath, "1", 1, false, 0666);
         if (ret < 0) {
                 /* Set clone_children so children inherit our settings */
                 SYSERROR("Failed to write 1 to \"%s\"", clonechildrenpath);
@@ -829,27 +702,24 @@ static bool controller_found(struct hierarchy **hlist, char *entry)
  /* Return true if all of the controllers which we require have been found.  The
   * required list is  freezer and anything in lxc.cgroup.use.
   */
-static bool all_controllers_found(void)
+static bool all_controllers_found(struct cgroup_ops *ops) /* checks "freezer" plus every entry of ops->cgroup_use against ops->hierarchies */
  {
-       char *p;
-       char *saveptr = NULL;
-       struct hierarchy **hlist = hierarchies;
+       char **cur;
+       struct hierarchy **hlist = ops->hierarchies;
  
         if (!controller_found(hlist, "freezer")) {
-               CGFSNG_DEBUG("No freezer controller mountpoint found\n");
+               ERROR("No freezer controller mountpoint found");
                 return false;
         }
  
-       if (!cgroup_use)
+       if (!ops->cgroup_use) /* nothing requested beyond freezer */
                 return true;
  
-       for (p = strtok_r(cgroup_use, ",", &saveptr); p;
-            p = strtok_r(NULL, ",", &saveptr)) {
-               if (!controller_found(hlist, p)) {
-                       CGFSNG_DEBUG("No %s controller mountpoint found\n", p);
+       for (cur = ops->cgroup_use; cur && *cur; cur++) /* cgroup_use is now walked as a NULL-terminated string array instead of being tokenized in place */
+               if (!controller_found(hlist, *cur)) {
+                       ERROR("No %s controller mountpoint found", *cur); /* first missing controller ends the check */
                         return false;
                 }
-       }
  
         return true;
  }
@@ -867,7 +737,7 @@ static char **cg_hybrid_get_controllers(char **klist, char **nlist, char *line,
          */
         int i;
         char *dup, *p2, *tok;
-       char *p = line, *saveptr = NULL, *sep = ",";
+       char *p = line, *sep = ",";
         char **aret = NULL;
  
         for (i = 0; i < 4; i++) {
@@ -881,29 +751,30 @@ static char **cg_hybrid_get_controllers(char **klist, char **nlist, char *line,
          * verify /sys/fs/cgroup/ in this field.
          */
         if (strncmp(p, "/sys/fs/cgroup/", 15) != 0) {
-               CGFSNG_DEBUG("Found hierarchy not under /sys/fs/cgroup: \"%s\"\n", p);
+               ERROR("Found hierarchy not under /sys/fs/cgroup: \"%s\"", p);
                 return NULL;
         }
  
         p += 15;
         p2 = strchr(p, ' ');
         if (!p2) {
-               CGFSNG_DEBUG("Corrupt mountinfo\n");
+               ERROR("Corrupt mountinfo");
                 return NULL;
         }
         *p2 = '\0';
  
         if (type == CGROUP_SUPER_MAGIC) {
-               /* strdup() here for v1 hierarchies. Otherwise strtok_r() will
-                * destroy mountpoints such as "/sys/fs/cgroup/cpu,cpuacct".
+               /* strdup() here for v1 hierarchies. Otherwise
+                * lxc_iterate_parts() will destroy mountpoints such as
+                * "/sys/fs/cgroup/cpu,cpuacct".
                  */
                 dup = strdup(p);
                 if (!dup)
                         return NULL;
  
-               for (tok = strtok_r(dup, sep, &saveptr); tok;
-                    tok = strtok_r(NULL, sep, &saveptr))
+               lxc_iterate_parts(tok, dup, sep) {
                         must_append_controller(klist, nlist, &aret, tok);
+               }
  
                 free(dup);
         }
@@ -925,15 +796,14 @@ static char **cg_unified_make_empty_controller(void)
  static char **cg_unified_get_controllers(const char *file)
  {
         char *buf, *tok;
-       char *saveptr = NULL, *sep = " \t\n";
+       char *sep = " \t\n";
         char **aret = NULL;
  
         buf = read_file(file);
         if (!buf)
                 return NULL;
  
-       for (tok = strtok_r(buf, sep, &saveptr); tok;
-            tok = strtok_r(NULL, sep, &saveptr)) {
+       lxc_iterate_parts(tok, buf, sep) {
                 int newentry;
                 char *copy;
  
@@ -946,8 +816,8 @@ static char **cg_unified_get_controllers(const char *file)
         return aret;
  }
  
-static struct hierarchy *add_hierarchy(char **clist, char *mountpoint,
-                                      char *base_cgroup, int type)
+static struct hierarchy *add_hierarchy(struct hierarchy ***h, char **clist, char *mountpoint,
+                                      char *container_base_path, int type)
  {
         struct hierarchy *new;
         int newentry;
@@ -955,12 +825,13 @@ static struct hierarchy *add_hierarchy(char **clist, char *mountpoint,
         new = must_alloc(sizeof(*new));
         new->controllers = clist;
         new->mountpoint = mountpoint;
-       new->base_cgroup = base_cgroup;
-       new->fullcgpath = NULL;
+       new->container_base_path = container_base_path;
+       new->container_full_path = NULL;
+       new->monitor_full_path = NULL;
         new->version = type;
  
-       newentry = append_null_to_list((void ***)&hierarchies);
-       hierarchies[newentry] = new;
+       newentry = append_null_to_list((void ***)h);
+       (*h)[newentry] = new;
         return new;
  }
  
@@ -1017,7 +888,7 @@ static char *copy_to_eol(char *p)
   */
  static bool controller_in_clist(char *cgline, char *c)
  {
-       char *tok, *saveptr = NULL, *eol, *tmp;
+       char *tok, *eol, *tmp;
         size_t len;
  
         eol = strchr(cgline, ':');
@@ -1029,8 +900,7 @@ static bool controller_in_clist(char *cgline, char *c)
         memcpy(tmp, cgline, len);
         tmp[len] = '\0';
  
-       for (tok = strtok_r(tmp, ",", &saveptr); tok;
-            tok = strtok_r(NULL, ",", &saveptr)) {
+       lxc_iterate_parts(tok, tmp, ",") {
                 if (strcmp(tok, c) == 0)
                         return true;
         }
@@ -1094,7 +964,7 @@ static int get_existing_subsystems(char ***klist, char ***nlist)
                 return -1;
  
         while (getline(&line, &len, f) != -1) {
-               char *p, *p2, *tok, *saveptr = NULL;
+               char *p, *p2, *tok;
                 p = strchr(line, ':');
                 if (!p)
                         continue;
@@ -1116,8 +986,7 @@ static int get_existing_subsystems(char ***klist, char ***nlist)
                         continue;
                 }
  
-               for (tok = strtok_r(p, ",", &saveptr); tok;
-                    tok = strtok_r(NULL, ",", &saveptr)) {
+               lxc_iterate_parts(tok, p, ",") {
                         if (strncmp(tok, "name=", 5) == 0)
                                 must_append_string(nlist, tok);
                         else
@@ -1139,39 +1008,26 @@ static void trim(char *s)
                 s[--len] = '\0';
  }
  
-static void lxc_cgfsng_print_handler_data(const struct cgfsng_handler_data *d)
-{
-       printf("Cgroup information:\n");
-       printf("  container name: %s\n", d->name ? d->name : "(null)");
-       printf("  lxc.cgroup.use: %s\n", cgroup_use ? cgroup_use : "(null)");
-       printf("  lxc.cgroup.pattern: %s\n",
-              d->cgroup_pattern ? d->cgroup_pattern : "(null)");
-       printf("  lxc.cgroup.dir: %s\n",
-              d->cgroup_meta.dir ? d->cgroup_meta.dir : "(null)");
-       printf("  cgroup: %s\n",
-              d->container_cgroup ? d->container_cgroup : "(null)");
-}
-
-static void lxc_cgfsng_print_hierarchies()
+static void lxc_cgfsng_print_hierarchies(struct cgroup_ops *ops) /* logs every entry of ops->hierarchies through TRACE() instead of printf() */
  {
         int i;
         struct hierarchy **it;
  
-       if (!hierarchies) {
-               printf("  No hierarchies found\n");
+       if (!ops->hierarchies) {
+               TRACE("  No hierarchies found");
                 return;
         }
  
-       printf("  Hierarchies:\n");
-       for (i = 0, it = hierarchies; it && *it; it++, i++) {
+       TRACE("  Hierarchies:");
+       for (i = 0, it = ops->hierarchies; it && *it; it++, i++) { /* i numbers the hierarchies in the log output */
                 int j;
                 char **cit;
  
-               printf("  %d: base_cgroup: %s\n", i, (*it)->base_cgroup ? (*it)->base_cgroup : "(null)");
-               printf("      mountpoint:  %s\n", (*it)->mountpoint ? (*it)->mountpoint : "(null)");
-               printf("      controllers:\n");
+               TRACE("  %d: base_cgroup: %s", i, (*it)->container_base_path ? (*it)->container_base_path : "(null)"); /* label still reads "base_cgroup" although the field is container_base_path */
+               TRACE("      mountpoint:  %s", (*it)->mountpoint ? (*it)->mountpoint : "(null)");
+               TRACE("      controllers:");
                 for (j = 0, cit = (*it)->controllers; cit && *cit; cit++, j++)
-                       printf("      %d: %s\n", j, *cit);
+                       TRACE("      %d: %s", j, *cit); /* one log line per controller of this hierarchy */
         }
  }
  
@@ -1181,441 +1037,518 @@ static void lxc_cgfsng_print_basecg_debuginfo(char *basecginfo, char **klist,
         int k;
         char **it;
  
-       printf("basecginfo is:\n");
-       printf("%s\n", basecginfo);
+       TRACE("basecginfo is:");
+       TRACE("%s", basecginfo);
  
         for (k = 0, it = klist; it && *it; it++, k++)
-               printf("kernel subsystem %d: %s\n", k, *it);
+               TRACE("kernel subsystem %d: %s", k, *it);
  
         for (k = 0, it = nlist; it && *it; it++, k++)
-               printf("named subsystem %d: %s\n", k, *it);
+               TRACE("named subsystem %d: %s", k, *it);
  }
  
-static void lxc_cgfsng_print_debuginfo(const struct cgfsng_handler_data *d)
+static int cgroup_rmdir(struct hierarchy **hierarchies, /* destroys each hierarchy's container_full_path directory; every path returns 0 */
+                       const char *container_cgroup)
  {
-       lxc_cgfsng_print_handler_data(d);
-       lxc_cgfsng_print_hierarchies();
+       int i;
+
+       if (!container_cgroup || !hierarchies) /* container_cgroup is only tested for NULL here, never used to build a path */
+               return 0;
+
+       for (i = 0; hierarchies[i]; i++) {
+               int ret;
+               struct hierarchy *h = hierarchies[i];
+
+               if (!h->container_full_path) /* nothing recorded for this hierarchy */
+                       continue;
+
+               ret = recursive_destroy(h->container_full_path);
+               if (ret < 0)
+                       WARN("Failed to destroy \"%s\"", h->container_full_path); /* best effort: the failure is logged, not propagated */
+
+               free(h->container_full_path); /* the path is released and cleared even when the destroy failed */
+               h->container_full_path = NULL;
+       }
+
+       return 0;
  }
  
-/* At startup, parse_hierarchies finds all the info we need about cgroup
- * mountpoints and current cgroups, and stores it in @d.
- */
-static bool cg_hybrid_init(void)
+struct generic_userns_exec_data { /* argument bundle that cgroup_rmdir_wrapper() receives through its void *data parameter */
+       struct hierarchy **hierarchies; /* forwarded to cgroup_rmdir() */
+       const char *container_cgroup; /* forwarded to cgroup_rmdir() */
+       struct lxc_conf *conf; /* read for root_nsuid_map/root_nsgid_map and init_uid/init_gid */
+       uid_t origuid; /* target uid in parent namespace */
+       char *path; /* NOTE(review): not read or written by the code visible in this hunk */
+};
+
+static int cgroup_rmdir_wrapper(void *data) /* sets gid and uid, clears supplementary groups, then calls cgroup_rmdir(); returns -1 if switching ids fails */
  {
         int ret;
-       char *basecginfo;
-       bool will_escape;
-       FILE *f;
-       size_t len = 0;
-       char *line = NULL;
-       char **klist = NULL, **nlist = NULL;
+       struct generic_userns_exec_data *arg = data;
+       uid_t nsuid = (arg->conf->root_nsuid_map != NULL) ? 0 : arg->conf->init_uid; /* 0 when a root uid mapping exists, otherwise init_uid */
+       gid_t nsgid = (arg->conf->root_nsgid_map != NULL) ? 0 : arg->conf->init_gid; /* same choice for the gid */
  
-       /* Root spawned containers escape the current cgroup, so use init's
-        * cgroups as our base in that case.
-        */
-       will_escape = (geteuid() == 0);
-       if (will_escape)
-               basecginfo = read_file("/proc/1/cgroup");
-       else
-               basecginfo = read_file("/proc/self/cgroup");
-       if (!basecginfo)
-               return false;
+       ret = setresgid(nsgid, nsgid, nsgid); /* the gid is changed before the uid */
+       if (ret < 0) {
+               SYSERROR("Failed to setresgid(%d, %d, %d)", (int)nsgid,
+                        (int)nsgid, (int)nsgid);
+               return -1;
+       }
  
-       ret = get_existing_subsystems(&klist, &nlist);
+       ret = setresuid(nsuid, nsuid, nsuid);
         if (ret < 0) {
-               CGFSNG_DEBUG("Failed to retrieve available legacy cgroup controllers\n");
-               free(basecginfo);
-               return false;
+               SYSERROR("Failed to setresuid(%d, %d, %d)", (int)nsuid,
+                        (int)nsuid, (int)nsuid);
+               return -1;
         }
  
-       f = fopen("/proc/self/mountinfo", "r");
-       if (!f) {
-               CGFSNG_DEBUG("Failed to open \"/proc/self/mountinfo\"\n");
-               free(basecginfo);
-               return false;
+       ret = setgroups(0, NULL);
+       if (ret < 0 && errno != EPERM) { /* an EPERM failure from setgroups() is tolerated */
+               SYSERROR("Failed to setgroups(0, NULL)");
+               return -1;
         }
  
-       if (lxc_cgfsng_debug)
-               lxc_cgfsng_print_basecg_debuginfo(basecginfo, klist, nlist);
+       return cgroup_rmdir(arg->hierarchies, arg->container_cgroup); /* the removal itself runs with the ids set above */
+}
  
-       while (getline(&line, &len, f) != -1) {
-               int type;
-               bool writeable;
-               struct hierarchy *new;
-               char *base_cgroup = NULL, *mountpoint = NULL;
-               char **controller_list = NULL;
+__cgfsng_ops static void cgfsng_payload_destroy(struct cgroup_ops *ops, /* removes the container's cgroups, through userns_exec_1() when an id map is configured */
+                                               struct lxc_handler *handler)
+{
+       int ret;
+       struct generic_userns_exec_data wrap;
  
-               type = get_cgroup_version(line);
-               if (type == 0)
-                       continue;
+       wrap.origuid = 0;
+       wrap.container_cgroup = ops->container_cgroup;
+       wrap.hierarchies = ops->hierarchies;
+       wrap.conf = handler->conf; /* wrap.path is left unset; cgroup_rmdir_wrapper() does not read it */
  
-               if (type == CGROUP2_SUPER_MAGIC && unified)
-                       continue;
+       if (handler->conf && !lxc_list_empty(&handler->conf->id_map)) /* a non-empty id_map selects the wrapper path */
+               ret = userns_exec_1(handler->conf, cgroup_rmdir_wrapper, &wrap,
+                                   "cgroup_rmdir_wrapper");
+       else
+               ret = cgroup_rmdir(ops->hierarchies, ops->container_cgroup); /* otherwise remove directly with the caller's current ids */
+       if (ret < 0) {
+               WARN("Failed to destroy cgroups"); /* failure is only logged; the function returns void */
+               return;
+       }
+}
  
-               if (cgroup_layout == CGROUP_LAYOUT_UNKNOWN) {
-                       if (type == CGROUP2_SUPER_MAGIC)
-                               cgroup_layout = CGROUP_LAYOUT_UNIFIED;
-                       else if (type == CGROUP_SUPER_MAGIC)
-                               cgroup_layout = CGROUP_LAYOUT_LEGACY;
-               } else if (cgroup_layout == CGROUP_LAYOUT_UNIFIED) {
-                       if (type == CGROUP_SUPER_MAGIC)
-                               cgroup_layout = CGROUP_LAYOUT_HYBRID;
-               } else if (cgroup_layout == CGROUP_LAYOUT_LEGACY) {
-                       if (type == CGROUP2_SUPER_MAGIC)
-                               cgroup_layout = CGROUP_LAYOUT_HYBRID;
-               }
+__cgfsng_ops static void cgfsng_monitor_destroy(struct cgroup_ops *ops, /* per hierarchy: write the monitor pid into a pivot cgroup, then destroy monitor_full_path */
+                                               struct lxc_handler *handler)
+{
+       int len;
+       char *pivot_path;
+       struct lxc_conf *conf = handler->conf;
+       char pidstr[INTTYPE_TO_STRLEN(pid_t)]; /* buffer sized from pid_t; receives the decimal monitor pid below */
  
-               controller_list = cg_hybrid_get_controllers(klist, nlist, line, type);
-               if (!controller_list && type == CGROUP_SUPER_MAGIC)
-                       continue;
+       if (!ops->hierarchies)
+               return;
  
-               if (type == CGROUP_SUPER_MAGIC)
-                       if (controller_list_is_dup(hierarchies, controller_list))
-                               goto next;
+       len = snprintf(pidstr, sizeof(pidstr), "%d", handler->monitor_pid);
+       if (len < 0 || (size_t)len >= sizeof(pidstr)) /* give up without logging on snprintf error or truncation */
+               return;
  
-               mountpoint = cg_hybrid_get_mountpoint(line);
-               if (!mountpoint) {
-                       CGFSNG_DEBUG("Failed parsing mountpoint from \"%s\"\n", line);
-                       goto next;
-               }
+       for (int i = 0; ops->hierarchies[i]; i++) {
+               int ret;
+               struct hierarchy *h = ops->hierarchies[i];
  
-               if (type == CGROUP_SUPER_MAGIC)
-                       base_cgroup = cg_hybrid_get_current_cgroup(basecginfo, controller_list[0], CGROUP_SUPER_MAGIC);
+               if (!h->monitor_full_path) /* no monitor cgroup recorded for this hierarchy */
+                       continue;
+
+               if (conf && conf->cgroup_meta.dir) /* the pivot cgroup sits below cgroup_meta.dir when that is set */
+                       pivot_path = must_make_path(h->mountpoint,
+                                                   h->container_base_path,
+                                                   conf->cgroup_meta.dir,
+                                                   PIVOT_CGROUP,
+                                                   "cgroup.procs", NULL);
                 else
-                       base_cgroup = cg_hybrid_get_current_cgroup(basecginfo, NULL, CGROUP2_SUPER_MAGIC);
-               if (!base_cgroup) {
-                       CGFSNG_DEBUG("Failed to find current cgroup\n");
+                       pivot_path = must_make_path(h->mountpoint,
+                                                   h->container_base_path,
+                                                   PIVOT_CGROUP,
+                                                   "cgroup.procs", NULL);
+
+               ret = mkdir_p(pivot_path, 0755); /* NOTE(review): pivot_path ends in "cgroup.procs", so mkdir_p() is handed the file path rather than its directory; confirm intended */
+               if (ret < 0 && errno != EEXIST)
                         goto next;
-               }
  
-               trim(base_cgroup);
-               prune_init_scope(base_cgroup);
-               if (type == CGROUP2_SUPER_MAGIC)
-                       writeable = test_writeable_v2(mountpoint, base_cgroup);
-               else
-                       writeable = test_writeable_v1(mountpoint, base_cgroup);
-               if (!writeable)
+               /* Move ourselves into the pivot cgroup to delete our own
+                * cgroup.
+                */
+               ret = lxc_write_to_file(pivot_path, pidstr, len, false, 0666);
+               if (ret != 0) /* any non-zero result skips the destroy for this hierarchy */
                         goto next;
  
-               if (type == CGROUP2_SUPER_MAGIC) {
-                       char *cgv2_ctrl_path;
+               ret = recursive_destroy(h->monitor_full_path);
+               if (ret < 0)
+                       WARN("Failed to destroy \"%s\"", h->monitor_full_path); /* best effort: logged, not propagated */
  
-                       cgv2_ctrl_path = must_make_path(mountpoint, base_cgroup,
-                                                       "cgroup.controllers",
-                                                       NULL);
+       next:
+               free(pivot_path); /* runs on the fall-through path and on both goto paths */
+       }
+}
  
-                       controller_list = cg_unified_get_controllers(cgv2_ctrl_path);
-                       free(cgv2_ctrl_path);
-                       if (!controller_list)
-                               controller_list = cg_unified_make_empty_controller();
-               }
+static bool cg_unified_create_cgroup(struct hierarchy *h, char *cgname) /* writes "+<controller> ..." to cgroup.subtree_control along cgname's path; false on a split or write failure */
+{
+       size_t i, parts_len;
+       char **it;
+       size_t full_len = 0;
+       char *add_controllers = NULL, *cgroup = NULL;
+       char **parts = NULL;
+       bool bret = false;
  
-               new = add_hierarchy(controller_list, mountpoint, base_cgroup, type);
-               if (type == CGROUP2_SUPER_MAGIC && !unified)
-                       unified = new;
+       if (h->version != CGROUP2_SUPER_MAGIC) /* any other hierarchy version is a successful no-op */
+               return true;
  
-               continue;
+       if (!h->controllers) /* no controller list: nothing to enable */
+               return true;
  
-       next:
-               free_string_list(controller_list);
-               free(mountpoint);
-               free(base_cgroup);
+       /* For now we simply enable all controllers that we have detected by
+        * creating a string like "+memory +pids +cpu +io".
+        * TODO: In the near future we might want to support "-<controller>"
+        * etc. but whether supporting semantics like this make sense will need
+        * some thinking.
+        */
+       for (it = h->controllers; it && *it; it++) {
+               full_len += strlen(*it) + 2; /* budget for "+", the name and one separator */
+               add_controllers = must_realloc(add_controllers, full_len + 1);
+
+               if (h->controllers[0] == *it)
+                       add_controllers[0] = '\0'; /* first iteration: make the fresh buffer an empty string before strlcat() */
+
+               (void)strlcat(add_controllers, "+", full_len + 1);
+               (void)strlcat(add_controllers, *it, full_len + 1);
+
+               if ((it + 1) && *(it + 1))
+                       (void)strlcat(add_controllers, " ", full_len + 1); /* separator only when another controller follows */
         }
  
-       free_string_list(klist);
-       free_string_list(nlist);
+       parts = lxc_string_split(cgname, '/');
+       if (!parts)
+               goto on_error;
  
-       free(basecginfo);
+       parts_len = lxc_array_len((void **)parts);
+       if (parts_len > 0)
+               parts_len--; /* the last component of cgname is excluded from the loop below */
  
-       fclose(f);
-       free(line);
+       cgroup = must_make_path(h->mountpoint, h->container_base_path, NULL);
+       for (i = 0; i < parts_len; i++) {
+               int ret;
+               char *target;
  
-       if (lxc_cgfsng_debug) {
-               printf("Writable cgroup hierarchies:\n");
-               lxc_cgfsng_print_hierarchies();
+               cgroup = must_append_path(cgroup, parts[i], NULL); /* descend one component per iteration */
+               target = must_make_path(cgroup, "cgroup.subtree_control", NULL);
+               ret = lxc_write_to_file(target, add_controllers, full_len, false, 0666); /* NOTE(review): the last controller gets no separator, so full_len is strlen(add_controllers) + 1 and this write appears to include the NUL; confirm intended */
+               free(target);
+               if (ret < 0) {
+                       SYSERROR("Could not enable \"%s\" controllers in the "
+                                "unified cgroup \"%s\"", add_controllers, cgroup);
+                       goto on_error;
+               }
         }
  
-       /* verify that all controllers in cgroup.use and all crucial
-        * controllers are accounted for
-        */
-       if (!all_controllers_found())
-               return false;
+       bret = true;
  
-       return true;
+on_error: /* shared cleanup, also reached on success with bret == true */
+       lxc_free_array((void **)parts, free);
+       free(add_controllers);
+       free(cgroup);
+       return bret;
  }
  
-static int cg_is_pure_unified(void)
+/* Create the monitor cgroup @cgname in hierarchy @h.
+ *
+ * Stores @h->mountpoint/@h->container_base_path/@cgname in
+ * @h->monitor_full_path. A directory that already exists is accepted as is
+ * and true is returned right away. Otherwise the legacy cpuset handling is
+ * run, the directory is created with mode 0755 and the result of
+ * cg_unified_create_cgroup() is returned.
+ *
+ * @h->monitor_full_path stays set when false is returned.
+ */
+static bool monitor_create_path_for_hierarchy(struct hierarchy *h, char *cgname)
  {
-
         int ret;
-       struct statfs fs;
  
-       ret = statfs("/sys/fs/cgroup", &fs);
-       if (ret < 0)
-               return -ENOMEDIUM;
+       h->monitor_full_path = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL);
+       if (dir_exists(h->monitor_full_path))
+               return true;
  
-       if (is_fs_type(&fs, CGROUP2_SUPER_MAGIC))
-               return CGROUP2_SUPER_MAGIC;
+       if (!cg_legacy_handle_cpuset_hierarchy(h, cgname)) {
+               ERROR("Failed to handle legacy cpuset controller");
+               return false;
+       }
  
-       return 0;
+       ret = mkdir_p(h->monitor_full_path, 0755);
+       if (ret < 0) {
+               ERROR("Failed to create cgroup \"%s\"", h->monitor_full_path);
+               return false;
+       }
+
+       return cg_unified_create_cgroup(h, cgname);
  }
  
-/* Get current cgroup from /proc/self/cgroup for the cgroupfs v2 hierarchy. */
-static char *cg_unified_get_current_cgroup(void)
+/* Create the container cgroup @cgname in hierarchy @h.
+ *
+ * Stores @h->mountpoint/@h->container_base_path/@cgname in
+ * @h->container_full_path. Unlike the monitor variant, a directory that
+ * already exists is treated as an error. Otherwise the legacy cpuset handling
+ * is run, the directory is created with mode 0755 and the result of
+ * cg_unified_create_cgroup() is returned.
+ *
+ * @h->container_full_path stays set when false is returned.
+ */
+static bool container_create_path_for_hierarchy(struct hierarchy *h, char *cgname)
  {
-       char *basecginfo, *base_cgroup;
-       bool will_escape;
-       char *copy = NULL;
+       int ret;
  
-       will_escape = (geteuid() == 0);
-       if (will_escape)
-               basecginfo = read_file("/proc/1/cgroup");
-       else
-               basecginfo = read_file("/proc/self/cgroup");
-       if (!basecginfo)
-               return NULL;
-
-       base_cgroup = strstr(basecginfo, "0::/");
-       if (!base_cgroup)
-               goto cleanup_on_err;
+       h->container_full_path = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL);
+       if (dir_exists(h->container_full_path)) {
+               ERROR("The cgroup \"%s\" already existed", h->container_full_path);
+               return false;
+       }
  
-       base_cgroup = base_cgroup + 3;
-       copy = copy_to_eol(base_cgroup);
-       if (!copy)
-               goto cleanup_on_err;
+       if (!cg_legacy_handle_cpuset_hierarchy(h, cgname)) {
+               ERROR("Failed to handle legacy cpuset controller");
+               return false;
+       }
  
-cleanup_on_err:
-       free(basecginfo);
-       if (copy)
-               trim(copy);
+       ret = mkdir_p(h->container_full_path, 0755);
+       if (ret < 0) {
+               ERROR("Failed to create cgroup \"%s\"", h->container_full_path);
+               return false;
+       }
  
-       return copy;
+       return cg_unified_create_cgroup(h, cgname);
  }
  
-static int cg_unified_init(void)
+/* Undo a cgroup creation in hierarchy @h.
+ *
+ * Removes the directory named by @h->monitor_full_path (if @monitor is true)
+ * or @h->container_full_path (otherwise) with rmdir(), logging a failure
+ * without aborting. The path string is then freed and the corresponding
+ * member reset to NULL. @cgname is not used by this function.
+ */
+static void remove_path_for_hierarchy(struct hierarchy *h, char *cgname, bool monitor)
  {
         int ret;
-       char *mountpoint, *subtree_path;
-       char **delegatable;
-       char *base_cgroup = NULL;
+       char *full_path;
  
-       ret = cg_is_pure_unified();
-       if (ret == -ENOMEDIUM)
-               return -ENOMEDIUM;
+       if (monitor)
+               full_path = h->monitor_full_path;
+       else
+               full_path = h->container_full_path;
  
-       if (ret != CGROUP2_SUPER_MAGIC)
-               return 0;
+       ret = rmdir(full_path);
+       if (ret < 0)
+               SYSERROR("Failed to rmdir(\"%s\") from failed creation attempt", full_path);
  
-       base_cgroup = cg_unified_get_current_cgroup();
-       if (!base_cgroup)
-               return -EINVAL;
-       prune_init_scope(base_cgroup);
+       free(full_path);
  
-       /* We assume that we have already been given controllers to delegate
-        * further down the hierarchy. If not it is up to the user to delegate
-        * them to us.
-        */
-       mountpoint = must_copy_string("/sys/fs/cgroup");
-       subtree_path = must_make_path(mountpoint, base_cgroup,
-                                     "cgroup.subtree_control", NULL);
-       delegatable = cg_unified_get_controllers(subtree_path);
-       free(subtree_path);
-       if (!delegatable)
-               delegatable = cg_unified_make_empty_controller();
-       if (!delegatable[0])
-               CGFSNG_DEBUG("No controllers are enabled for delegation\n");
+       /* Clear the member that was just freed so it cannot be reused. */
+       if (monitor)
+               h->monitor_full_path = NULL;
+       else
+               h->container_full_path = NULL;
+}
  
-       /* TODO: If the user requested specific controllers via lxc.cgroup.use
-        * we should verify here. The reason I'm not doing it right is that I'm
-        * not convinced that lxc.cgroup.use will be the future since it is a
-        * global property. I much rather have an option that lets you request
-        * controllers per container.
-        */
+/* Create the monitor cgroup in all hierarchies.
+ *
+ * The cgroup name is built from conf->cgroup_meta.dir (when set),
+ * ops->monitor_pattern and handler->name. If creation fails in any hierarchy,
+ * the paths already set up during that pass are removed again and the whole
+ * pass is repeated with the suffix -1, -2, ..., -999 appended to the name.
+ *
+ * Returns true once a complete pass over all hierarchies succeeded, false if
+ * handler->conf is NULL, the name could not be built or every suffix failed.
+ */
+__cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops,
+                                                       struct lxc_handler *handler)
+{
+       char *monitor_cgroup, *offset, *tmp;
+       int i, idx = 0;
+       size_t len;
+       bool bret = false;
+       struct lxc_conf *conf = handler->conf;
  
-       add_hierarchy(delegatable, mountpoint, base_cgroup, CGROUP2_SUPER_MAGIC);
-       unified = hierarchies[0];
+       if (!conf)
+               return bret;
  
-       cgroup_layout = CGROUP_LAYOUT_UNIFIED;
-       return CGROUP2_SUPER_MAGIC;
-}
+       if (conf->cgroup_meta.dir)
+               tmp = lxc_string_join("/",
+                                     (const char *[]){conf->cgroup_meta.dir,
+                                                      ops->monitor_pattern,
+                                                      handler->name, NULL},
+                                     false);
+       else
+               tmp = must_make_path(ops->monitor_pattern, handler->name, NULL);
+       if (!tmp)
+               return bret;
  
-static bool cg_init(void)
-{
-       int ret;
-       const char *tmp;
+       len = strlen(tmp) + 5; /* leave room for -NNN\0 */
+       monitor_cgroup = must_alloc(len);
+       (void)strlcpy(monitor_cgroup, tmp, len);
+       free(tmp);
+       /* The suffix is written over the terminating \0 of the base name. */
+       offset = monitor_cgroup + len - 5;
  
-       errno = 0;
-       tmp = lxc_global_config_value("lxc.cgroup.use");
-       if (!cgroup_use && errno != 0) { /* lxc.cgroup.use can be NULL */
-               CGFSNG_DEBUG("Failed to retrieve list of cgroups to use\n");
-               return false;
-       }
-       cgroup_use = must_copy_string(tmp);
+       do {
+               if (idx) {
+                       int ret = snprintf(offset, 5, "-%d", idx);
+                       if (ret < 0 || (size_t)ret >= 5)
+                               goto on_error;
+               }
  
-       ret = cg_unified_init();
-       if (ret < 0)
-               return false;
+               for (i = 0; ops->hierarchies[i]; i++) {
+                       if (!monitor_create_path_for_hierarchy(ops->hierarchies[i], monitor_cgroup)) {
+                               ERROR("Failed to create cgroup \"%s\"", ops->hierarchies[i]->monitor_full_path);
+                               /* Release the monitor path set by the failed
+                                * call; the container path is not ours to free.
+                                */
+                               free(ops->hierarchies[i]->monitor_full_path);
+                               ops->hierarchies[i]->monitor_full_path = NULL;
  
-       if (ret == CGROUP2_SUPER_MAGIC)
-               return true;
+                               for (int j = 0; j < i; j++)
+                                       remove_path_for_hierarchy(ops->hierarchies[j], monitor_cgroup, true);
+
+                               idx++;
+                               break;
+                       }
+               }
+               /* Retry only if the pass above stopped early. After a complete
+                * pass hierarchies[i] is the terminating NULL entry, so the
+                * loop ends instead of repeating a successful attempt forever.
+                */
+       } while (ops->hierarchies[i] && idx > 0 && idx < 1000);
+
+       if (idx < 1000)
+               bret = true;
+
+on_error:
+       free(monitor_cgroup);
  
-       return cg_hybrid_init();
+       return bret;
  }
  
-static void *cgfsng_init(struct lxc_handler *handler)
+/* Try to create the same cgroup in all hierarchies. Start with cgroup_pattern;
+ * next cgroup_pattern-1, -2, ..., -999.
+ *
+ * The base name is conf->cgroup_meta.dir/handler->name when cgroup_meta.dir is
+ * set, otherwise ops->cgroup_pattern with "%n" replaced by handler->name. On
+ * success the chosen name is stored in ops->container_cgroup and true is
+ * returned. Returns false if a container cgroup was already recorded, if
+ * handler->conf is NULL, if the name cannot be built or if all suffixes fail.
+ */
+__cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops,
+                                                       struct lxc_handler *handler)
  {
-       const char *cgroup_pattern;
-       struct cgfsng_handler_data *d;
+       int i;
+       size_t len;
+       char *container_cgroup, *offset, *tmp;
+       int idx = 0;
+       struct lxc_conf *conf = handler->conf;
  
-       d = must_alloc(sizeof(*d));
-       memset(d, 0, sizeof(*d));
+       if (ops->container_cgroup) {
+               WARN("cgfsng_create called a second time: %s", ops->container_cgroup);
+               return false;
+       }
  
-       /* copy container name */
-       d->name = must_copy_string(handler->name);
+       if (!conf)
+               return false;
  
-       /* copy per-container cgroup information */
-       d->cgroup_meta.dir = NULL;
-       d->cgroup_meta.controllers = NULL;
-       if (handler->conf) {
-               d->cgroup_meta.dir = must_copy_string(handler->conf->cgroup_meta.dir);
-               d->cgroup_meta.controllers = must_copy_string(handler->conf->cgroup_meta.controllers);
+       if (conf->cgroup_meta.dir)
+               tmp = lxc_string_join("/", (const char *[]){conf->cgroup_meta.dir, handler->name, NULL}, false);
+       else
+               tmp = lxc_string_replace("%n", handler->name, ops->cgroup_pattern);
+       if (!tmp) {
+               ERROR("Failed expanding cgroup name pattern");
+               return false;
         }
  
-       /* copy system-wide cgroup information */
-       cgroup_pattern = lxc_global_config_value("lxc.cgroup.pattern");
-       if (!cgroup_pattern) {
-               /* lxc.cgroup.pattern is only NULL on error. */
-               ERROR("Failed to retrieve cgroup pattern");
+       len = strlen(tmp) + 5; /* leave room for -NNN\0 */
+       container_cgroup = must_alloc(len);
+       (void)strlcpy(container_cgroup, tmp, len);
+       free(tmp);
+       /* The suffix is written over the terminating \0 of the base name. */
+       offset = container_cgroup + len - 5;
+
+again:
+       if (idx == 1000) {
+               ERROR("Too many conflicting cgroup names");
                 goto out_free;
         }
-       d->cgroup_pattern = must_copy_string(cgroup_pattern);
  
-       d->cgroup_layout = cgroup_layout;
-       if (d->cgroup_layout == CGROUP_LAYOUT_LEGACY)
-               TRACE("Running with legacy cgroup layout");
-       else if (d->cgroup_layout == CGROUP_LAYOUT_HYBRID)
-               TRACE("Running with hybrid cgroup layout");
-       else if (d->cgroup_layout == CGROUP_LAYOUT_UNIFIED)
-               TRACE("Running with unified cgroup layout");
-       else
-               WARN("Running with unknown cgroup layout");
+       if (idx) {
+               int ret;
+
+               ret = snprintf(offset, 5, "-%d", idx);
+               /* idx is below 1000 here, so "-%d" always fits into 5 bytes;
+                * the branch only exists as a compiler workaround.
+                */
+               if (ret < 0 || (size_t)ret >= 5) {
+                       FILE *f = fopen("/dev/null", "w");
+                       if (f) {
+                               fprintf(f, "Workaround for GCC7 bug: "
+                                          "https://gcc.gnu.org/bugzilla/"
+                                          "show_bug.cgi?id=78969");
+                               fclose(f);
+                       }
+               }
+       }
+
+       for (i = 0; ops->hierarchies[i]; i++) {
+               if (!container_create_path_for_hierarchy(ops->hierarchies[i], container_cgroup)) {
+                       ERROR("Failed to create cgroup \"%s\"", ops->hierarchies[i]->container_full_path);
+                       free(ops->hierarchies[i]->container_full_path);
+                       ops->hierarchies[i]->container_full_path = NULL;
+                       /* Roll back the hierarchies handled before this one. */
+                       for (int j = 0; j < i; j++)
+                               remove_path_for_hierarchy(ops->hierarchies[j], container_cgroup, false);
+                       idx++;
+                       goto again;
+               }
+       }
  
-       if (lxc_cgfsng_debug)
-               lxc_cgfsng_print_debuginfo(d);
+       ops->container_cgroup = container_cgroup;
  
-       return d;
+       return true;
  
  out_free:
-       free_handler_data(d);
-       return NULL;
+       free(container_cgroup);
+
+       return false;
  }
  
-static int recursive_destroy(char *dirname)
+/* Write @pid to the cgroup.procs file of every hierarchy.
+ *
+ * The file below monitor_full_path is used when @monitor is true, the one
+ * below container_full_path otherwise. Returns false if @pid cannot be
+ * formatted or as soon as one write fails; hierarchies written before the
+ * failure are not reverted.
+ */
+__cgfsng_ops static bool __do_cgroup_enter(struct cgroup_ops *ops, pid_t pid,
+                                            bool monitor)
  {
-       int ret;
-       struct dirent *direntp;
-       DIR *dir;
-       int r = 0;
-
-       dir = opendir(dirname);
-       if (!dir)
-               return -1;
-
-       while ((direntp = readdir(dir))) {
-               char *pathname;
-               struct stat mystat;
+       int len;
+       char pidstr[INTTYPE_TO_STRLEN(pid_t)];
  
-               if (!strcmp(direntp->d_name, ".") ||
-                   !strcmp(direntp->d_name, ".."))
-                       continue;
+       len = snprintf(pidstr, sizeof(pidstr), "%d", pid);
+       if (len < 0 || (size_t)len >= sizeof(pidstr))
+               return false;
  
-               pathname = must_make_path(dirname, direntp->d_name, NULL);
+       for (int i = 0; ops->hierarchies[i]; i++) {
+               int ret;
+               char *path;
  
-               ret = lstat(pathname, &mystat);
-               if (ret < 0) {
-                       if (!r)
-                               WARN("Failed to stat \"%s\"", pathname);
-                       r = -1;
-                       goto next;
+               if (monitor)
+                       path = must_make_path(ops->hierarchies[i]->monitor_full_path,
+                                             "cgroup.procs", NULL);
+               else
+                       path = must_make_path(ops->hierarchies[i]->container_full_path,
+                                             "cgroup.procs", NULL);
+               ret = lxc_write_to_file(path, pidstr, len, false, 0666);
+               if (ret != 0) {
+                       SYSERROR("Failed to enter cgroup \"%s\"", path);
+                       free(path);
+                       return false;
                 }
-
-               if (!S_ISDIR(mystat.st_mode))
-                       goto next;
-
-               ret = recursive_destroy(pathname);
-               if (ret < 0)
-                       r = -1;
-       next:
-               free(pathname);
-       }
-
-       ret = rmdir(dirname);
-       if (ret < 0) {
-               if (!r)
-                       WARN("%s - Failed to delete \"%s\"", strerror(errno), dirname);
-               r = -1;
-       }
-
-       ret = closedir(dir);
-       if (ret < 0) {
-               if (!r)
-                       WARN("%s - Failed to delete \"%s\"", strerror(errno), dirname);
-               r = -1;
+               free(path);
         }
  
-       return r;
+       return true;
  }
  
-static int cgroup_rmdir(char *container_cgroup)
+/* Write @pid to cgroup.procs below the monitor path of every hierarchy. */
+__cgfsng_ops static bool cgfsng_monitor_enter(struct cgroup_ops *ops, pid_t pid)
  {
-       int i;
-
-       if (!container_cgroup || !hierarchies)
-               return 0;
+       return __do_cgroup_enter(ops, pid, true);
+}
  
-       for (i = 0; hierarchies[i]; i++) {
-               int ret;
-               struct hierarchy *h = hierarchies[i];
+/* Write @pid to cgroup.procs below the container path of every hierarchy. */
+__cgfsng_ops static bool cgfsng_payload_enter(struct cgroup_ops *ops, pid_t pid)
+{
+       return __do_cgroup_enter(ops, pid, false);
+}
  
-               if (!h->fullcgpath)
-                       continue;
+/* Change owner, group and mode of @path.
+ *
+ * chown() is attempted first and chmod() only if it succeeded. A failure of
+ * either call is logged as a warning and reported as -1; 0 means both calls
+ * succeeded.
+ */
+static int chowmod(char *path, uid_t chown_uid, gid_t chown_gid,
+                  mode_t chmod_mode)
+{
+       int ret;
  
-               ret = recursive_destroy(h->fullcgpath);
-               if (ret < 0)
-                       WARN("Failed to destroy \"%s\"", h->fullcgpath);
+       ret = chown(path, chown_uid, chown_gid);
+       if (ret < 0) {
+               SYSWARN("Failed to chown(%s, %d, %d)", path, (int)chown_uid, (int)chown_gid);
+               return -1;
+       }
  
-               free(h->fullcgpath);
-               h->fullcgpath = NULL;
+       ret = chmod(path, chmod_mode);
+       if (ret < 0) {
+               SYSWARN("Failed to chmod(%s, %d)", path, (int)chmod_mode);
+               return -1;
         }
  
         return 0;
  }
  
-struct generic_userns_exec_data {
-       struct cgfsng_handler_data *d;
-       struct lxc_conf *conf;
-       uid_t origuid; /* target uid in parent namespace */
-       char *path;
-};
-
-static int cgroup_rmdir_wrapper(void *data)
+/* chgrp the container cgroups to container group.  We leave
+ * the container owner as cgroup owner.  So we must make the
+ * directories 775 so that the container can create sub-cgroups.
+ *
+ * Also chown the tasks and cgroup.procs files.  Those may not
+ * exist depending on kernel version.
+ */
+static int chown_cgroup_wrapper(void *data)
  {
-       int ret;
+       int i, ret;
+       uid_t destuid;
         struct generic_userns_exec_data *arg = data;
         uid_t nsuid = (arg->conf->root_nsuid_map != NULL) ? 0 : arg->conf->init_uid;
         gid_t nsgid = (arg->conf->root_nsgid_map != NULL) ? 0 : arg->conf->init_gid;
  
         ret = setresgid(nsgid, nsgid, nsgid);
         if (ret < 0) {
-               SYSERROR("Failed to setresgid(%d, %d, %d)", (int)nsgid,
-                        (int)nsgid, (int)nsgid);
+               SYSERROR("Failed to setresgid(%d, %d, %d)",
+                        (int)nsgid, (int)nsgid, (int)nsgid);
                 return -1;
         }
  
         ret = setresuid(nsuid, nsuid, nsuid);
         if (ret < 0) {
-               SYSERROR("Failed to setresuid(%d, %d, %d)", (int)nsuid,
-                        (int)nsuid, (int)nsuid);
+               SYSERROR("Failed to setresuid(%d, %d, %d)",
+                        (int)nsuid, (int)nsuid, (int)nsuid);
                 return -1;
         }
  
@@ -1625,537 +1558,207 @@ static int cgroup_rmdir_wrapper(void *data)
                 return -1;
         }
  
-       return cgroup_rmdir(arg->d->container_cgroup);
-}
-
-static void cgfsng_destroy(void *hdata, struct lxc_conf *conf)
-{
-       int ret;
-       struct cgfsng_handler_data *d = hdata;
-       struct generic_userns_exec_data wrap;
+       destuid = get_ns_uid(arg->origuid);
+       if (destuid == LXC_INVALID_UID)
+               destuid = 0;
  
-       if (!d)
-               return;
+       for (i = 0; arg->hierarchies[i]; i++) {
+               char *fullpath;
+               char *path = arg->hierarchies[i]->container_full_path;
  
-       wrap.origuid = 0;
-       wrap.d = hdata;
-       wrap.conf = conf;
+               ret = chowmod(path, destuid, nsgid, 0775);
+               if (ret < 0)
+                       return -1;
  
-       if (conf && !lxc_list_empty(&conf->id_map))
-               ret = userns_exec_1(conf, cgroup_rmdir_wrapper, &wrap,
-                                   "cgroup_rmdir_wrapper");
-       else
-               ret = cgroup_rmdir(d->container_cgroup);
-       if (ret < 0) {
-               WARN("Failed to destroy cgroups");
-               return;
-       }
+               /* Failures to chown() these are inconvenient but not
+                * detrimental We leave these owned by the container launcher,
+                * so that container root can write to the files to attach.  We
+                * chmod() them 664 so that container systemd can write to the
+                * files (which systemd in wily insists on doing).
+                */
  
-       free_handler_data(d);
-}
-
-struct cgroup_ops *cgfsng_ops_init(void)
-{
-       if (getenv("LXC_DEBUG_CGFSNG"))
-               lxc_cgfsng_debug = true;
-
-       if (!cg_init())
-               return NULL;
-
-       return &cgfsng_ops;
-}
-
-static bool cg_unified_create_cgroup(struct hierarchy *h, char *cgname)
-{
-       size_t i, parts_len;
-       char **it;
-       size_t full_len = 0;
-       char *add_controllers = NULL, *cgroup = NULL;
-       char **parts = NULL;
-       bool bret = false;
-
-       if (h->version != CGROUP2_SUPER_MAGIC)
-               return true;
-
-       if (!h->controllers)
-               return true;
+               if (arg->hierarchies[i]->version == CGROUP_SUPER_MAGIC) {
+                       fullpath = must_make_path(path, "tasks", NULL);
+                       (void)chowmod(fullpath, destuid, nsgid, 0664);
+                       free(fullpath);
+               }
  
-       /* For now we simply enable all controllers that we have detected by
-        * creating a string like "+memory +pids +cpu +io".
-        * TODO: In the near future we might want to support "-<controller>"
-        * etc. but whether supporting semantics like this make sense will need
-        * some thinking.
-        */
-       for (it = h->controllers; it && *it; it++) {
-                full_len += strlen(*it) + 2;
-                add_controllers = must_realloc(add_controllers, full_len + 1);
-                if (h->controllers[0] == *it)
-                        add_controllers[0] = '\0';
-                strcat(add_controllers, "+");
-                strcat(add_controllers, *it);
-                if ((it + 1) && *(it + 1))
-                        strcat(add_controllers, " ");
-       }
+               fullpath = must_make_path(path, "cgroup.procs", NULL);
+               (void)chowmod(fullpath, destuid, nsgid, 0664);
+               free(fullpath);
  
-       parts = lxc_string_split(cgname, '/');
-       if (!parts)
-               goto on_error;
-       parts_len = lxc_array_len((void **)parts);
-       if (parts_len > 0)
-               parts_len--;
+               if (arg->hierarchies[i]->version != CGROUP2_SUPER_MAGIC)
+                       continue;
  
-       cgroup = must_make_path(h->mountpoint, h->base_cgroup, NULL);
-       for (i = 0; i < parts_len; i++) {
-               int ret;
-               char *target;
+               fullpath = must_make_path(path, "cgroup.subtree_control", NULL);
+               (void)chowmod(fullpath, destuid, nsgid, 0664);
+               free(fullpath);
  
-               cgroup = must_append_path(cgroup, parts[i], NULL);
-               target = must_make_path(cgroup, "cgroup.subtree_control", NULL);
-               ret = lxc_write_to_file(target, add_controllers, full_len, false);
-               free(target);
-               if (ret < 0) {
-                       SYSERROR("Could not enable \"%s\" controllers in the "
-                                "unified cgroup \"%s\"", add_controllers, cgroup);
-                       goto on_error;
-               }
+               fullpath = must_make_path(path, "cgroup.threads", NULL);
+               (void)chowmod(fullpath, destuid, nsgid, 0664);
+               free(fullpath);
         }
  
-       bret = true;
-
-on_error:
-       lxc_free_array((void **)parts, free);
-       free(add_controllers);
-       free(cgroup);
-       return bret;
+       return 0;
  }
  
-static bool create_path_for_hierarchy(struct hierarchy *h, char *cgname)
+/* Hand the container cgroups over to the container's id mapping.
+ *
+ * Nothing is done (and true is returned) when @conf has no id map. Otherwise
+ * chown_cgroup_wrapper() is run through userns_exec_1() with the caller's
+ * effective uid and the hierarchies of @ops; false is returned if that fails.
+ */
+__cgfsng_ops static bool cgfsng_chown(struct cgroup_ops *ops,
+                                       struct lxc_conf *conf)
  {
-       int ret;
+       struct generic_userns_exec_data wrap;
  
-       h->fullcgpath = must_make_path(h->mountpoint, h->base_cgroup, cgname, NULL);
-       if (dir_exists(h->fullcgpath)) {
-               ERROR("The cgroup \"%s\" already existed", h->fullcgpath);
-               return false;
-       }
+       if (lxc_list_empty(&conf->id_map))
+               return true;
  
-       if (!cg_legacy_handle_cpuset_hierarchy(h, cgname)) {
-               ERROR("Failed to handle legacy cpuset controller");
-               return false;
-       }
+       wrap.origuid = geteuid();
+       wrap.path = NULL;
+       wrap.hierarchies = ops->hierarchies;
+       wrap.conf = conf;
  
-       ret = mkdir_p(h->fullcgpath, 0755);
-       if (ret < 0) {
-               ERROR("Failed to create cgroup \"%s\"", h->fullcgpath);
+       if (userns_exec_1(conf, chown_cgroup_wrapper, &wrap,
+                         "chown_cgroup_wrapper") < 0) {
+               ERROR("Error requesting cgroup chown in new user namespace");
                 return false;
         }
  
-       return cg_unified_create_cgroup(h, cgname);
+       return true;
  }
  
-static void remove_path_for_hierarchy(struct hierarchy *h, char *cgname)
+/* cgroup-full:* is done, no need to create subdirs.
+ *
+ * Returns false for every @type at or above LXC_AUTO_CGROUP_FULL_RO and true
+ * for all lower values.
+ */
+static bool cg_mount_needs_subdirs(int type)
  {
-       int ret;
-
-       ret = rmdir(h->fullcgpath);
-       if (ret < 0)
-               SYSERROR("Failed to rmdir(\"%s\") from failed creation attempt", h->fullcgpath);
+       if (type >= LXC_AUTO_CGROUP_FULL_RO)
+               return false;
  
-       free(h->fullcgpath);
-       h->fullcgpath = NULL;
+       return true;
  }
  
-/* Try to create the same cgroup in all hierarchies. Start with cgroup_pattern;
- * next cgroup_pattern-1, -2, ..., -999.
+/* After $rootfs/sys/fs/container/controller/the/cg/path has been created,
+ * remount controller ro if needed and bindmount the cgroupfs onto
+ * controller/the/cg/path.
   */
-static inline bool cgfsng_create(void *hdata)
+static int cg_legacy_mount_controllers(int type, struct hierarchy *h,
+                                      char *controllerpath, char *cgpath,
+                                      const char *container_cgroup)
  {
-       int i;
-       size_t len;
-       char *container_cgroup, *offset, *tmp;
-       int idx = 0;
-       struct cgfsng_handler_data *d = hdata;
-
-       if (!d)
-               return false;
+       int ret, remount_flags;
+       char *sourcepath;
+       int flags = MS_BIND;
  
-       if (d->container_cgroup) {
-               WARN("cgfsng_create called a second time");
-               return false;
-       }
+       /* For the ro and mixed types the controller directory is first bind
+        * mounted onto itself and then remounted read-only.
+        */
+       if (type == LXC_AUTO_CGROUP_RO || type == LXC_AUTO_CGROUP_MIXED) {
+               ret = mount(controllerpath, controllerpath, "cgroup", MS_BIND, NULL);
+               if (ret < 0) {
+                       SYSERROR("Failed to bind mount \"%s\" onto \"%s\"",
+                                controllerpath, controllerpath);
+                       return -1;
+               }
  
-       if (d->cgroup_meta.dir)
-               tmp = lxc_string_join("/", (const char *[]){d->cgroup_meta.dir, d->name, NULL}, false);
-       else
-               tmp = lxc_string_replace("%n", d->name, d->cgroup_pattern);
-       if (!tmp) {
-               ERROR("Failed expanding cgroup name pattern");
-               return false;
-       }
-       len = strlen(tmp) + 5; /* leave room for -NNN\0 */
-       container_cgroup = must_alloc(len);
-       strcpy(container_cgroup, tmp);
-       free(tmp);
-       offset = container_cgroup + len - 5;
+               remount_flags = add_required_remount_flags(controllerpath,
+                                                          controllerpath,
+                                                          flags | MS_REMOUNT);
+               ret = mount(controllerpath, controllerpath, "cgroup",
+                           remount_flags | MS_REMOUNT | MS_BIND | MS_RDONLY,
+                           NULL);
+               if (ret < 0) {
+                       SYSERROR("Failed to remount \"%s\" ro", controllerpath);
+                       return -1;
+               }
  
-again:
-       if (idx == 1000) {
-               ERROR("Too many conflicting cgroup names");
-               goto out_free;
+               INFO("Remounted %s read-only", controllerpath);
         }
  
-       if (idx) {
-               int ret;
+       /* Bind mount the container's own cgroup from the host hierarchy onto
+        * @cgpath; only the pure ro type makes this mount read-only.
+        */
+       sourcepath = must_make_path(h->mountpoint, h->container_base_path,
+                                   container_cgroup, NULL);
+       if (type == LXC_AUTO_CGROUP_RO)
+               flags |= MS_RDONLY;
  
-               ret = snprintf(offset, 5, "-%d", idx);
-               if (ret < 0 || (size_t)ret >= 5) {
-                       FILE *f = fopen("/dev/null", "w");
-                       if (f) {
-                               fprintf(f, "Workaround for GCC7 bug: "
-                                          "https://gcc.gnu.org/bugzilla/"
-                                          "show_bug.cgi?id=78969");
-                               fclose(f);
-                       }
-               }
+       ret = mount(sourcepath, cgpath, "cgroup", flags, NULL);
+       if (ret < 0) {
+               SYSERROR("Failed to mount \"%s\" onto \"%s\"", h->controllers[0], cgpath);
+               free(sourcepath);
+               return -1;
         }
+       INFO("Mounted \"%s\" onto \"%s\"", h->controllers[0], cgpath);
  
-       for (i = 0; hierarchies[i]; i++) {
-               if (!create_path_for_hierarchy(hierarchies[i], container_cgroup)) {
-                       int j;
-                       ERROR("Failed to create cgroup \"%s\"", hierarchies[i]->fullcgpath);
-                       free(hierarchies[i]->fullcgpath);
-                       hierarchies[i]->fullcgpath = NULL;
-                       for (j = 0; j < i; j++)
-                               remove_path_for_hierarchy(hierarchies[j], container_cgroup);
-                       idx++;
-                       goto again;
+       /* A read-only request is applied with a separate remount. */
+       if (flags & MS_RDONLY) {
+               remount_flags = add_required_remount_flags(sourcepath, cgpath,
+                                                          flags | MS_REMOUNT);
+               ret = mount(sourcepath, cgpath, "cgroup", remount_flags, NULL);
+               if (ret < 0) {
+                       SYSERROR("Failed to remount \"%s\" ro", cgpath);
+                       free(sourcepath);
+                       return -1;
                 }
+               INFO("Remounted %s read-only", cgpath);
         }
  
-       d->container_cgroup = container_cgroup;
-
-       return true;
-
-out_free:
-       free(container_cgroup);
-
-       return false;
+       free(sourcepath);
+       INFO("Completed second stage cgroup automounts for \"%s\"", cgpath);
+       return 0;
  }
  
-static bool cgfsng_enter(void *hdata, pid_t pid)
+/* __cg_mount_direct
+ *
+ * Mount cgroup hierarchies directly without using bind-mounts. The main
+ * uses-cases are mounting cgroup hierarchies in cgroup namespaces and mounting
+ * cgroups for the LXC_AUTO_CGROUP_FULL option.
+ */
+static int __cg_mount_direct(int type, struct hierarchy *h,
+                            const char *controllerpath)
  {
-       int i, len;
-       char pidstr[25];
-
-       len = snprintf(pidstr, 25, "%d", pid);
-       if (len < 0 || len >= 25)
-               return false;
-
-       for (i = 0; hierarchies[i]; i++) {
-               int ret;
-               char *fullpath;
-
-               fullpath = must_make_path(hierarchies[i]->fullcgpath,
-                                         "cgroup.procs", NULL);
-               ret = lxc_write_to_file(fullpath, pidstr, len, false);
-               if (ret != 0) {
-                       SYSERROR("Failed to enter cgroup \"%s\"", fullpath);
-                       free(fullpath);
-                       return false;
-               }
-               free(fullpath);
-       }
+        int ret;
+        char *controllers = NULL;
+        char *fstype = "cgroup2";
+        unsigned long flags = 0;
  
-       return true;
-}
+        flags |= MS_NOSUID;
+        flags |= MS_NOEXEC;
+        flags |= MS_NODEV;
+        flags |= MS_RELATIME;
  
-static int chowmod(char *path, uid_t chown_uid, gid_t chown_gid,
-                  mode_t chmod_mode)
-{
-       int ret;
+        if (type == LXC_AUTO_CGROUP_RO || type == LXC_AUTO_CGROUP_FULL_RO)
+                flags |= MS_RDONLY;
  
-       ret = chown(path, chown_uid, chown_gid);
-       if (ret < 0) {
-               WARN("%s - Failed to chown(%s, %d, %d)", strerror(errno), path,
-                    (int)chown_uid, (int)chown_gid);
-               return -1;
+        if (h->version != CGROUP2_SUPER_MAGIC) {
+                controllers = lxc_string_join(",", (const char **)h->controllers, false);
+                if (!controllers)
+                        return -ENOMEM;
+                fstype = "cgroup";
         }
  
-       ret = chmod(path, chmod_mode);
+       ret = mount("cgroup", controllerpath, fstype, flags, controllers);
+       free(controllers);
         if (ret < 0) {
-               WARN("%s - Failed to chmod(%s, %d)", strerror(errno), path,
-                    (int)chmod_mode);
+               SYSERROR("Failed to mount \"%s\" with cgroup filesystem type %s", controllerpath, fstype);
                 return -1;
         }
  
+       DEBUG("Mounted \"%s\" with cgroup filesystem type %s", controllerpath, fstype);
         return 0;
  }
  
-/* chgrp the container cgroups to container group.  We leave
- * the container owner as cgroup owner.  So we must make the
- * directories 775 so that the container can create sub-cgroups.
- *
- * Also chown the tasks and cgroup.procs files.  Those may not
- * exist depending on kernel version.
- */
-static int chown_cgroup_wrapper(void *data)
+static inline int cg_mount_in_cgroup_namespace(int type, struct hierarchy *h,
+                                              const char *controllerpath)
  {
-       int i, ret;
-       uid_t destuid;
-       struct generic_userns_exec_data *arg = data;
-       uid_t nsuid = (arg->conf->root_nsuid_map != NULL) ? 0 : arg->conf->init_uid;
-       gid_t nsgid = (arg->conf->root_nsgid_map != NULL) ? 0 : arg->conf->init_gid;
+       return __cg_mount_direct(type, h, controllerpath);
+}
  
-       ret = setresgid(nsgid, nsgid, nsgid);
-       if (ret < 0) {
-               SYSERROR("Failed to setresgid(%d, %d, %d)",
-                        (int)nsgid, (int)nsgid, (int)nsgid);
-               return -1;
-       }
+static inline int cg_mount_cgroup_full(int type, struct hierarchy *h,
+                                      const char *controllerpath)
+{
+       if (type < LXC_AUTO_CGROUP_FULL_RO || type > LXC_AUTO_CGROUP_FULL_MIXED)
+               return 0;
  
-       ret = setresuid(nsuid, nsuid, nsuid);
-       if (ret < 0) {
-               SYSERROR("Failed to setresuid(%d, %d, %d)",
-                        (int)nsuid, (int)nsuid, (int)nsuid);
-               return -1;
-       }
-
-       ret = setgroups(0, NULL);
-       if (ret < 0 && errno != EPERM) {
-               SYSERROR("Failed to setgroups(0, NULL)");
-               return -1;
-       }
-
-       destuid = get_ns_uid(arg->origuid);
-
-       for (i = 0; hierarchies[i]; i++) {
-               char *fullpath;
-               char *path = hierarchies[i]->fullcgpath;
-
-               ret = chowmod(path, destuid, nsgid, 0775);
-               if (ret < 0)
-                       return -1;
-
-               /* Failures to chown() these are inconvenient but not
-                * detrimental We leave these owned by the container launcher,
-                * so that container root can write to the files to attach.  We
-                * chmod() them 664 so that container systemd can write to the
-                * files (which systemd in wily insists on doing).
-                */
-
-               if (hierarchies[i]->version == CGROUP_SUPER_MAGIC) {
-                       fullpath = must_make_path(path, "tasks", NULL);
-                       (void)chowmod(fullpath, destuid, nsgid, 0664);
-                       free(fullpath);
-               }
-
-               fullpath = must_make_path(path, "cgroup.procs", NULL);
-               (void)chowmod(fullpath, destuid, 0, 0664);
-               free(fullpath);
-
-               if (hierarchies[i]->version != CGROUP2_SUPER_MAGIC)
-                       continue;
-
-               fullpath = must_make_path(path, "cgroup.subtree_control", NULL);
-               (void)chowmod(fullpath, destuid, nsgid, 0664);
-               free(fullpath);
-
-               fullpath = must_make_path(path, "cgroup.threads", NULL);
-               (void)chowmod(fullpath, destuid, nsgid, 0664);
-               free(fullpath);
-       }
-
-       return 0;
-}
-
-static bool cgfsng_chown(void *hdata, struct lxc_conf *conf)
-{
-       struct cgfsng_handler_data *d = hdata;
-       struct generic_userns_exec_data wrap;
-
-       if (!d)
-               return false;
-
-       if (lxc_list_empty(&conf->id_map))
-               return true;
-
-       wrap.origuid = geteuid();
-       wrap.path = NULL;
-       wrap.d = d;
-       wrap.conf = conf;
-
-       if (userns_exec_1(conf, chown_cgroup_wrapper, &wrap,
-                         "chown_cgroup_wrapper") < 0) {
-               ERROR("Error requesting cgroup chown in new user namespace");
-               return false;
-       }
-
-       return true;
-}
-
-/* We've safe-mounted a tmpfs as parent, so we don't need to protect against
- * symlinks any more - just use mount.
- *
- * mount cgroup-full if requested
- */
-static int mount_cgroup_full(int type, struct hierarchy *h, char *dest,
-                            char *container_cgroup)
-{
-       int ret;
-       char *rwpath, *source;
-
-       if (type < LXC_AUTO_CGROUP_FULL_RO || type > LXC_AUTO_CGROUP_FULL_MIXED)
-               return 0;
-
-       ret = mount(h->mountpoint, dest, "cgroup", MS_BIND, NULL);
-       if (ret < 0) {
-               SYSERROR("Failed to bind mount cgroup \"%s\" onto \"%s\"",
-                        h->mountpoint, dest);
-               return -1;
-       }
-
-       if (type != LXC_AUTO_CGROUP_FULL_RW) {
-               unsigned long flags = MS_BIND | MS_NOSUID | MS_NOEXEC | MS_NODEV |
-                                     MS_REMOUNT | MS_RDONLY;
-
-               ret = mount(NULL, dest, "cgroup", flags, NULL);
-               if (ret < 0) {
-                       SYSERROR("Failed to remount cgroup \"%s\" read-only", dest);
-                       return -1;
-               }
-       }
-
-       INFO("Bind mounted \"%s\" onto \"%s\"", h->mountpoint, dest);
-       if (type != LXC_AUTO_CGROUP_FULL_MIXED)
-               return 0;
-
-       /* mount just the container path rw */
-       source = must_make_path(h->mountpoint, h->base_cgroup, container_cgroup, NULL);
-       rwpath = must_make_path(dest, h->base_cgroup, container_cgroup, NULL);
-       ret = mount(source, rwpath, "cgroup", MS_BIND, NULL);
-       if (ret < 0)
-               WARN("%s - Failed to mount cgroup \"%s\" read-write",
-                    strerror(errno), rwpath);
-
-       TRACE("Mounted cgroup \"%s\" read-write", rwpath);
-       free(rwpath);
-       free(source);
-       return 0;
-}
-
-/* cgroup-full:* is done, no need to create subdirs */
-static bool cg_mount_needs_subdirs(int type)
-{
-       if (type >= LXC_AUTO_CGROUP_FULL_RO)
-               return false;
-
-       return true;
-}
-
-/* After $rootfs/sys/fs/container/controller/the/cg/path has been created,
- * remount controller ro if needed and bindmount the cgroupfs onto
- * controll/the/cg/path.
- */
-static int do_secondstage_mounts_if_needed(int type, struct hierarchy *h,
-                                          char *controllerpath, char *cgpath,
-                                          const char *container_cgroup)
-{
-       int ret, remount_flags;
-       char *sourcepath;
-       int flags = MS_BIND;
-
-       if (type == LXC_AUTO_CGROUP_RO || type == LXC_AUTO_CGROUP_MIXED) {
-               ret = mount(controllerpath, controllerpath, "cgroup", MS_BIND, NULL);
-               if (ret < 0) {
-                       SYSERROR("Failed to bind mount \"%s\" onto \"%s\"",
-                                controllerpath, controllerpath);
-                       return -1;
-               }
-
-               remount_flags = add_required_remount_flags(controllerpath,
-                                                          controllerpath,
-                                                          flags | MS_REMOUNT);
-               ret = mount(controllerpath, controllerpath, "cgroup",
-                           MS_REMOUNT | MS_BIND | MS_RDONLY, NULL);
-               if (ret < 0) {
-                       SYSERROR("Failed to remount \"%s\" ro", controllerpath);
-                       return -1;
-               }
-
-               INFO("Remounted %s read-only", controllerpath);
-       }
-
-       sourcepath = must_make_path(h->mountpoint, h->base_cgroup,
-                                   container_cgroup, NULL);
-       if (type == LXC_AUTO_CGROUP_RO)
-               flags |= MS_RDONLY;
-
-       ret = mount(sourcepath, cgpath, "cgroup", flags, NULL);
-       if (ret < 0) {
-               SYSERROR("Failed to mount \"%s\" onto \"%s\"", h->controllers[0], cgpath);
-               free(sourcepath);
-               return -1;
-       }
-       INFO("Mounted \"%s\" onto \"%s\"", h->controllers[0], cgpath);
-
-       if (flags & MS_RDONLY) {
-               remount_flags = add_required_remount_flags(sourcepath, cgpath,
-                                                          flags | MS_REMOUNT);
-               ret = mount(sourcepath, cgpath, "cgroup", remount_flags, NULL);
-               if (ret < 0) {
-                       SYSERROR("Failed to remount \"%s\" ro", cgpath);
-                       free(sourcepath);
-                       return -1;
-               }
-               INFO("Remounted %s read-only", cgpath);
-       }
-
-       free(sourcepath);
-       INFO("Completed second stage cgroup automounts for \"%s\"", cgpath);
-       return 0;
-}
-
-static int cg_mount_in_cgroup_namespace(int type, struct hierarchy *h,
-                                       const char *controllerpath)
-{
-        int ret;
-        char *controllers = NULL;
-        char *fstype = "cgroup2";
-        unsigned long flags = 0;
-
-        flags |= MS_NOSUID;
-        flags |= MS_NOEXEC;
-        flags |= MS_NODEV;
-        flags |= MS_RELATIME;
-
-        if (type == LXC_AUTO_CGROUP_RO || type == LXC_AUTO_CGROUP_FULL_RO)
-                flags |= MS_RDONLY;
-
-        if (h->version != CGROUP2_SUPER_MAGIC) {
-                controllers = lxc_string_join(",", (const char **)h->controllers, false);
-                if (!controllers)
-                        return -ENOMEM;
-                fstype = "cgroup";
-       }
-
-       ret = mount("cgroup", controllerpath, fstype, flags, controllers);
-       free(controllers);
-       if (ret < 0) {
-               SYSERROR("Failed to mount %s with cgroup filesystem type %s", controllerpath, fstype);
-               return -1;
-       }
-
-       DEBUG("Mounted %s with cgroup filesystem type %s", controllerpath, fstype);
-       return 0;
+       return __cg_mount_direct(type, h, controllerpath);
  }
  
-static bool cgfsng_mount(void *hdata, const char *root, int type)
+__cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
+                                       struct lxc_handler *handler,
+                                       const char *root, int type)
  {
         int i, ret;
         char *tmpfspath = NULL;
         bool has_cgns = false, retval = false, wants_force_mount = false;
-       struct lxc_handler *handler = hdata;
-       struct cgfsng_handler_data *d = handler->cgroup_data;
  
         if ((type & LXC_AUTO_CGROUP_MASK) == 0)
                 return true;
@@ -2183,15 +1786,15 @@ static bool cgfsng_mount(void *hdata, const char *root, int type)
  
         /* Mount tmpfs */
         tmpfspath = must_make_path(root, "/sys/fs/cgroup", NULL);
-       ret = safe_mount("cgroup_root", tmpfspath, "tmpfs",
+       ret = safe_mount(NULL, tmpfspath, "tmpfs",
                          MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME,
                          "size=10240k,mode=755", root);
         if (ret < 0)
                 goto on_error;
  
-       for (i = 0; hierarchies[i]; i++) {
+       for (i = 0; ops->hierarchies[i]; i++) {
                 char *controllerpath, *path2;
-               struct hierarchy *h = hierarchies[i];
+               struct hierarchy *h = ops->hierarchies[i];
                 char *controller = strrchr(h->mountpoint, '/');
  
                 if (!controller)
@@ -2224,7 +1827,7 @@ static bool cgfsng_mount(void *hdata, const char *root, int type)
                         continue;
                 }
  
-               ret = mount_cgroup_full(type, h, controllerpath, d->container_cgroup);
+               ret = cg_mount_cgroup_full(type, h, controllerpath);
                 if (ret < 0) {
                         free(controllerpath);
                         goto on_error;
@@ -2235,8 +1838,8 @@ static bool cgfsng_mount(void *hdata, const char *root, int type)
                         continue;
                 }
  
-               path2 = must_make_path(controllerpath, h->base_cgroup,
-                                      d->container_cgroup, NULL);
+               path2 = must_make_path(controllerpath, h->container_base_path,
+                                      ops->container_cgroup, NULL);
                 ret = mkdir_p(path2, 0755);
                 if (ret < 0) {
                         free(controllerpath);
@@ -2244,8 +1847,8 @@ static bool cgfsng_mount(void *hdata, const char *root, int type)
                         goto on_error;
                 }
  
-               ret = do_secondstage_mounts_if_needed(type, h, controllerpath,
-                                                     path2, d->container_cgroup);
+               ret = cg_legacy_mount_controllers(type, h, controllerpath,
+                                                 path2, ops->container_cgroup);
                 free(controllerpath);
                 free(path2);
                 if (ret < 0)
@@ -2272,9 +1875,6 @@ static int recursive_count_nrtasks(char *dirname)
         while ((direntp = readdir(dir))) {
                 struct stat mystat;
  
-               if (!direntp)
-                       break;
-
                 if (!strcmp(direntp->d_name, ".") ||
                     !strcmp(direntp->d_name, ".."))
                         continue;
@@ -2303,37 +1903,37 @@ static int recursive_count_nrtasks(char *dirname)
         return count;
  }
  
-static int cgfsng_nrtasks(void *hdata)
+__cgfsng_ops static int cgfsng_nrtasks(struct cgroup_ops *ops)
  {
         int count;
         char *path;
-       struct cgfsng_handler_data *d = hdata;
  
-       if (!d || !d->container_cgroup || !hierarchies)
+       if (!ops->container_cgroup || !ops->hierarchies)
                 return -1;
  
-       path = must_make_path(hierarchies[0]->fullcgpath, NULL);
+       path = must_make_path(ops->hierarchies[0]->container_full_path, NULL);
         count = recursive_count_nrtasks(path);
         free(path);
         return count;
  }
  
  /* Only root needs to escape to the cgroup of its init. */
-static bool cgfsng_escape()
+__cgfsng_ops static bool cgfsng_escape(const struct cgroup_ops *ops,
+                                        struct lxc_conf *conf)
  {
         int i;
  
-       if (geteuid())
+       if (conf->cgroup_meta.relative || geteuid())
                 return true;
  
-       for (i = 0; hierarchies[i]; i++) {
+       for (i = 0; ops->hierarchies[i]; i++) {
                 int ret;
                 char *fullpath;
  
-               fullpath = must_make_path(hierarchies[i]->mountpoint,
-                                         hierarchies[i]->base_cgroup,
+               fullpath = must_make_path(ops->hierarchies[i]->mountpoint,
+                                         ops->hierarchies[i]->container_base_path,
                                           "cgroup.procs", NULL);
-               ret = lxc_write_to_file(fullpath, "0", 2, false);
+               ret = lxc_write_to_file(fullpath, "0", 2, false, 0666);
                 if (ret != 0) {
                         SYSERROR("Failed to escape to cgroup \"%s\"", fullpath);
                         free(fullpath);
@@ -2345,26 +1945,26 @@ static bool cgfsng_escape()
         return true;
  }
  
-static int cgfsng_num_hierarchies(void)
+__cgfsng_ops static int cgfsng_num_hierarchies(struct cgroup_ops *ops)
  {
         int i;
  
-       for (i = 0; hierarchies[i]; i++)
+       for (i = 0; ops->hierarchies[i]; i++)
                 ;
  
         return i;
  }
  
-static bool cgfsng_get_hierarchies(int n, char ***out)
+__cgfsng_ops static bool cgfsng_get_hierarchies(struct cgroup_ops *ops, int n, char ***out)
  {
         int i;
  
         /* sanity check n */
         for (i = 0; i < n; i++)
-               if (!hierarchies[i])
+               if (!ops->hierarchies[i])
                         return false;
  
-       *out = hierarchies[i]->controllers;
+       *out = ops->hierarchies[i]->controllers;
  
         return true;
  }
@@ -2375,18 +1975,18 @@ static bool cgfsng_get_hierarchies(int n, char ***out)
  /* TODO: If the unified cgroup hierarchy grows a freezer controller this needs
   * to be adapted.
   */
-static bool cgfsng_unfreeze(void *hdata)
+__cgfsng_ops static bool cgfsng_unfreeze(struct cgroup_ops *ops)
  {
         int ret;
         char *fullpath;
         struct hierarchy *h;
  
-       h = get_hierarchy("freezer");
+       h = get_hierarchy(ops, "freezer");
         if (!h)
                 return false;
  
-       fullpath = must_make_path(h->fullcgpath, "freezer.state", NULL);
-       ret = lxc_write_to_file(fullpath, THAWED, THAWED_LEN, false);
+       fullpath = must_make_path(h->container_full_path, "freezer.state", NULL);
+       ret = lxc_write_to_file(fullpath, THAWED, THAWED_LEN, false, 0666);
         free(fullpath);
         if (ret < 0)
                 return false;
@@ -2394,15 +1994,19 @@ static bool cgfsng_unfreeze(void *hdata)
         return true;
  }
  
-static const char *cgfsng_get_cgroup(void *hdata, const char *subsystem)
+__cgfsng_ops static const char *cgfsng_get_cgroup(struct cgroup_ops *ops,
+                                                   const char *controller)
  {
         struct hierarchy *h;
  
-       h = get_hierarchy(subsystem);
-       if (!h)
+       h = get_hierarchy(ops, controller);
+       if (!h) {
+               WARN("Failed to find hierarchy for controller \"%s\"",
+                    controller ? controller : "(null)");
                 return NULL;
+       }
  
-       return h->fullcgpath ? h->fullcgpath + strlen(h->mountpoint) : NULL;
+       return h->container_full_path ? h->container_full_path + strlen(h->mountpoint) : NULL;
  }
  
  /* Given a cgroup path returned from lxc_cmd_get_cgroup_path, build a full path,
@@ -2441,7 +2045,7 @@ static int __cg_unified_attach(const struct hierarchy *h, const char *name,
         base_path = must_make_path(h->mountpoint, container_cgroup, NULL);
         full_path = must_make_path(base_path, "cgroup.procs", NULL);
         /* cgroup is populated */
-       ret = lxc_write_to_file(full_path, pidstr, pidstr_len, false);
+       ret = lxc_write_to_file(full_path, pidstr, pidstr_len, false, 0666);
         if (ret < 0 && errno != EBUSY)
                 goto on_error;
  
@@ -2450,8 +2054,8 @@ static int __cg_unified_attach(const struct hierarchy *h, const char *name,
  
         free(full_path);
  
-       len = strlen(base_path) + sizeof("/lxc-1000") - 1 +
-             sizeof("/cgroup-procs") - 1;
+       len = strlen(base_path) + STRLITERALLEN("/lxc-1000") +
+             STRLITERALLEN("/cgroup-procs");
         full_path = must_alloc(len + 1);
         do {
                 if (idx)
@@ -2466,8 +2070,8 @@ static int __cg_unified_attach(const struct hierarchy *h, const char *name,
                 if (ret < 0 && errno != EEXIST)
                         goto on_error;
  
-               strcat(full_path, "/cgroup.procs");
-               ret = lxc_write_to_file(full_path, pidstr, len, false);
+               (void)strlcat(full_path, "/cgroup.procs", len + 1);
+               ret = lxc_write_to_file(full_path, pidstr, len, false, 0666);
                 if (ret == 0)
                         goto on_success;
  
@@ -2489,19 +2093,20 @@ on_error:
         return fret;
  }
  
-static bool cgfsng_attach(const char *name, const char *lxcpath, pid_t pid)
+__cgfsng_ops static bool cgfsng_attach(struct cgroup_ops *ops, const char *name,
+                                        const char *lxcpath, pid_t pid)
  {
         int i, len, ret;
-       char pidstr[25];
+       char pidstr[INTTYPE_TO_STRLEN(pid_t)];
  
-       len = snprintf(pidstr, 25, "%d", pid);
-       if (len < 0 || len >= 25)
+       len = snprintf(pidstr, sizeof(pidstr), "%d", pid);
+       if (len < 0 || (size_t)len >= sizeof(pidstr))
                 return false;
  
-       for (i = 0; hierarchies[i]; i++) {
+       for (i = 0; ops->hierarchies[i]; i++) {
                 char *path;
                 char *fullpath = NULL;
-               struct hierarchy *h = hierarchies[i];
+               struct hierarchy *h = ops->hierarchies[i];
  
                 if (h->version == CGROUP2_SUPER_MAGIC) {
                         ret = __cg_unified_attach(h, name, lxcpath, pidstr, len,
@@ -2518,7 +2123,8 @@ static bool cgfsng_attach(const char *name, const char *lxcpath, pid_t pid)
                         continue;
  
                 fullpath = build_full_cgpath_from_monitorpath(h, path, "cgroup.procs");
-               ret = lxc_write_to_file(fullpath, pidstr, len, false);
+               free(path);
+               ret = lxc_write_to_file(fullpath, pidstr, len, false, 0666);
                 if (ret < 0) {
                         SYSERROR("Failed to attach %d to %s", (int)pid, fullpath);
                         free(fullpath);
@@ -2534,8 +2140,9 @@ static bool cgfsng_attach(const char *name, const char *lxcpath, pid_t pid)
   * don't have a cgroup_data set up, so we ask the running container through the
   * commands API for the cgroup path.
   */
-static int cgfsng_get(const char *filename, char *value, size_t len,
-                     const char *name, const char *lxcpath)
+__cgfsng_ops static int cgfsng_get(struct cgroup_ops *ops, const char *filename,
+                                    char *value, size_t len, const char *name,
+                                    const char *lxcpath)
  {
         int ret = -1;
         size_t controller_len;
@@ -2544,7 +2151,8 @@ static int cgfsng_get(const char *filename, char *value, size_t len,
  
         controller_len = strlen(filename);
         controller = alloca(controller_len + 1);
-       strcpy(controller, filename);
+       (void)strlcpy(controller, filename, controller_len + 1);
+
         p = strchr(controller, '.');
         if (p)
                 *p = '\0';
@@ -2554,7 +2162,7 @@ static int cgfsng_get(const char *filename, char *value, size_t len,
         if (!path)
                 return -1;
  
-       h = get_hierarchy(controller);
+       h = get_hierarchy(ops, controller);
         if (h) {
                 char *fullpath;
  
@@ -2571,8 +2179,9 @@ static int cgfsng_get(const char *filename, char *value, size_t len,
   * don't have a cgroup_data set up, so we ask the running container through the
   * commands API for the cgroup path.
   */
-static int cgfsng_set(const char *filename, const char *value, const char *name,
-                     const char *lxcpath)
+__cgfsng_ops static int cgfsng_set(struct cgroup_ops *ops,
+                                    const char *filename, const char *value,
+                                    const char *name, const char *lxcpath)
  {
         int ret = -1;
         size_t controller_len;
@@ -2581,7 +2190,8 @@ static int cgfsng_set(const char *filename, const char *value, const char *name,
  
         controller_len = strlen(filename);
         controller = alloca(controller_len + 1);
-       strcpy(controller, filename);
+       (void)strlcpy(controller, filename, controller_len + 1);
+
         p = strchr(controller, '.');
         if (p)
                 *p = '\0';
@@ -2591,12 +2201,12 @@ static int cgfsng_set(const char *filename, const char *value, const char *name,
         if (!path)
                 return -1;
  
-       h = get_hierarchy(controller);
+       h = get_hierarchy(ops, controller);
         if (h) {
                 char *fullpath;
  
                 fullpath = build_full_cgpath_from_monitorpath(h, path, filename);
-               ret = lxc_write_to_file(fullpath, value, strlen(value), false);
+               ret = lxc_write_to_file(fullpath, value, strlen(value), false, 0666);
                 free(fullpath);
         }
         free(path);
@@ -2682,15 +2292,14 @@ out:
         return ret;
  }
  
-/*
- * Called from setup_limits - here we have the container's cgroup_data because
- * we created the cgroups
+/* Called from setup_limits - here we have the container's cgroup_data because
+ * we created the cgroups.
   */
-static int cg_legacy_set_data(const char *filename, const char *value,
-                             struct cgfsng_handler_data *d)
+static int cg_legacy_set_data(struct cgroup_ops *ops, const char *filename,
+                             const char *value)
  {
-       char *fullpath, *p;
         size_t len;
+       char *fullpath, *p;
         /* "b|c <2^64-1>:<2^64-1> r|w|m" = 47 chars max */
         char converted_value[50];
         struct hierarchy *h;
@@ -2699,7 +2308,8 @@ static int cg_legacy_set_data(const char *filename, const char *value,
  
         len = strlen(filename);
         controller = alloca(len + 1);
-       strcpy(controller, filename);
+       (void)strlcpy(controller, filename, len + 1);
+
         p = strchr(controller, '.');
         if (p)
                 *p = '\0';
@@ -2711,7 +2321,7 @@ static int cg_legacy_set_data(const char *filename, const char *value,
                 value = converted_value;
         }
  
-       h = get_hierarchy(controller);
+       h = get_hierarchy(ops, controller);
         if (!h) {
                 ERROR("Failed to setup limits for the \"%s\" controller. "
                       "The controller seems to be unused by \"cgfsng\" cgroup "
@@ -2721,18 +2331,17 @@ static int cg_legacy_set_data(const char *filename, const char *value,
                 return -ENOENT;
         }
  
-       fullpath = must_make_path(h->fullcgpath, filename, NULL);
-       ret = lxc_write_to_file(fullpath, value, strlen(value), false);
+       fullpath = must_make_path(h->container_full_path, filename, NULL);
+       ret = lxc_write_to_file(fullpath, value, strlen(value), false, 0666);
         free(fullpath);
         return ret;
  }
  
-static bool __cg_legacy_setup_limits(void *hdata,
+static bool __cg_legacy_setup_limits(struct cgroup_ops *ops,
                                      struct lxc_list *cgroup_settings,
                                      bool do_devices)
  {
-       struct cgfsng_handler_data *d = hdata;
-       struct lxc_list *iterator, *sorted_cgroup_settings, *next;
+       struct lxc_list *iterator, *next, *sorted_cgroup_settings;
         struct lxc_cgroup *cg;
         bool ret = false;
  
@@ -2747,17 +2356,18 @@ static bool __cg_legacy_setup_limits(void *hdata,
                 cg = iterator->elem;
  
                 if (do_devices == !strncmp("devices", cg->subsystem, 7)) {
-                       if (cg_legacy_set_data(cg->subsystem, cg->value, d)) {
+                       if (cg_legacy_set_data(ops, cg->subsystem, cg->value)) {
                                 if (do_devices && (errno == EACCES || errno == EPERM)) {
-                                       WARN("Error setting %s to %s for %s",
-                                             cg->subsystem, cg->value, d->name);
+                                       WARN("Failed to set \"%s\" to \"%s\"",
+                                            cg->subsystem, cg->value);
                                         continue;
                                 }
-                               SYSERROR("Error setting %s to %s for %s",
-                                     cg->subsystem, cg->value, d->name);
+                               WARN("Failed to set \"%s\" to \"%s\"",
+                                    cg->subsystem, cg->value);
                                 goto out;
                         }
-                       DEBUG("cgroup '%s' set to '%s'", cg->subsystem, cg->value);
+                       DEBUG("Set controller \"%s\" set to \"%s\"",
+                             cg->subsystem, cg->value);
                 }
         }
  
@@ -2772,11 +2382,11 @@ out:
         return ret;
  }
  
-static bool __cg_unified_setup_limits(void *hdata,
+static bool __cg_unified_setup_limits(struct cgroup_ops *ops,
                                       struct lxc_list *cgroup_settings)
  {
         struct lxc_list *iterator;
-       struct hierarchy *h = unified;
+       struct hierarchy *h = ops->unified;
  
         if (lxc_list_empty(cgroup_settings))
                 return true;
@@ -2789,11 +2399,12 @@ static bool __cg_unified_setup_limits(void *hdata,
                 char *fullpath;
                 struct lxc_cgroup *cg = iterator->elem;
  
-               fullpath = must_make_path(h->fullcgpath, cg->subsystem, NULL);
-               ret = lxc_write_to_file(fullpath, cg->value, strlen(cg->value), false);
+               fullpath = must_make_path(h->container_full_path, cg->subsystem, NULL);
+               ret = lxc_write_to_file(fullpath, cg->value, strlen(cg->value), false, 0666);
                 free(fullpath);
                 if (ret < 0) {
-                       SYSERROR("Failed to set \"%s\" to \"%s\"", cg->subsystem, cg->value);
+                       SYSERROR("Failed to set \"%s\" to \"%s\"",
+                                cg->subsystem, cg->value);
                         return false;
                 }
                 TRACE("Set \"%s\" to \"%s\"", cg->subsystem, cg->value);
@@ -2803,38 +2414,372 @@ static bool __cg_unified_setup_limits(void *hdata,
         return true;
  }
  
-static bool cgfsng_setup_limits(void *hdata, struct lxc_conf *conf,
-                               bool do_devices)
+__cgfsng_ops static bool cgfsng_setup_limits(struct cgroup_ops *ops,
+                                              struct lxc_conf *conf,
+                                              bool do_devices)
  {
         bool bret;
  
-       bret = __cg_legacy_setup_limits(hdata, &conf->cgroup, do_devices);
+       bret = __cg_legacy_setup_limits(ops, &conf->cgroup, do_devices);
         if (!bret)
                 return false;
  
-       return __cg_unified_setup_limits(hdata, &conf->cgroup2);
-}
-
-static struct cgroup_ops cgfsng_ops = {
-       .init = cgfsng_init,
-       .destroy = cgfsng_destroy,
-       .create = cgfsng_create,
-       .enter = cgfsng_enter,
-       .escape = cgfsng_escape,
-       .num_hierarchies = cgfsng_num_hierarchies,
-       .get_hierarchies = cgfsng_get_hierarchies,
-       .get_cgroup = cgfsng_get_cgroup,
-       .get = cgfsng_get,
-       .set = cgfsng_set,
-       .unfreeze = cgfsng_unfreeze,
-       .setup_limits = cgfsng_setup_limits,
-       .name = "cgroupfs-ng",
-       .attach = cgfsng_attach,
-       .chown = cgfsng_chown,
-       .mount_cgroup = cgfsng_mount,
-       .nrtasks = cgfsng_nrtasks,
-       .driver = CGFSNG,
-
-       /* unsupported */
-       .create_legacy = NULL,
-};
+       return __cg_unified_setup_limits(ops, &conf->cgroup2);
+}
+
+/* Check whether every controller in @controllers was requested through
+ * ops->cgroup_use.
+ *
+ * Returns true when ops->cgroup_use is unset, when @controllers is NULL or
+ * empty, or when each entry of @controllers has an identical entry in
+ * ops->cgroup_use. Returns false as soon as one controller has no match.
+ */
+static bool cgroup_use_wants_controllers(const struct cgroup_ops *ops,
+                                      char **controllers)
+{
+       char **ctrl, **wanted;
+
+       /* Without a cgroup_use list nothing is excluded. */
+       if (!ops->cgroup_use)
+               return true;
+
+       if (!controllers)
+               return true;
+
+       for (ctrl = controllers; *ctrl; ctrl++) {
+               /* Walk the requested list until we match or reach its end. */
+               wanted = ops->cgroup_use;
+               while (*wanted && strcmp(*wanted, *ctrl) != 0)
+                       wanted++;
+
+               /* Reached the terminator: this controller is not wanted. */
+               if (!*wanted)
+                       return false;
+       }
+
+       return true;
+}
+
+/* Discover the mounted cgroup hierarchies for legacy and hybrid layouts.
+ *
+ * Walks /proc/self/mountinfo and, for every cgroup mount that passes the
+ * writability test and the cgroup_use filter, records a hierarchy in
+ * @ops->hierarchies. The first cgroup2 hierarchy recorded is also stored in
+ * @ops->unified. @ops->cgroup_layout is updated as legacy and unified mounts
+ * are encountered.
+ *
+ * @ops      - cgroup driver state to fill in
+ * @relative - when false and we run as root, init's cgroups (/proc/1/cgroup)
+ *             serve as the base; otherwise our own (/proc/self/cgroup)
+ *
+ * Returns true on success, false if the required files could not be read or
+ * all_controllers_found() rejects the result.
+ */
+static bool cg_hybrid_init(struct cgroup_ops *ops, bool relative)
+{
+       int ret;
+       char *basecginfo;
+       FILE *f;
+       size_t len = 0;
+       char *line = NULL;
+       char **klist = NULL, **nlist = NULL;
+
+       /* Root spawned containers escape the current cgroup, so use init's
+        * cgroups as our base in that case.
+        */
+       if (!relative && (geteuid() == 0))
+               basecginfo = read_file("/proc/1/cgroup");
+       else
+               basecginfo = read_file("/proc/self/cgroup");
+       if (!basecginfo)
+               return false;
+
+       ret = get_existing_subsystems(&klist, &nlist);
+       if (ret < 0) {
+               ERROR("Failed to retrieve available legacy cgroup controllers");
+               free(basecginfo);
+               return false;
+       }
+
+       f = fopen("/proc/self/mountinfo", "r");
+       if (!f) {
+               ERROR("Failed to open \"/proc/self/mountinfo\"");
+               /* The subsystem lists were populated above; release them
+                * here as well so this error path does not leak them.
+                */
+               free_string_list(klist);
+               free_string_list(nlist);
+               free(basecginfo);
+               return false;
+       }
+
+       lxc_cgfsng_print_basecg_debuginfo(basecginfo, klist, nlist);
+
+       while (getline(&line, &len, f) != -1) {
+               int type;
+               bool writeable;
+               struct hierarchy *new;
+               char *base_cgroup = NULL, *mountpoint = NULL;
+               char **controller_list = NULL;
+
+               /* A type of 0 means this mountinfo line is not a cgroup mount. */
+               type = get_cgroup_version(line);
+               if (type == 0)
+                       continue;
+
+               /* Only the first unified hierarchy is used. */
+               if (type == CGROUP2_SUPER_MAGIC && ops->unified)
+                       continue;
+
+               /* Track the layout: seeing both kinds of mount makes it hybrid. */
+               if (ops->cgroup_layout == CGROUP_LAYOUT_UNKNOWN) {
+                       if (type == CGROUP2_SUPER_MAGIC)
+                               ops->cgroup_layout = CGROUP_LAYOUT_UNIFIED;
+                       else if (type == CGROUP_SUPER_MAGIC)
+                               ops->cgroup_layout = CGROUP_LAYOUT_LEGACY;
+               } else if (ops->cgroup_layout == CGROUP_LAYOUT_UNIFIED) {
+                       if (type == CGROUP_SUPER_MAGIC)
+                               ops->cgroup_layout = CGROUP_LAYOUT_HYBRID;
+               } else if (ops->cgroup_layout == CGROUP_LAYOUT_LEGACY) {
+                       if (type == CGROUP2_SUPER_MAGIC)
+                               ops->cgroup_layout = CGROUP_LAYOUT_HYBRID;
+               }
+
+               controller_list = cg_hybrid_get_controllers(klist, nlist, line, type);
+               if (!controller_list && type == CGROUP_SUPER_MAGIC)
+                       continue;
+
+               if (type == CGROUP_SUPER_MAGIC)
+                       if (controller_list_is_dup(ops->hierarchies, controller_list))
+                               goto next;
+
+               mountpoint = cg_hybrid_get_mountpoint(line);
+               if (!mountpoint) {
+                       ERROR("Failed parsing mountpoint from \"%s\"", line);
+                       goto next;
+               }
+
+               if (type == CGROUP_SUPER_MAGIC)
+                       base_cgroup = cg_hybrid_get_current_cgroup(basecginfo, controller_list[0], CGROUP_SUPER_MAGIC);
+               else
+                       base_cgroup = cg_hybrid_get_current_cgroup(basecginfo, NULL, CGROUP2_SUPER_MAGIC);
+               if (!base_cgroup) {
+                       ERROR("Failed to find current cgroup");
+                       goto next;
+               }
+
+               trim(base_cgroup);
+               prune_init_scope(base_cgroup);
+               if (type == CGROUP2_SUPER_MAGIC)
+                       writeable = test_writeable_v2(mountpoint, base_cgroup);
+               else
+                       writeable = test_writeable_v1(mountpoint, base_cgroup);
+               if (!writeable)
+                       goto next;
+
+               if (type == CGROUP2_SUPER_MAGIC) {
+                       char *cgv2_ctrl_path;
+
+                       cgv2_ctrl_path = must_make_path(mountpoint, base_cgroup,
+                                                       "cgroup.controllers",
+                                                       NULL);
+
+                       /* Release whatever cg_hybrid_get_controllers() handed
+                        * back before replacing it, so that list cannot leak.
+                        */
+                       free_string_list(controller_list);
+                       controller_list = cg_unified_get_controllers(cgv2_ctrl_path);
+                       free(cgv2_ctrl_path);
+                       if (!controller_list) {
+                               controller_list = cg_unified_make_empty_controller();
+                               TRACE("No controllers are enabled for "
+                                     "delegation in the unified hierarchy");
+                       }
+               }
+
+               /* Exclude all controllers that cgroup use does not want. */
+               if (!cgroup_use_wants_controllers(ops, controller_list))
+                       goto next;
+
+               /* NOTE(review): add_hierarchy() presumably takes ownership of
+                * controller_list, mountpoint and base_cgroup, since they are
+                * not freed on this path - confirm against its definition.
+                */
+               new = add_hierarchy(&ops->hierarchies, controller_list, mountpoint, base_cgroup, type);
+               if (type == CGROUP2_SUPER_MAGIC && !ops->unified)
+                       ops->unified = new;
+
+               continue;
+
+       next:
+               free_string_list(controller_list);
+               free(mountpoint);
+               free(base_cgroup);
+       }
+
+       free_string_list(klist);
+       free_string_list(nlist);
+
+       free(basecginfo);
+
+       fclose(f);
+       free(line);
+
+       TRACE("Writable cgroup hierarchies:");
+       lxc_cgfsng_print_hierarchies(ops);
+
+       /* verify that all controllers in cgroup.use and all crucial
+        * controllers are accounted for
+        */
+       if (!all_controllers_found(ops))
+               return false;
+
+       return true;
+}
+
+/* Probe whether /sys/fs/cgroup itself is a cgroup2 filesystem.
+ *
+ * Returns CGROUP2_SUPER_MAGIC if it is, 0 if it is any other filesystem and
+ * -ENOMEDIUM if statfs() on /sys/fs/cgroup fails.
+ */
+static int cg_is_pure_unified(void)
+{
+       struct statfs sfs;
+
+       if (statfs("/sys/fs/cgroup", &sfs) < 0)
+               return -ENOMEDIUM;
+
+       return is_fs_type(&sfs, CGROUP2_SUPER_MAGIC) ? CGROUP2_SUPER_MAGIC : 0;
+}
+
+/* Get the current cgroup in the cgroupfs v2 hierarchy.
+ *
+ * Reads /proc/1/cgroup when @relative is false and we run as root, and
+ * /proc/self/cgroup otherwise. Returns the text following the "0::" prefix
+ * up to the end of that line, as produced by copy_to_eol() and passed
+ * through trim(). Returns NULL if the file could not be read, contains no
+ * "0::/" entry, or copy_to_eol() returned NULL.
+ */
+static char *cg_unified_get_current_cgroup(bool relative)
+{
+       char *cginfo, *entry;
+       char *result = NULL;
+
+       if (relative || geteuid() != 0)
+               cginfo = read_file("/proc/self/cgroup");
+       else
+               cginfo = read_file("/proc/1/cgroup");
+       if (!cginfo)
+               return NULL;
+
+       entry = strstr(cginfo, "0::/");
+       if (entry) {
+               /* Skip the "0::" prefix but keep the leading '/'. */
+               result = copy_to_eol(entry + 3);
+       }
+
+       free(cginfo);
+       if (result)
+               trim(result);
+
+       return result;
+}
+
+/* Set up @ops for a pure unified (cgroup2-only) host.
+ *
+ * Returns CGROUP2_SUPER_MAGIC after registering the unified hierarchy,
+ * 0 when cg_is_pure_unified() reports that /sys/fs/cgroup is not cgroup2,
+ * -ENOMEDIUM when cg_is_pure_unified() returns that error, and -EINVAL when
+ * the current cgroup could not be determined.
+ */
+static int cg_unified_init(struct cgroup_ops *ops, bool relative)
+{
+       int fstype;
+       char *cgroup, *mnt, *control_file;
+       char **controllers;
+
+       fstype = cg_is_pure_unified();
+       if (fstype == -ENOMEDIUM)
+               return -ENOMEDIUM;
+
+       if (fstype != CGROUP2_SUPER_MAGIC)
+               return 0;
+
+       cgroup = cg_unified_get_current_cgroup(relative);
+       if (!cgroup)
+               return -EINVAL;
+       prune_init_scope(cgroup);
+
+       /* We assume that we have already been given controllers to delegate
+        * further down the hierarchy. If not it is up to the user to delegate
+        * them to us.
+        */
+       mnt = must_copy_string("/sys/fs/cgroup");
+       control_file = must_make_path(mnt, cgroup, "cgroup.subtree_control",
+                                     NULL);
+       controllers = cg_unified_get_controllers(control_file);
+       free(control_file);
+
+       /* Fall back to an empty list so that a hierarchy is still added. */
+       if (!controllers)
+               controllers = cg_unified_make_empty_controller();
+       if (!controllers[0])
+               TRACE("No controllers are enabled for delegation");
+
+       /* TODO: If the user requested specific controllers via lxc.cgroup.use
+        * we should verify that here. It is not done yet because it is not
+        * clear that lxc.cgroup.use will be the future, since it is a global
+        * property. An option that lets you request controllers per
+        * container would be preferable.
+        */
+
+       add_hierarchy(&ops->hierarchies, controllers, mnt, cgroup, CGROUP2_SUPER_MAGIC);
+
+       ops->cgroup_layout = CGROUP_LAYOUT_UNIFIED;
+       return CGROUP2_SUPER_MAGIC;
+}
+
+/* Initialize @ops from the global configuration and the host cgroup setup.
+ *
+ * Each comma-separated entry of the global "lxc.cgroup.use" value (if set)
+ * is appended to ops->cgroup_use. The pure unified setup is tried first;
+ * when cg_unified_init() returns 0 the hybrid/legacy setup is used instead.
+ *
+ * Returns true on success, false otherwise.
+ */
+static bool cg_init(struct cgroup_ops *ops, struct lxc_conf *conf)
+{
+       int unified;
+       const char *use;
+       bool relative = conf->cgroup_meta.relative;
+
+       use = lxc_global_config_value("lxc.cgroup.use");
+       if (use) {
+               char *controller, *dup, *rest;
+
+               /* Split a private copy; the global value must stay intact. */
+               dup = must_copy_string(use);
+               rest = dup;
+
+               lxc_iterate_parts(controller, rest, ",") {
+                       must_append_string(&ops->cgroup_use, controller);
+               }
+
+               free(dup);
+       }
+
+       unified = cg_unified_init(ops, relative);
+       if (unified < 0)
+               return false;
+
+       /* Anything other than a pure unified host goes the hybrid route. */
+       if (unified != CGROUP2_SUPER_MAGIC)
+               return cg_hybrid_init(ops, relative);
+
+       return true;
+}
+
+/* Copy the system-wide cgroup settings into @ops.
+ *
+ * Stores a copy of the global "lxc.cgroup.pattern" value in
+ * ops->cgroup_pattern and sets ops->monitor_pattern to MONITOR_CGROUP.
+ *
+ * Returns true on success, false if the pattern could not be retrieved.
+ */
+__cgfsng_ops static bool cgfsng_data_init(struct cgroup_ops *ops)
+{
+       const char *pattern = lxc_global_config_value("lxc.cgroup.pattern");
+
+       /* lxc.cgroup.pattern is only NULL on error. */
+       if (!pattern) {
+               ERROR("Failed to retrieve cgroup pattern");
+               return false;
+       }
+
+       ops->cgroup_pattern = must_copy_string(pattern);
+       ops->monitor_pattern = MONITOR_CGROUP;
+       return true;
+}
+
+/* Allocate and initialize the cgfsng cgroup driver.
+ *
+ * @conf - container configuration; passed on to cg_init()
+ *
+ * Returns a heap-allocated struct cgroup_ops with all cgfsng callbacks set,
+ * or NULL if the allocation or cg_init() fails.
+ */
+struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf)
+{
+       struct cgroup_ops *cgfsng_ops;
+
+       /* Zero-initialized so every member not set below starts out as 0. */
+       cgfsng_ops = calloc(1, sizeof(*cgfsng_ops));
+       if (!cgfsng_ops)
+               return NULL;
+
+       cgfsng_ops->cgroup_layout = CGROUP_LAYOUT_UNKNOWN;
+
+       if (!cg_init(cgfsng_ops, conf)) {
+               /* cg_init() may have populated cgroup_use before failing;
+                * release it so it does not leak along with the struct.
+                *
+                * NOTE(review): hierarchies added before the failure are not
+                * released here - confirm whether a helper exists for that.
+                */
+               if (cgfsng_ops->cgroup_use)
+                       free_string_list(cgfsng_ops->cgroup_use);
+               free(cgfsng_ops);
+               return NULL;
+       }
+
+       cgfsng_ops->data_init = cgfsng_data_init;
+       cgfsng_ops->payload_destroy = cgfsng_payload_destroy;
+       cgfsng_ops->monitor_destroy = cgfsng_monitor_destroy;
+       cgfsng_ops->monitor_create = cgfsng_monitor_create;
+       cgfsng_ops->monitor_enter = cgfsng_monitor_enter;
+       cgfsng_ops->payload_create = cgfsng_payload_create;
+       cgfsng_ops->payload_enter = cgfsng_payload_enter;
+       cgfsng_ops->escape = cgfsng_escape;
+       cgfsng_ops->num_hierarchies = cgfsng_num_hierarchies;
+       cgfsng_ops->get_hierarchies = cgfsng_get_hierarchies;
+       cgfsng_ops->get_cgroup = cgfsng_get_cgroup;
+       cgfsng_ops->get = cgfsng_get;
+       cgfsng_ops->set = cgfsng_set;
+       cgfsng_ops->unfreeze = cgfsng_unfreeze;
+       cgfsng_ops->setup_limits = cgfsng_setup_limits;
+       cgfsng_ops->driver = "cgfsng";
+       cgfsng_ops->version = "1.0.0";
+       cgfsng_ops->attach = cgfsng_attach;
+       cgfsng_ops->chown = cgfsng_chown;
+       cgfsng_ops->mount = cgfsng_mount;
+       cgfsng_ops->nrtasks = cgfsng_nrtasks;
+
+       return cgfsng_ops;
+}