]> git.proxmox.com Git - mirror_lxc.git/blobdiff - src/lxc/cgroups/cgfsng.c
cgfsng: s/25/INTTYPE_TO_STRLEN(pid_t)/g
[mirror_lxc.git] / src / lxc / cgroups / cgfsng.c
index 3cc9f9f628889192926448d640337804f4fd693f..7388ad7675525a4a2fc112f9499c42b5b9f4cf71 100644 (file)
@@ -55,6 +55,7 @@
 #include "commands.h"
 #include "conf.h"
 #include "log.h"
+#include "macro.h"
 #include "storage/storage.h"
 #include "utils.h"
 
@@ -66,7 +67,7 @@
 #include "include/strlcat.h"
 #endif
 
-lxc_log_define(lxc_cgfsng, lxc);
+lxc_log_define(cgfsng, cgroup);
 
 static void free_string_list(char **clist)
 {
@@ -133,8 +134,9 @@ static char *cg_legacy_must_prefix_named(char *entry)
        len = strlen(entry);
        prefixed = must_alloc(len + 6);
 
-       memcpy(prefixed, "name=", sizeof("name=") - 1);
-       memcpy(prefixed + sizeof("name=") - 1, entry, len);
+
+       memcpy(prefixed, "name=", STRLITERALLEN("name="));
+       memcpy(prefixed + STRLITERALLEN("name="), entry, len);
        prefixed[len + 5] = '\0';
        return prefixed;
 }
@@ -177,15 +179,19 @@ static void must_append_controller(char **klist, char **nlist, char ***clist,
 /* Given a handler's cgroup data, return the struct hierarchy for the controller
  * @c, or NULL if there is none.
  */
-struct hierarchy *get_hierarchy(struct cgroup_ops *ops, const char *c)
+struct hierarchy *get_hierarchy(struct cgroup_ops *ops, const char *controller)
 {
        int i;
 
-       if (!ops->hierarchies)
+       errno = ENOENT;
+
+       if (!ops->hierarchies) {
+               TRACE("There are no useable cgroup controllers");
                return NULL;
+       }
 
        for (i = 0; ops->hierarchies[i]; i++) {
-               if (!c) {
+               if (!controller) {
                        /* This is the empty unified hierarchy. */
                        if (ops->hierarchies[i]->controllers &&
                            !ops->hierarchies[i]->controllers[0])
@@ -194,10 +200,15 @@ struct hierarchy *get_hierarchy(struct cgroup_ops *ops, const char *c)
                        continue;
                }
 
-               if (string_in_list(ops->hierarchies[i]->controllers, c))
+               if (string_in_list(ops->hierarchies[i]->controllers, controller))
                        return ops->hierarchies[i];
        }
 
+       if (controller)
+               WARN("There is no useable %s controller", controller);
+       else
+               WARN("There is no empty unified cgroup hierarchy");
+
        return NULL;
 }
 
@@ -274,14 +285,13 @@ static uint32_t *lxc_cpumask(char *buf, size_t nbits)
        char *token;
        size_t arrlen;
        uint32_t *bitarr;
-       char *saveptr = NULL;
 
        arrlen = BITS_TO_LONGS(nbits);
        bitarr = calloc(arrlen, sizeof(uint32_t));
        if (!bitarr)
                return NULL;
 
-       for (; (token = strtok_r(buf, ",", &saveptr)); buf = NULL) {
+       lxc_iterate_parts(token, buf, ",") {
                errno = 0;
                unsigned end, start;
                char *range;
@@ -315,14 +325,14 @@ static char *lxc_cpumask_to_cpulist(uint32_t *bitarr, size_t nbits)
        int ret;
        size_t i;
        char **cpulist = NULL;
-       char numstr[LXC_NUMSTRLEN64] = {0};
+       char numstr[INTTYPE_TO_STRLEN(size_t)] = {0};
 
        for (i = 0; i <= nbits; i++) {
                if (!is_set(i, bitarr))
                        continue;
 
-               ret = snprintf(numstr, LXC_NUMSTRLEN64, "%zu", i);
-               if (ret < 0 || (size_t)ret >= LXC_NUMSTRLEN64) {
+               ret = snprintf(numstr, sizeof(numstr), "%zu", i);
+               if (ret < 0 || (size_t)ret >= sizeof(numstr)) {
                        lxc_free_array((void **)cpulist, free);
                        return NULL;
                }
@@ -398,7 +408,7 @@ static bool cg_legacy_filter_and_set_cpus(char *path, bool am_initialized)
 
        /* Get maximum number of cpus found in possible cpuset. */
        maxposs = get_max_cpus(posscpus);
-       if (maxposs < 0)
+       if (maxposs < 0 || maxposs >= INT_MAX - 1)
                goto on_error;
 
        if (!file_exists(__ISOL_CPUS)) {
@@ -443,7 +453,7 @@ static bool cg_legacy_filter_and_set_cpus(char *path, bool am_initialized)
 
        /* Get maximum number of cpus found in isolated cpuset. */
        maxisol = get_max_cpus(isolcpus);
-       if (maxisol < 0)
+       if (maxisol < 0 || maxisol >= INT_MAX - 1)
                goto on_error;
 
        if (maxposs < maxisol)
@@ -571,7 +581,7 @@ static bool cg_legacy_handle_cpuset_hierarchy(struct hierarchy *h, char *cgname)
        if (slash)
                *slash = '\0';
 
-       cgpath = must_make_path(h->mountpoint, h->base_cgroup, cgname, NULL);
+       cgpath = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL);
        if (slash)
                *slash = '/';
 
@@ -584,8 +594,7 @@ static bool cg_legacy_handle_cpuset_hierarchy(struct hierarchy *h, char *cgname)
                }
        }
 
-       clonechildrenpath =
-           must_make_path(cgpath, "cgroup.clone_children", NULL);
+       clonechildrenpath = must_make_path(cgpath, "cgroup.clone_children", NULL);
        /* unified hierarchy doesn't have clone_children */
        if (!file_exists(clonechildrenpath)) {
                free(clonechildrenpath);
@@ -695,8 +704,7 @@ static bool controller_found(struct hierarchy **hlist, char *entry)
  */
 static bool all_controllers_found(struct cgroup_ops *ops)
 {
-       char *p;
-       char *saveptr = NULL;
+       char **cur;
        struct hierarchy **hlist = ops->hierarchies;
 
        if (!controller_found(hlist, "freezer")) {
@@ -707,9 +715,9 @@ static bool all_controllers_found(struct cgroup_ops *ops)
        if (!ops->cgroup_use)
                return true;
 
-       for (; (p = strtok_r(ops->cgroup_use, ",", &saveptr)); ops->cgroup_use = NULL)
-               if (!controller_found(hlist, p)) {
-                       ERROR("No %s controller mountpoint found", p);
+       for (cur = ops->cgroup_use; cur && *cur; cur++)
+               if (!controller_found(hlist, *cur)) {
+                       ERROR("No %s controller mountpoint found", *cur);
                        return false;
                }
 
@@ -729,7 +737,7 @@ static char **cg_hybrid_get_controllers(char **klist, char **nlist, char *line,
         */
        int i;
        char *dup, *p2, *tok;
-       char *p = line, *saveptr = NULL, *sep = ",";
+       char *p = line, *sep = ",";
        char **aret = NULL;
 
        for (i = 0; i < 4; i++) {
@@ -756,16 +764,17 @@ static char **cg_hybrid_get_controllers(char **klist, char **nlist, char *line,
        *p2 = '\0';
 
        if (type == CGROUP_SUPER_MAGIC) {
-               /* strdup() here for v1 hierarchies. Otherwise strtok_r() will
-                * destroy mountpoints such as "/sys/fs/cgroup/cpu,cpuacct".
+               /* strdup() here for v1 hierarchies. Otherwise
+                * lxc_iterate_parts() will destroy mountpoints such as
+                * "/sys/fs/cgroup/cpu,cpuacct".
                 */
                dup = strdup(p);
                if (!dup)
                        return NULL;
 
-               for (tok = strtok_r(dup, sep, &saveptr); tok;
-                    tok = strtok_r(NULL, sep, &saveptr))
+               lxc_iterate_parts(tok, dup, sep) {
                        must_append_controller(klist, nlist, &aret, tok);
+               }
 
                free(dup);
        }
@@ -787,15 +796,14 @@ static char **cg_unified_make_empty_controller(void)
 static char **cg_unified_get_controllers(const char *file)
 {
        char *buf, *tok;
-       char *saveptr = NULL, *sep = " \t\n";
+       char *sep = " \t\n";
        char **aret = NULL;
 
        buf = read_file(file);
        if (!buf)
                return NULL;
 
-       for (tok = strtok_r(buf, sep, &saveptr); tok;
-            tok = strtok_r(NULL, sep, &saveptr)) {
+       lxc_iterate_parts(tok, buf, sep) {
                int newentry;
                char *copy;
 
@@ -809,7 +817,7 @@ static char **cg_unified_get_controllers(const char *file)
 }
 
 static struct hierarchy *add_hierarchy(struct hierarchy ***h, char **clist, char *mountpoint,
-                                      char *base_cgroup, int type)
+                                      char *container_base_path, int type)
 {
        struct hierarchy *new;
        int newentry;
@@ -817,8 +825,9 @@ static struct hierarchy *add_hierarchy(struct hierarchy ***h, char **clist, char
        new = must_alloc(sizeof(*new));
        new->controllers = clist;
        new->mountpoint = mountpoint;
-       new->base_cgroup = base_cgroup;
-       new->fullcgpath = NULL;
+       new->container_base_path = container_base_path;
+       new->container_full_path = NULL;
+       new->monitor_full_path = NULL;
        new->version = type;
 
        newentry = append_null_to_list((void ***)h);
@@ -879,7 +888,7 @@ static char *copy_to_eol(char *p)
  */
 static bool controller_in_clist(char *cgline, char *c)
 {
-       char *tok, *saveptr = NULL, *eol, *tmp;
+       char *tok, *eol, *tmp;
        size_t len;
 
        eol = strchr(cgline, ':');
@@ -891,8 +900,7 @@ static bool controller_in_clist(char *cgline, char *c)
        memcpy(tmp, cgline, len);
        tmp[len] = '\0';
 
-       for (tok = strtok_r(tmp, ",", &saveptr); tok;
-            tok = strtok_r(NULL, ",", &saveptr)) {
+       lxc_iterate_parts(tok, tmp, ",") {
                if (strcmp(tok, c) == 0)
                        return true;
        }
@@ -956,7 +964,7 @@ static int get_existing_subsystems(char ***klist, char ***nlist)
                return -1;
 
        while (getline(&line, &len, f) != -1) {
-               char *p, *p2, *tok, *saveptr = NULL;
+               char *p, *p2, *tok;
                p = strchr(line, ':');
                if (!p)
                        continue;
@@ -978,8 +986,7 @@ static int get_existing_subsystems(char ***klist, char ***nlist)
                        continue;
                }
 
-               for (tok = strtok_r(p, ",", &saveptr); tok;
-                    tok = strtok_r(NULL, ",", &saveptr)) {
+               lxc_iterate_parts(tok, p, ",") {
                        if (strncmp(tok, "name=", 5) == 0)
                                must_append_string(nlist, tok);
                        else
@@ -1016,7 +1023,7 @@ static void lxc_cgfsng_print_hierarchies(struct cgroup_ops *ops)
                int j;
                char **cit;
 
-               TRACE("  %d: base_cgroup: %s", i, (*it)->base_cgroup ? (*it)->base_cgroup : "(null)");
+               TRACE("  %d: base_cgroup: %s", i, (*it)->container_base_path ? (*it)->container_base_path : "(null)");
                TRACE("      mountpoint:  %s", (*it)->mountpoint ? (*it)->mountpoint : "(null)");
                TRACE("      controllers:");
                for (j = 0, cit = (*it)->controllers; cit && *cit; cit++, j++)
@@ -1052,15 +1059,15 @@ static int cgroup_rmdir(struct hierarchy **hierarchies,
                int ret;
                struct hierarchy *h = hierarchies[i];
 
-               if (!h->fullcgpath)
+               if (!h->container_full_path)
                        continue;
 
-               ret = recursive_destroy(h->fullcgpath);
+               ret = recursive_destroy(h->container_full_path);
                if (ret < 0)
-                       WARN("Failed to destroy \"%s\"", h->fullcgpath);
+                       WARN("Failed to destroy \"%s\"", h->container_full_path);
 
-               free(h->fullcgpath);
-               h->fullcgpath = NULL;
+               free(h->container_full_path);
+               h->container_full_path = NULL;
        }
 
        return 0;
@@ -1104,7 +1111,8 @@ static int cgroup_rmdir_wrapper(void *data)
        return cgroup_rmdir(arg->hierarchies, arg->container_cgroup);
 }
 
-static void cgfsng_destroy(struct cgroup_ops *ops, struct lxc_handler *handler)
+__cgfsng_ops static void cgfsng_payload_destroy(struct cgroup_ops *ops,
+                                               struct lxc_handler *handler)
 {
        int ret;
        struct generic_userns_exec_data wrap;
@@ -1125,6 +1133,60 @@ static void cgfsng_destroy(struct cgroup_ops *ops, struct lxc_handler *handler)
        }
 }
 
+/* Tear down the monitor's cgroup in every hierarchy that has one recorded.
+ *
+ * For each hierarchy with a non-NULL monitor_full_path, the pid stored in
+ * handler->monitor_pid is written to the "cgroup.procs" file of the
+ * PIVOT_CGROUP cgroup located below mountpoint/container_base_path (and below
+ * conf->cgroup_meta.dir when that is set). Afterwards monitor_full_path is
+ * removed with recursive_destroy(). Failures are handled per hierarchy: the
+ * loop simply moves on to the next one.
+ */
 __cgfsng_ops static void cgfsng_monitor_destroy(struct cgroup_ops *ops,
                                                struct lxc_handler *handler)
 {
        int len;
        char *pivot_path;
        struct lxc_conf *conf = handler->conf;
        char pidstr[INTTYPE_TO_STRLEN(pid_t)];
 
+       /* Nothing to do when no hierarchies were set up. */
        if (!ops->hierarchies)
                return;
 
+       /* Format the pid once; the same string is written for each hierarchy. */
        len = snprintf(pidstr, sizeof(pidstr), "%d", handler->monitor_pid);
        if (len < 0 || (size_t)len >= sizeof(pidstr))
                return;
 
        for (int i = 0; ops->hierarchies[i]; i++) {
                int ret;
                struct hierarchy *h = ops->hierarchies[i];
 
+               /* No monitor cgroup was recorded for this hierarchy. */
                if (!h->monitor_full_path)
                        continue;
 
                if (conf && conf->cgroup_meta.dir)
                        pivot_path = must_make_path(h->mountpoint,
                                                    h->container_base_path,
                                                    conf->cgroup_meta.dir,
                                                    PIVOT_CGROUP,
                                                    "cgroup.procs", NULL);
                else
                        pivot_path = must_make_path(h->mountpoint,
                                                    h->container_base_path,
                                                    PIVOT_CGROUP,
                                                    "cgroup.procs", NULL);
 
+               /* NOTE(review): pivot_path already ends in "cgroup.procs", so
+                * mkdir_p() is handed the path of the procs file rather than
+                * of the pivot directory - confirm this is intended.
+                */
                ret = mkdir_p(pivot_path, 0755);
                if (ret < 0 && errno != EEXIST)
                        goto next;
 
                /* Move ourselves into the pivot cgroup to delete our own
                 * cgroup.
                 */
                ret = lxc_write_to_file(pivot_path, pidstr, len, false, 0666);
                if (ret != 0)
                        goto next;
 
+               /* NOTE(review): monitor_full_path is neither freed nor reset
+                * here - presumably released elsewhere; verify.
+                */
                ret = recursive_destroy(h->monitor_full_path);
                if (ret < 0)
                        WARN("Failed to destroy \"%s\"", h->monitor_full_path);
 
        next:
                free(pivot_path);
        }
 }
+
 static bool cg_unified_create_cgroup(struct hierarchy *h, char *cgname)
 {
        size_t i, parts_len;
@@ -1168,7 +1230,7 @@ static bool cg_unified_create_cgroup(struct hierarchy *h, char *cgname)
        if (parts_len > 0)
                parts_len--;
 
-       cgroup = must_make_path(h->mountpoint, h->base_cgroup, NULL);
+       cgroup = must_make_path(h->mountpoint, h->container_base_path, NULL);
        for (i = 0; i < parts_len; i++) {
                int ret;
                char *target;
@@ -1193,13 +1255,35 @@ on_error:
        return bret;
 }
 
-static bool create_path_for_hierarchy(struct hierarchy *h, char *cgname)
+/* Set up the monitor cgroup @cgname inside hierarchy @h.
+ *
+ * The full path (mountpoint/container_base_path/cgname) is stored in
+ * h->monitor_full_path. A directory that already exists is accepted as is;
+ * otherwise the legacy cpuset handling runs, the directory is created and the
+ * result of cg_unified_create_cgroup() is returned.
+ */
 static bool monitor_create_path_for_hierarchy(struct hierarchy *h, char *cgname)
 {
        h->monitor_full_path = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL);
 
+       /* An existing monitor cgroup is reused rather than treated as an error. */
        if (dir_exists(h->monitor_full_path))
                return true;
 
        if (!cg_legacy_handle_cpuset_hierarchy(h, cgname)) {
                ERROR("Failed to handle legacy cpuset controller");
                return false;
        }
 
+       if (mkdir_p(h->monitor_full_path, 0755) < 0) {
                ERROR("Failed to create cgroup \"%s\"", h->monitor_full_path);
                return false;
        }
 
        return cg_unified_create_cgroup(h, cgname);
 }
+
+static bool container_create_path_for_hierarchy(struct hierarchy *h, char *cgname)
 {
        int ret;
 
-       h->fullcgpath = must_make_path(h->mountpoint, h->base_cgroup, cgname, NULL);
-       if (dir_exists(h->fullcgpath)) {
-               ERROR("The cgroup \"%s\" already existed", h->fullcgpath);
+       h->container_full_path = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL);
+       if (dir_exists(h->container_full_path)) {
+               ERROR("The cgroup \"%s\" already existed", h->container_full_path);
                return false;
        }
 
@@ -1208,32 +1292,102 @@ static bool create_path_for_hierarchy(struct hierarchy *h, char *cgname)
                return false;
        }
 
-       ret = mkdir_p(h->fullcgpath, 0755);
+       ret = mkdir_p(h->container_full_path, 0755);
        if (ret < 0) {
-               ERROR("Failed to create cgroup \"%s\"", h->fullcgpath);
+               ERROR("Failed to create cgroup \"%s\"", h->container_full_path);
                return false;
        }
 
        return cg_unified_create_cgroup(h, cgname);
 }
 
-static void remove_path_for_hierarchy(struct hierarchy *h, char *cgname)
+static void remove_path_for_hierarchy(struct hierarchy *h, char *cgname, bool monitor)
 {
        int ret;
+       char *full_path;
+
+       if (monitor)
+               full_path = h->monitor_full_path;
+       else
+               full_path = h->container_full_path;
 
-       ret = rmdir(h->fullcgpath);
+       ret = rmdir(full_path);
        if (ret < 0)
-               SYSERROR("Failed to rmdir(\"%s\") from failed creation attempt", h->fullcgpath);
+               SYSERROR("Failed to rmdir(\"%s\") from failed creation attempt", full_path);
+
+       free(full_path);
 
-       free(h->fullcgpath);
-       h->fullcgpath = NULL;
+       if (monitor)
+               h->monitor_full_path = NULL;
+       else
+               h->container_full_path = NULL;
+}
+
+/* Create the monitor's cgroup in every hierarchy.
+ *
+ * The cgroup name is built from ops->monitor_pattern and handler->name
+ * (prefixed with conf->cgroup_meta.dir when that is set). If creation fails in
+ * any hierarchy, the paths recorded for the hierarchies handled so far are
+ * removed again and the whole attempt is repeated with "-1", "-2", ...,
+ * "-999" appended to the name.
+ *
+ * Returns true as soon as one name could be set up in all hierarchies, false
+ * if there is no config, the name could not be built, or every suffix has
+ * been tried without success.
+ */
 __cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops,
                                                        struct lxc_handler *handler)
 {
        char *monitor_cgroup, *offset, *tmp;
        int idx = 0;
        size_t len;
        bool bret = false;
+       bool created = false;
        struct lxc_conf *conf = handler->conf;
 
        if (!conf)
                return bret;
 
        if (conf->cgroup_meta.dir)
                tmp = lxc_string_join("/",
                                      (const char *[]){conf->cgroup_meta.dir,
                                                       ops->monitor_pattern,
                                                       handler->name, NULL},
                                      false);
        else
                tmp = must_make_path(ops->monitor_pattern, handler->name, NULL);
        if (!tmp)
                return bret;
 
        len = strlen(tmp) + 5; /* leave room for -NNN\0 */
        monitor_cgroup = must_alloc(len);
        (void)strlcpy(monitor_cgroup, tmp, len);
        free(tmp);
+       /* The "-NNN" suffix is (re)written at the end of the base name. */
        offset = monitor_cgroup + len - 5;
 
        do {
                if (idx) {
                        int ret = snprintf(offset, 5, "-%d", idx);
                        if (ret < 0 || (size_t)ret >= 5)
                                goto on_error;
                }
 
+               /* Assume success; cleared below if any hierarchy fails. */
+               created = true;
+
                for (int i = 0; ops->hierarchies[i]; i++) {
+                       struct hierarchy *h = ops->hierarchies[i];
+
+                       if (monitor_create_path_for_hierarchy(h, monitor_cgroup))
+                               continue;
+
+                       ERROR("Failed to create cgroup \"%s\"", h->monitor_full_path);
+
+                       /* Drop the monitor path that was just allocated for
+                        * this hierarchy. The payload's container_full_path
+                        * is unrelated and must be left untouched.
+                        */
+                       free(h->monitor_full_path);
+                       h->monitor_full_path = NULL;
+
+                       /* Undo the hierarchies already handled in this round. */
+                       for (int j = 0; j < i; j++)
+                               remove_path_for_hierarchy(ops->hierarchies[j], monitor_cgroup, true);
+
+                       created = false;
+                       idx++;
+                       break;
                }
+               /* Retry only after a failed round. Looping on "idx > 0" alone
+                * would spin forever once a suffixed name has succeeded.
+                */
+       } while (!created && idx < 1000);
 
+       bret = created;
 
 on_error:
        free(monitor_cgroup);
 
        return bret;
 }
 
 /* Try to create the same cgroup in all hierarchies. Start with cgroup_pattern;
  * next cgroup_pattern-1, -2, ..., -999.
  */
-static inline bool cgfsng_create(struct cgroup_ops *ops,
-                                struct lxc_handler *handler)
+__cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops,
+                                                       struct lxc_handler *handler)
 {
        int i;
        size_t len;
@@ -1286,13 +1440,12 @@ again:
        }
 
        for (i = 0; ops->hierarchies[i]; i++) {
-               if (!create_path_for_hierarchy(ops->hierarchies[i], container_cgroup)) {
-                       int j;
-                       ERROR("Failed to create cgroup \"%s\"", ops->hierarchies[i]->fullcgpath);
-                       free(ops->hierarchies[i]->fullcgpath);
-                       ops->hierarchies[i]->fullcgpath = NULL;
-                       for (j = 0; j < i; j++)
-                               remove_path_for_hierarchy(ops->hierarchies[j], container_cgroup);
+               if (!container_create_path_for_hierarchy(ops->hierarchies[i], container_cgroup)) {
+                       ERROR("Failed to create cgroup \"%s\"", ops->hierarchies[i]->container_full_path);
+                       free(ops->hierarchies[i]->container_full_path);
+                       ops->hierarchies[i]->container_full_path = NULL;
+                       for (int j = 0; j < i; j++)
+                               remove_path_for_hierarchy(ops->hierarchies[j], container_cgroup, false);
                        idx++;
                        goto again;
                }
@@ -1308,33 +1461,48 @@ out_free:
        return false;
 }
 
-static bool cgfsng_enter(struct cgroup_ops *ops, pid_t pid)
+__cgfsng_ops static bool __do_cgroup_enter(struct cgroup_ops *ops, pid_t pid,
+                                            bool monitor)
 {
-       int i, len;
-       char pidstr[25];
+       int len;
+       char pidstr[INTTYPE_TO_STRLEN(pid_t)];
 
-       len = snprintf(pidstr, 25, "%d", pid);
-       if (len < 0 || len >= 25)
+       len = snprintf(pidstr, sizeof(pidstr), "%d", pid);
+       if (len < 0 || (size_t)len >= sizeof(pidstr))
                return false;
 
-       for (i = 0; ops->hierarchies[i]; i++) {
+       for (int i = 0; ops->hierarchies[i]; i++) {
                int ret;
-               char *fullpath;
+               char *path;
 
-               fullpath = must_make_path(ops->hierarchies[i]->fullcgpath,
-                                         "cgroup.procs", NULL);
-               ret = lxc_write_to_file(fullpath, pidstr, len, false, 0666);
+               if (monitor)
+                       path = must_make_path(ops->hierarchies[i]->monitor_full_path,
+                                             "cgroup.procs", NULL);
+               else
+                       path = must_make_path(ops->hierarchies[i]->container_full_path,
+                                             "cgroup.procs", NULL);
+               ret = lxc_write_to_file(path, pidstr, len, false, 0666);
                if (ret != 0) {
-                       SYSERROR("Failed to enter cgroup \"%s\"", fullpath);
-                       free(fullpath);
+                       SYSERROR("Failed to enter cgroup \"%s\"", path);
+                       free(path);
                        return false;
                }
-               free(fullpath);
+               free(path);
        }
 
        return true;
 }
 
+/* Move @pid into the monitor cgroup of every hierarchy by delegating to
+ * __do_cgroup_enter() with monitor == true.
+ */
 __cgfsng_ops static bool cgfsng_monitor_enter(struct cgroup_ops *ops, pid_t pid)
 {
        return __do_cgroup_enter(ops, pid, true);
 }
+
+/* Move @pid into the container (payload) cgroup of every hierarchy by
+ * delegating to __do_cgroup_enter() with monitor == false.
+ * NOTE(review): unlike cgfsng_monitor_enter() this is not marked
+ * __cgfsng_ops - confirm whether that is intentional.
+ */
 static bool cgfsng_payload_enter(struct cgroup_ops *ops, pid_t pid)
 {
        return __do_cgroup_enter(ops, pid, false);
 }
+
 static int chowmod(char *path, uid_t chown_uid, gid_t chown_gid,
                   mode_t chmod_mode)
 {
@@ -1391,10 +1559,12 @@ static int chown_cgroup_wrapper(void *data)
        }
 
        destuid = get_ns_uid(arg->origuid);
+       if (destuid == LXC_INVALID_UID)
+               destuid = 0;
 
        for (i = 0; arg->hierarchies[i]; i++) {
                char *fullpath;
-               char *path = arg->hierarchies[i]->fullcgpath;
+               char *path = arg->hierarchies[i]->container_full_path;
 
                ret = chowmod(path, destuid, nsgid, 0775);
                if (ret < 0)
@@ -1432,7 +1602,8 @@ static int chown_cgroup_wrapper(void *data)
        return 0;
 }
 
-static bool cgfsng_chown(struct cgroup_ops *ops, struct lxc_conf *conf)
+__cgfsng_ops static bool cgfsng_chown(struct cgroup_ops *ops,
+                                       struct lxc_conf *conf)
 {
        struct generic_userns_exec_data wrap;
 
@@ -1496,7 +1667,7 @@ static int cg_legacy_mount_controllers(int type, struct hierarchy *h,
                INFO("Remounted %s read-only", controllerpath);
        }
 
-       sourcepath = must_make_path(h->mountpoint, h->base_cgroup,
+       sourcepath = must_make_path(h->mountpoint, h->container_base_path,
                                    container_cgroup, NULL);
        if (type == LXC_AUTO_CGROUP_RO)
                flags |= MS_RDONLY;
@@ -1581,8 +1752,9 @@ static inline int cg_mount_cgroup_full(int type, struct hierarchy *h,
        return __cg_mount_direct(type, h, controllerpath);
 }
 
-static bool cgfsng_mount(struct cgroup_ops *ops, struct lxc_handler *handler,
-                        const char *root, int type)
+__cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
+                                       struct lxc_handler *handler,
+                                       const char *root, int type)
 {
        int i, ret;
        char *tmpfspath = NULL;
@@ -1666,7 +1838,7 @@ static bool cgfsng_mount(struct cgroup_ops *ops, struct lxc_handler *handler,
                        continue;
                }
 
-               path2 = must_make_path(controllerpath, h->base_cgroup,
+               path2 = must_make_path(controllerpath, h->container_base_path,
                                       ops->container_cgroup, NULL);
                ret = mkdir_p(path2, 0755);
                if (ret < 0) {
@@ -1731,7 +1903,7 @@ static int recursive_count_nrtasks(char *dirname)
        return count;
 }
 
-static int cgfsng_nrtasks(struct cgroup_ops *ops)
+__cgfsng_ops static int cgfsng_nrtasks(struct cgroup_ops *ops)
 {
        int count;
        char *path;
@@ -1739,18 +1911,19 @@ static int cgfsng_nrtasks(struct cgroup_ops *ops)
        if (!ops->container_cgroup || !ops->hierarchies)
                return -1;
 
-       path = must_make_path(ops->hierarchies[0]->fullcgpath, NULL);
+       path = must_make_path(ops->hierarchies[0]->container_full_path, NULL);
        count = recursive_count_nrtasks(path);
        free(path);
        return count;
 }
 
 /* Only root needs to escape to the cgroup of its init. */
-static bool cgfsng_escape(const struct cgroup_ops *ops)
+__cgfsng_ops static bool cgfsng_escape(const struct cgroup_ops *ops,
+                                        struct lxc_conf *conf)
 {
        int i;
 
-       if (geteuid())
+       if (conf->cgroup_meta.relative || geteuid())
                return true;
 
        for (i = 0; ops->hierarchies[i]; i++) {
@@ -1758,7 +1931,7 @@ static bool cgfsng_escape(const struct cgroup_ops *ops)
                char *fullpath;
 
                fullpath = must_make_path(ops->hierarchies[i]->mountpoint,
-                                         ops->hierarchies[i]->base_cgroup,
+                                         ops->hierarchies[i]->container_base_path,
                                          "cgroup.procs", NULL);
                ret = lxc_write_to_file(fullpath, "0", 2, false, 0666);
                if (ret != 0) {
@@ -1772,7 +1945,7 @@ static bool cgfsng_escape(const struct cgroup_ops *ops)
        return true;
 }
 
-static int cgfsng_num_hierarchies(struct cgroup_ops *ops)
+__cgfsng_ops static int cgfsng_num_hierarchies(struct cgroup_ops *ops)
 {
        int i;
 
@@ -1782,7 +1955,7 @@ static int cgfsng_num_hierarchies(struct cgroup_ops *ops)
        return i;
 }
 
-static bool cgfsng_get_hierarchies(struct cgroup_ops *ops, int n, char ***out)
+__cgfsng_ops static bool cgfsng_get_hierarchies(struct cgroup_ops *ops, int n, char ***out)
 {
        int i;
 
@@ -1802,7 +1975,7 @@ static bool cgfsng_get_hierarchies(struct cgroup_ops *ops, int n, char ***out)
 /* TODO: If the unified cgroup hierarchy grows a freezer controller this needs
  * to be adapted.
  */
-static bool cgfsng_unfreeze(struct cgroup_ops *ops)
+__cgfsng_ops static bool cgfsng_unfreeze(struct cgroup_ops *ops)
 {
        int ret;
        char *fullpath;
@@ -1812,7 +1985,7 @@ static bool cgfsng_unfreeze(struct cgroup_ops *ops)
        if (!h)
                return false;
 
-       fullpath = must_make_path(h->fullcgpath, "freezer.state", NULL);
+       fullpath = must_make_path(h->container_full_path, "freezer.state", NULL);
        ret = lxc_write_to_file(fullpath, THAWED, THAWED_LEN, false, 0666);
        free(fullpath);
        if (ret < 0)
@@ -1821,8 +1994,8 @@ static bool cgfsng_unfreeze(struct cgroup_ops *ops)
        return true;
 }
 
-static const char *cgfsng_get_cgroup(struct cgroup_ops *ops,
-                                    const char *controller)
+__cgfsng_ops static const char *cgfsng_get_cgroup(struct cgroup_ops *ops,
+                                                   const char *controller)
 {
        struct hierarchy *h;
 
@@ -1833,7 +2006,7 @@ static const char *cgfsng_get_cgroup(struct cgroup_ops *ops,
                return NULL;
        }
 
-       return h->fullcgpath ? h->fullcgpath + strlen(h->mountpoint) : NULL;
+       return h->container_full_path ? h->container_full_path + strlen(h->mountpoint) : NULL;
 }
 
 /* Given a cgroup path returned from lxc_cmd_get_cgroup_path, build a full path,
@@ -1881,8 +2054,8 @@ static int __cg_unified_attach(const struct hierarchy *h, const char *name,
 
        free(full_path);
 
-       len = strlen(base_path) + sizeof("/lxc-1000") - 1 +
-             sizeof("/cgroup-procs") - 1;
+       len = strlen(base_path) + STRLITERALLEN("/lxc-1000") +
+             STRLITERALLEN("/cgroup-procs");
        full_path = must_alloc(len + 1);
        do {
                if (idx)
@@ -1920,14 +2093,14 @@ on_error:
        return fret;
 }
 
-static bool cgfsng_attach(struct cgroup_ops *ops, const char *name,
-                         const char *lxcpath, pid_t pid)
+__cgfsng_ops static bool cgfsng_attach(struct cgroup_ops *ops, const char *name,
+                                        const char *lxcpath, pid_t pid)
 {
        int i, len, ret;
-       char pidstr[25];
+       char pidstr[INTTYPE_TO_STRLEN(pid_t)];
 
-       len = snprintf(pidstr, 25, "%d", pid);
-       if (len < 0 || len >= 25)
+       len = snprintf(pidstr, sizeof(pidstr), "%d", pid);
+       if (len < 0 || (size_t)len >= sizeof(pidstr))
                return false;
 
        for (i = 0; ops->hierarchies[i]; i++) {
@@ -1967,8 +2140,9 @@ static bool cgfsng_attach(struct cgroup_ops *ops, const char *name,
  * don't have a cgroup_data set up, so we ask the running container through the
  * commands API for the cgroup path.
  */
-static int cgfsng_get(struct cgroup_ops *ops, const char *filename, char *value,
-                     size_t len, const char *name, const char *lxcpath)
+__cgfsng_ops static int cgfsng_get(struct cgroup_ops *ops, const char *filename,
+                                    char *value, size_t len, const char *name,
+                                    const char *lxcpath)
 {
        int ret = -1;
        size_t controller_len;
@@ -2005,8 +2179,9 @@ static int cgfsng_get(struct cgroup_ops *ops, const char *filename, char *value,
  * don't have a cgroup_data set up, so we ask the running container through the
  * commands API for the cgroup path.
  */
-static int cgfsng_set(struct cgroup_ops *ops, const char *filename,
-                     const char *value, const char *name, const char *lxcpath)
+__cgfsng_ops static int cgfsng_set(struct cgroup_ops *ops,
+                                    const char *filename, const char *value,
+                                    const char *name, const char *lxcpath)
 {
        int ret = -1;
        size_t controller_len;
@@ -2156,7 +2331,7 @@ static int cg_legacy_set_data(struct cgroup_ops *ops, const char *filename,
                return -ENOENT;
        }
 
-       fullpath = must_make_path(h->fullcgpath, filename, NULL);
+       fullpath = must_make_path(h->container_full_path, filename, NULL);
        ret = lxc_write_to_file(fullpath, value, strlen(value), false, 0666);
        free(fullpath);
        return ret;
@@ -2224,7 +2399,7 @@ static bool __cg_unified_setup_limits(struct cgroup_ops *ops,
                char *fullpath;
                struct lxc_cgroup *cg = iterator->elem;
 
-               fullpath = must_make_path(h->fullcgpath, cg->subsystem, NULL);
+               fullpath = must_make_path(h->container_full_path, cg->subsystem, NULL);
                ret = lxc_write_to_file(fullpath, cg->value, strlen(cg->value), false, 0666);
                free(fullpath);
                if (ret < 0) {
@@ -2239,8 +2414,9 @@ static bool __cg_unified_setup_limits(struct cgroup_ops *ops,
        return true;
 }
 
-static bool cgfsng_setup_limits(struct cgroup_ops *ops, struct lxc_conf *conf,
-                               bool do_devices)
+__cgfsng_ops static bool cgfsng_setup_limits(struct cgroup_ops *ops,
+                                              struct lxc_conf *conf,
+                                              bool do_devices)
 {
        bool bret;
 
@@ -2251,14 +2427,41 @@ static bool cgfsng_setup_limits(struct cgroup_ops *ops, struct lxc_conf *conf,
        return __cg_unified_setup_limits(ops, &conf->cgroup2);
 }
 
+static bool cgroup_use_wants_controllers(const struct cgroup_ops *ops,
+                                      char **controllers)
+{ /* Returns true iff every entry of @controllers also appears in ops->cgroup_use. */
+       char **cur_ctrl, **cur_use;
+
+       if (!ops->cgroup_use) /* no cgroup_use list set: nothing is filtered out */
+               return true;
+
+       for (cur_ctrl = controllers; cur_ctrl && *cur_ctrl; cur_ctrl++) { /* NULL-terminated list; a NULL @controllers is tolerated */
+               bool found = false;
+
+               for (cur_use = ops->cgroup_use; cur_use && *cur_use; cur_use++) { /* linear search, exact strcmp() match */
+                       if (strcmp(*cur_use, *cur_ctrl) != 0)
+                               continue;
+
+                       found = true;
+                       break;
+               }
+
+               if (found)
+                       continue;
+
+               return false; /* a single unlisted controller rejects the whole list */
+       }
+
+       return true; /* also reached when @controllers is NULL or empty */
+}
+
 /* At startup, parse_hierarchies finds all the info we need about cgroup
  * mountpoints and current cgroups, and stores it in @d.
  */
-static bool cg_hybrid_init(struct cgroup_ops *ops)
+static bool cg_hybrid_init(struct cgroup_ops *ops, bool relative)
 {
        int ret;
        char *basecginfo;
-       bool will_escape;
        FILE *f;
        size_t len = 0;
        char *line = NULL;
@@ -2267,8 +2470,7 @@ static bool cg_hybrid_init(struct cgroup_ops *ops)
        /* Root spawned containers escape the current cgroup, so use init's
         * cgroups as our base in that case.
         */
-       will_escape = (geteuid() == 0);
-       if (will_escape)
+       if (!relative && (geteuid() == 0))
                basecginfo = read_file("/proc/1/cgroup");
        else
                basecginfo = read_file("/proc/self/cgroup");
@@ -2366,6 +2568,10 @@ static bool cg_hybrid_init(struct cgroup_ops *ops)
                        }
                }
 
+               /* If lxc.cgroup.use is set, exclude this hierarchy unless it lists every one of its controllers. */
+               if (!cgroup_use_wants_controllers(ops, controller_list))
+                       goto next;
+
                new = add_hierarchy(&ops->hierarchies, controller_list, mountpoint, base_cgroup, type);
                if (type == CGROUP2_SUPER_MAGIC && !ops->unified)
                        ops->unified = new;
@@ -2415,14 +2621,12 @@ static int cg_is_pure_unified(void)
 }
 
 /* Get current cgroup from /proc/self/cgroup for the cgroupfs v2 hierarchy. */
-static char *cg_unified_get_current_cgroup(void)
+static char *cg_unified_get_current_cgroup(bool relative)
 {
        char *basecginfo, *base_cgroup;
-       bool will_escape;
        char *copy = NULL;
 
-       will_escape = (geteuid() == 0);
-       if (will_escape)
+       if (!relative && (geteuid() == 0))
                basecginfo = read_file("/proc/1/cgroup");
        else
                basecginfo = read_file("/proc/self/cgroup");
@@ -2446,7 +2650,7 @@ cleanup_on_err:
        return copy;
 }
 
-static int cg_unified_init(struct cgroup_ops *ops)
+static int cg_unified_init(struct cgroup_ops *ops, bool relative)
 {
        int ret;
        char *mountpoint, *subtree_path;
@@ -2460,7 +2664,7 @@ static int cg_unified_init(struct cgroup_ops *ops)
        if (ret != CGROUP2_SUPER_MAGIC)
                return 0;
 
-       base_cgroup = cg_unified_get_current_cgroup();
+       base_cgroup = cg_unified_get_current_cgroup(relative);
        if (!base_cgroup)
                return -EINVAL;
        prune_init_scope(base_cgroup);
@@ -2492,26 +2696,37 @@ static int cg_unified_init(struct cgroup_ops *ops)
        return CGROUP2_SUPER_MAGIC;
 }
 
-static bool cg_init(struct cgroup_ops *ops)
+static bool cg_init(struct cgroup_ops *ops, struct lxc_conf *conf)
 {
        int ret;
        const char *tmp;
+       bool relative = conf->cgroup_meta.relative; /* NOTE(review): @conf is dereferenced without a NULL check - confirm callers never pass NULL */
 
        tmp = lxc_global_config_value("lxc.cgroup.use");
-       if (tmp)
-               ops->cgroup_use = must_copy_string(tmp);
+       if (tmp) {
+               char *chop, *cur, *pin;
+
+               pin = must_copy_string(tmp); /* private copy of the config value; freed after the loop */
+               chop = pin;
+
+               lxc_iterate_parts(cur, chop, ",") { /* presumably yields each ","-separated part of chop in cur - confirm against the macro */
+                       must_append_string(&ops->cgroup_use, cur);
+               }
+
+               free(pin);
+       }
 
-       ret = cg_unified_init(ops);
+       ret = cg_unified_init(ops, relative);
        if (ret < 0)
                return false;
 
        if (ret == CGROUP2_SUPER_MAGIC)
                return true;
 
-       return cg_hybrid_init(ops);
+       return cg_hybrid_init(ops, relative); /* reached only when cg_unified_init() returned neither an error nor CGROUP2_SUPER_MAGIC */
 }
 
-static bool cgfsng_data_init(struct cgroup_ops *ops)
+__cgfsng_ops static bool cgfsng_data_init(struct cgroup_ops *ops)
 {
        const char *cgroup_pattern;
 
@@ -2523,11 +2738,12 @@ static bool cgfsng_data_init(struct cgroup_ops *ops)
                return false;
        }
        ops->cgroup_pattern = must_copy_string(cgroup_pattern);
+       ops->monitor_pattern = MONITOR_CGROUP;
 
        return true;
 }
 
-struct cgroup_ops *cgfsng_ops_init(void)
+struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf)
 {
        struct cgroup_ops *cgfsng_ops;
 
@@ -2538,15 +2754,18 @@ struct cgroup_ops *cgfsng_ops_init(void)
        memset(cgfsng_ops, 0, sizeof(struct cgroup_ops));
        cgfsng_ops->cgroup_layout = CGROUP_LAYOUT_UNKNOWN;
 
-       if (!cg_init(cgfsng_ops)) {
+       if (!cg_init(cgfsng_ops, conf)) {
                free(cgfsng_ops);
                return NULL;
        }
 
        cgfsng_ops->data_init = cgfsng_data_init;
-       cgfsng_ops->destroy = cgfsng_destroy;
-       cgfsng_ops->create = cgfsng_create;
-       cgfsng_ops->enter = cgfsng_enter;
+       cgfsng_ops->payload_destroy = cgfsng_payload_destroy;
+       cgfsng_ops->monitor_destroy = cgfsng_monitor_destroy;
+       cgfsng_ops->monitor_create = cgfsng_monitor_create;
+       cgfsng_ops->monitor_enter = cgfsng_monitor_enter;
+       cgfsng_ops->payload_create = cgfsng_payload_create;
+       cgfsng_ops->payload_enter = cgfsng_payload_enter;
        cgfsng_ops->escape = cgfsng_escape;
        cgfsng_ops->num_hierarchies = cgfsng_num_hierarchies;
        cgfsng_ops->get_hierarchies = cgfsng_get_hierarchies;