cgroups: use zalloc
diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c
index 31ad219c1d8193ce826dbef3d35451feee358e0a..d4c111b21e2b854d60a93cbd8642bf424294279c 100644
--- a/src/lxc/cgroups/cgfsng.c
+++ b/src/lxc/cgroups/cgfsng.c
@@ -145,7 +145,7 @@ static void must_append_controller(char **klist, char **nlist, char ***clist,
 /* Given a handler's cgroup data, return the struct hierarchy for the controller
  * @c, or NULL if there is none.
  */
-struct hierarchy *get_hierarchy(struct cgroup_ops *ops, const char *controller)
+static struct hierarchy *get_hierarchy(struct cgroup_ops *ops, const char *controller)
 {
        if (!ops->hierarchies)
                return log_trace_errno(NULL, errno, "There are no useable cgroup controllers");
@@ -153,15 +153,28 @@ struct hierarchy *get_hierarchy(struct cgroup_ops *ops, const char *controller)
        for (int i = 0; ops->hierarchies[i]; i++) {
                if (!controller) {
                        /* This is the empty unified hierarchy. */
-                       if (ops->hierarchies[i]->controllers &&
-                           !ops->hierarchies[i]->controllers[0])
+                       if (ops->hierarchies[i]->controllers && !ops->hierarchies[i]->controllers[0])
                                return ops->hierarchies[i];
+
                        continue;
-               } else if (pure_unified_layout(ops) &&
-                          strcmp(controller, "devices") == 0) {
-                       if (ops->unified->bpf_device_controller)
-                               return ops->unified;
-                       break;
+               }
+
+               /*
+                * Handle controllers with significant implementation changes
+                * from cgroup to cgroup2.
+                */
+               if (pure_unified_layout(ops)) {
+                       if (strcmp(controller, "devices") == 0) {
+                               if (ops->unified->bpf_device_controller)
+                                       return ops->unified;
+
+                               break;
+                       } else if (strcmp(controller, "freezer") == 0) {
+                               if (ops->unified->freezer_controller)
+                                       return ops->unified;
+
+                               break;
+                       }
                }
 
                if (string_in_list(ops->hierarchies[i]->controllers, controller))
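A brief usage sketch (hypothetical caller, names as in this file): on a pure cgroup2 host, asking for the legacy "devices" or "freezer" controller only succeeds when the corresponding cgroup2 mechanism (a BPF device program, or the cgroup.freeze file) has been detected, in which case the unified hierarchy is returned in its place:

	struct hierarchy *h;

	/* Returns ops->unified when cgroup.freeze support was detected,
	 * otherwise NULL with errno set to ENOENT.
	 */
	h = get_hierarchy(ops, "freezer");
	if (!h)
		return log_error_errno(-1, errno, "No freezer support available");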
@@ -176,45 +189,6 @@ struct hierarchy *get_hierarchy(struct cgroup_ops *ops, const char *controller)
        return ret_set_errno(NULL, ENOENT);
 }
 
-#define BATCH_SIZE 50
-static void batch_realloc(char **mem, size_t oldlen, size_t newlen)
-{
-       int newbatches = (newlen / BATCH_SIZE) + 1;
-       int oldbatches = (oldlen / BATCH_SIZE) + 1;
-
-       if (!*mem || newbatches > oldbatches)
-               *mem = must_realloc(*mem, newbatches * BATCH_SIZE);
-}
-
-static void append_line(char **dest, size_t oldlen, char *new, size_t newlen)
-{
-       size_t full = oldlen + newlen;
-
-       batch_realloc(dest, oldlen, full + 1);
-
-       memcpy(*dest + oldlen, new, newlen + 1);
-}
-
-/* Slurp in a whole file */
-static char *read_file(const char *fnam)
-{
-       __do_free char *buf = NULL, *line = NULL;
-       __do_fclose FILE *f = NULL;
-       size_t len = 0, fulllen = 0;
-       int linelen;
-
-       f = fopen(fnam, "re");
-       if (!f)
-               return NULL;
-
-       while ((linelen = getline(&line, &len, f)) != -1) {
-               append_line(&buf, fulllen, line, linelen);
-               fulllen += linelen;
-       }
-
-       return move_ptr(buf);
-}
-
 /* Taken over modified from the kernel sources. */
 #define NBITS 32 /* bits in uint32_t */
 #define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d))
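The removed read_file() helper is superseded throughout this diff by read_file_at(dfd, path). Passing -EBADF as the directory fd is fine whenever the path is absolute, because openat() ignores the fd for absolute paths. A minimal sketch of such a helper, assuming the two-argument signature used here (the real implementation lives elsewhere in the tree):

static char *read_file_at(int dfd, const char *path)
{
	__do_close int fd = -EBADF;
	__do_free char *buf = NULL;
	size_t len = 0;
	ssize_t bytes;
	char chunk[4096];

	fd = openat(dfd, path, O_RDONLY | O_CLOEXEC);
	if (fd < 0)
		return NULL;

	/* Accumulate the whole file into a NUL-terminated buffer. */
	while ((bytes = read(fd, chunk, sizeof(chunk))) > 0) {
		buf = must_realloc(buf, len + bytes + 1);
		memcpy(buf + len, chunk, bytes);
		len += bytes;
		buf[len] = '\0';
	}
	if (bytes < 0)
		return NULL;

	return move_ptr(buf);
}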
@@ -350,7 +324,7 @@ static bool cg_legacy_filter_and_set_cpus(const char *parent_cgroup,
        bool flipped_bit = false;
 
        fpath = must_make_path(parent_cgroup, "cpuset.cpus", NULL);
-       posscpus = read_file(fpath);
+       posscpus = read_file_at(-EBADF, fpath);
        if (!posscpus)
                return log_error_errno(false, errno, "Failed to read file \"%s\"", fpath);
 
@@ -360,7 +334,7 @@ static bool cg_legacy_filter_and_set_cpus(const char *parent_cgroup,
                return false;
 
        if (file_exists(__ISOL_CPUS)) {
-               isolcpus = read_file(__ISOL_CPUS);
+               isolcpus = read_file_at(-EBADF, __ISOL_CPUS);
                if (!isolcpus)
                        return log_error_errno(false, errno, "Failed to read file \"%s\"", __ISOL_CPUS);
 
@@ -379,7 +353,7 @@ static bool cg_legacy_filter_and_set_cpus(const char *parent_cgroup,
        }
 
        if (file_exists(__OFFLINE_CPUS)) {
-               offlinecpus = read_file(__OFFLINE_CPUS);
+               offlinecpus = read_file_at(-EBADF, __OFFLINE_CPUS);
                if (!offlinecpus)
                        return log_error_errno(false, errno, "Failed to read file \"%s\"", __OFFLINE_CPUS);
 
@@ -654,7 +628,7 @@ static char **cg_hybrid_get_controllers(char **klist, char **nlist, char *line,
         * verify /sys/fs/cgroup/ in this field.
         */
        if (strncmp(p, DEFAULT_CGROUP_MOUNTPOINT "/", 15) != 0)
-               return log_error(NULL, "Found hierarchy not under " DEFAULT_CGROUP_MOUNTPOINT ": \"%s\"", p);
+               return log_warn(NULL, "Found hierarchy not under " DEFAULT_CGROUP_MOUNTPOINT ": \"%s\"", p);
 
        p += 15;
        p2 = strchr(p, ' ');
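The hard-coded 15 above is the length of "/sys/fs/cgroup/", i.e. DEFAULT_CGROUP_MOUNTPOINT plus the trailing slash. A hedged alternative using the STRLITERALLEN() macro already used in this file (assuming DEFAULT_CGROUP_MOUNTPOINT expands to a string literal) would keep the comparison and the following p += 15 step in sync automatically:

	if (strncmp(p, DEFAULT_CGROUP_MOUNTPOINT "/",
		    STRLITERALLEN(DEFAULT_CGROUP_MOUNTPOINT "/")) != 0)
		return log_warn(NULL, "Found hierarchy not under " DEFAULT_CGROUP_MOUNTPOINT ": \"%s\"", p);

	p += STRLITERALLEN(DEFAULT_CGROUP_MOUNTPOINT "/");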
@@ -691,14 +665,14 @@ static char **cg_unified_make_empty_controller(void)
        return move_ptr(aret);
 }
 
-static char **cg_unified_get_controllers(const char *file)
+static char **cg_unified_get_controllers(int dfd, const char *file)
 {
        __do_free char *buf = NULL;
        __do_free_string_list char **aret = NULL;
        char *sep = " \t\n";
        char *tok;
 
-       buf = read_file(file);
+       buf = read_file_at(dfd, file);
        if (!buf)
                return NULL;
 
@@ -948,8 +922,7 @@ static void lxc_cgfsng_print_basecg_debuginfo(char *basecginfo, char **klist,
                TRACE("named subsystem %d: %s", k, *it);
 }
 
-static int cgroup_tree_remove(struct hierarchy **hierarchies,
-                       const char *container_cgroup)
+static int cgroup_tree_remove(struct hierarchy **hierarchies, const char *container_cgroup)
 {
        if (!container_cgroup || !hierarchies)
                return 0;
@@ -1028,7 +1001,7 @@ __cgfsng_ops static void cgfsng_payload_destroy(struct cgroup_ops *ops,
        }
 
 #ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX
-       ret = bpf_program_cgroup_detach(handler->conf->cgroup2_devices);
+       ret = bpf_program_cgroup_detach(handler->cgroup_ops->cgroup2_devices);
        if (ret < 0)
                WARN("Failed to detach bpf program from cgroup");
 #endif
@@ -1094,7 +1067,10 @@ __cgfsng_ops static void cgfsng_monitor_destroy(struct cgroup_ops *ops,
                        goto try_lxc_rm_rf;
                }
 
-               if (conf && conf->cgroup_meta.monitor_dir)
+               if (conf && conf->cgroup_meta.monitor_pivot_dir)
+                       pivot_path = must_make_path(h->mountpoint, h->container_base_path,
+                                                   conf->cgroup_meta.monitor_pivot_dir, CGROUP_PIVOT, NULL);
+               else if (conf && conf->cgroup_meta.monitor_dir)
                        pivot_path = must_make_path(h->mountpoint, h->container_base_path,
                                                    conf->cgroup_meta.monitor_dir, CGROUP_PIVOT, NULL);
                else if (conf && conf->cgroup_meta.dir)
@@ -1283,8 +1259,7 @@ static bool check_cgroup_dir_config(struct lxc_conf *conf)
        return true;
 }
 
-__cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops,
-                                                     struct lxc_handler *handler)
+__cgfsng_ops static bool cgfsng_monitor_create(struct cgroup_ops *ops, struct lxc_handler *handler)
 {
        __do_free char *monitor_cgroup = NULL, *__cgroup_tree = NULL;
        const char *cgroup_tree;
@@ -1372,8 +1347,7 @@ __cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops,
  * Try to create the same cgroup in all hierarchies. Start with cgroup_pattern;
  * next cgroup_pattern-1, -2, ..., -999.
  */
-__cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops,
-                                                     struct lxc_handler *handler)
+__cgfsng_ops static bool cgfsng_payload_create(struct cgroup_ops *ops, struct lxc_handler *handler)
 {
        __do_free char *container_cgroup = NULL,
                       *__cgroup_tree = NULL,
@@ -1672,7 +1646,7 @@ __cgfsng_ops static bool cgfsng_chown(struct cgroup_ops *ops,
        return true;
 }
 
-__cgfsng_ops void cgfsng_payload_finalize(struct cgroup_ops *ops)
+__cgfsng_ops static void cgfsng_payload_finalize(struct cgroup_ops *ops)
 {
        if (!ops)
                return;
@@ -1691,6 +1665,27 @@ __cgfsng_ops void cgfsng_payload_finalize(struct cgroup_ops *ops)
                if (!is_unified_hierarchy(h))
                        close_prot_errno_disarm(h->cgfd_con);
        }
+
+       /*
+        * Checking for freezer support should obviously be done at cgroup
+        * initialization time, but that doesn't work reliably. The freezer
+        * controller has been demoted (rightly so) to a simple file located
+        * in each non-root cgroup. At the time the container is created we
+        * might still be located in /sys/fs/cgroup, so checking for
+        * cgroup.freeze won't tell us anything because this file doesn't
+        * exist in the root cgroup. We could iterate through /sys/fs/cgroup,
+        * find an already existing cgroup, and check within it for
+        * cgroup.freeze, but that only works on systemd-based hosts; other
+        * init systems might not manage cgroups, so no cgroup may exist at
+        * all. We therefore defer the check until we have created the
+        * cgroups for our container, which means we check here.
+        */
+       if (pure_unified_layout(ops) &&
+           !faccessat(ops->unified->cgfd_con, "cgroup.freeze", F_OK,
+                      AT_SYMLINK_NOFOLLOW)) {
+               TRACE("Unified hierarchy supports freezer");
+               ops->unified->freezer_controller = 1;
+       }
 }
 
 /* cgroup-full:* is done, no need to create subdirs */
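For context, a rough sketch of how the detected flag would later be used: on cgroup2 the freezer is just the cgroup.freeze file in the container's cgroup, so freezing amounts to writing "1" into it (shown here with the lxc_write_openat() helper used elsewhere in this file; the real freeze path typically also waits for cgroup.events to report the frozen state):

	if (pure_unified_layout(ops) && ops->unified->freezer_controller) {
		ret = lxc_write_openat(ops->unified->container_full_path,
				       "cgroup.freeze", "1", 1);
		if (ret < 0)
			return log_error_errno(-1, errno, "Failed to freeze container");
	}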
@@ -1832,10 +1827,7 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
        }
 
        if (!wants_force_mount) {
-               if (!lxc_list_empty(&handler->conf->keepcaps))
-                       wants_force_mount = !in_caplist(CAP_SYS_ADMIN, &handler->conf->keepcaps);
-               else
-                       wants_force_mount = in_caplist(CAP_SYS_ADMIN, &handler->conf->caps);
+               wants_force_mount = !lxc_wants_cap(CAP_SYS_ADMIN, handler->conf);
 
                /*
                 * Most recent distro versions currently have init system that
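The open-coded capability check above is folded into lxc_wants_cap(). A plausible sketch of what such a helper encapsulates, mirroring the removed logic (the actual definition lives in LXC's conf code and may differ):

static bool lxc_wants_cap(int cap, struct lxc_conf *conf)
{
	/* An explicit keep list wins: the cap is wanted only if listed. */
	if (!lxc_list_empty(&conf->keepcaps))
		return in_caplist(cap, &conf->keepcaps);

	/* Otherwise the cap is wanted unless it was explicitly dropped. */
	return !in_caplist(cap, &conf->caps);
}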
@@ -1874,10 +1866,26 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
                return cg_mount_cgroup_full(type, ops->unified, cgroup_root) == 0;
        }
 
-       /* mount tmpfs */
-       ret = safe_mount(NULL, cgroup_root, "tmpfs",
-                        MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME,
-                        "size=10240k,mode=755", root);
+       /*
+        * Mount a tmpfs over DEFAULT_CGROUP_MOUNTPOINT. Note that we're
+        * relying on RESOLVE_BENEATH so we need to skip the leading "/" in the
+        * DEFAULT_CGROUP_MOUNTPOINT define.
+        */
+       ret = safe_mount_beneath(root, NULL,
+                                DEFAULT_CGROUP_MOUNTPOINT_RELATIVE,
+                                "tmpfs",
+                                MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME,
+                                "size=10240k,mode=755");
+       if (ret < 0) {
+               if (errno != ENOSYS)
+                       return log_error_errno(false, errno,
+                                              "Failed to mount tmpfs on %s",
+                                              DEFAULT_CGROUP_MOUNTPOINT);
+
+               ret = safe_mount(NULL, cgroup_root, "tmpfs",
+                                MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME,
+                                "size=10240k,mode=755", root);
+       }
        if (ret < 0)
                return false;
 
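safe_mount_beneath() relies on openat2() with RESOLVE_BENEATH so the mount target cannot escape the root via symlinks or "..". Kernels older than 5.6 have no openat2() and return ENOSYS, which is exactly the case the fallback above handles. A hedged sketch of the underlying resolution step (open_beneath() is a hypothetical name):

#include <linux/openat2.h>
#include <sys/syscall.h>
#include <unistd.h>

static int open_beneath(int beneath_fd, const char *path, unsigned int flags)
{
	struct open_how how = {
		.flags   = flags,
		.resolve = RESOLVE_BENEATH | RESOLVE_NO_MAGICLINKS,
	};

	/* No glibc wrapper at the time of this change; returns ENOSYS on < 5.6. */
	return syscall(__NR_openat2, beneath_fd, path, &how, sizeof(how));
}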
@@ -2211,14 +2219,22 @@ static int cgroup_attach_leaf(const struct lxc_conf *conf, int unified_fd, pid_t
 
        do {
                bool rm = false;
-               char attach_cgroup[STRLITERALLEN(".lxc-1000/cgroup.procs") + 1];
-               char *slash;
+               char attach_cgroup[STRLITERALLEN(".lxc-/cgroup.procs") + INTTYPE_TO_STRLEN(int) + 1];
+               char *slash = attach_cgroup;
 
                ret = snprintf(attach_cgroup, sizeof(attach_cgroup), ".lxc-%d/cgroup.procs", idx);
                if (ret < 0 || (size_t)ret >= sizeof(attach_cgroup))
                        return ret_errno(EIO);
 
-               slash = &attach_cgroup[ret] - STRLITERALLEN("/cgroup.procs");
+               /*
+                * This shouldn't really happen but the compiler might complain
+                * that a short write would cause a buffer overrun. So be on
+                * the safe side.
+                */
+               if (ret < STRLITERALLEN(".lxc-/cgroup.procs"))
+                       return log_error_errno(-EINVAL, EINVAL, "Unexpected short write would cause buffer-overrun");
+
+               slash += (ret - STRLITERALLEN("/cgroup.procs"));
                *slash = '\0';
 
                ret = mkdirat(unified_fd, attach_cgroup, 0755);
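A quick worked example of the pointer arithmetic: for idx == 7, snprintf() produces ".lxc-7/cgroup.procs", so ret == 19 and STRLITERALLEN("/cgroup.procs") == 13, leaving slash at offset 6, i.e. pointing at the '/' separator:

	ret == 19                          /* ".lxc-7/cgroup.procs" */
	slash == attach_cgroup + 19 - 13   /* offset 6, the '/'     */
	attach_cgroup == ".lxc-7"          /* after *slash = '\0'   */

The temporary truncation gives mkdirat() just the ".lxc-7" directory name, and the buffer itself is sized for the largest int the format string can produce.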
@@ -2655,7 +2671,7 @@ __cgfsng_ops static int cgfsng_set(struct cgroup_ops *ops,
                *p = '\0';
 
        if (pure_unified_layout(ops) && strcmp(controller, "devices") == 0) {
-               struct device_item device = {0};
+               struct device_item device = {};
 
                ret = device_cgroup_rule_parse(&device, key, value);
                if (ret < 0)
@@ -2733,9 +2749,6 @@ static int device_cgroup_rule_parse_devpath(struct device_item *device,
        if (device_cgroup_parse_access(device, mode) < 0)
                return -1;
 
-       if (n_parts == 1)
-               return ret_set_errno(-1, EINVAL);
-
        ret = stat(path, &sb);
        if (ret < 0)
                return ret_set_errno(-1, errno);
@@ -2762,7 +2775,7 @@ static int device_cgroup_rule_parse_devpath(struct device_item *device,
 
 static int convert_devpath(const char *invalue, char *dest)
 {
-       struct device_item device = {0};
+       struct device_item device = {};
        int ret;
 
        ret = device_cgroup_rule_parse_devpath(&device, invalue);
@@ -2882,7 +2895,7 @@ static int bpf_device_cgroup_prepare(struct cgroup_ops *ops,
                                     const char *val)
 {
 #ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX
-       struct device_item device_item = {0};
+       struct device_item device_item = {};
        int ret;
 
        if (strcmp("devices.allow", key) == 0 && *val == '/')
@@ -2934,33 +2947,28 @@ __cgfsng_ops static bool cgfsng_setup_limits(struct cgroup_ops *ops,
                struct lxc_cgroup *cg = iterator->elem;
                int ret;
 
-               if (strncmp("devices", cg->subsystem, 7) == 0) {
-                       ret = bpf_device_cgroup_prepare(ops, conf, cg->subsystem,
-                                                       cg->value);
-               } else {
-                       ret = lxc_write_openat(h->container_limit_path,
-                                              cg->subsystem, cg->value,
-                                              strlen(cg->value));
-                       if (ret < 0)
-                               return log_error_errno(false, errno, "Failed to set \"%s\" to \"%s\"",
-                                                      cg->subsystem, cg->value);
-               }
+               if (strncmp("devices", cg->subsystem, 7) == 0)
+                       ret = bpf_device_cgroup_prepare(ops, conf, cg->subsystem, cg->value);
+               else
+                       ret = lxc_write_openat(h->container_limit_path, cg->subsystem, cg->value, strlen(cg->value));
+               if (ret < 0)
+                       return log_error_errno(false, errno, "Failed to set \"%s\" to \"%s\"", cg->subsystem, cg->value);
+
                TRACE("Set \"%s\" to \"%s\"", cg->subsystem, cg->value);
        }
 
        return log_info(true, "Limits for the unified cgroup hierarchy have been setup");
 }
 
-__cgfsng_ops bool cgfsng_devices_activate(struct cgroup_ops *ops,
-                                         struct lxc_handler *handler)
+__cgfsng_ops static bool cgfsng_devices_activate(struct cgroup_ops *ops, struct lxc_handler *handler)
 {
 #ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX
-       __do_bpf_program_free struct bpf_program *devices = NULL;
+       __do_bpf_program_free struct bpf_program *prog = NULL;
        int ret;
        struct lxc_conf *conf;
        struct hierarchy *unified;
        struct lxc_list *it;
-       struct bpf_program *devices_old;
+       struct bpf_program *prog_old;
 
        if (!ops)
                return ret_set_errno(false, ENOENT);
@@ -2980,18 +2988,18 @@ __cgfsng_ops bool cgfsng_devices_activate(struct cgroup_ops *ops,
            !unified->container_full_path || lxc_list_empty(&conf->devices))
                return true;
 
-       devices = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE);
-       if (!devices)
+       prog = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE);
+       if (!prog)
                return log_error_errno(false, ENOMEM, "Failed to create new bpf program");
 
-       ret = bpf_program_init(devices);
+       ret = bpf_program_init(prog);
        if (ret)
                return log_error_errno(false, ENOMEM, "Failed to initialize bpf program");
 
        lxc_list_for_each(it, &conf->devices) {
                struct device_item *cur = it->elem;
 
-               ret = bpf_program_append_device(devices, cur);
+               ret = bpf_program_append_device(prog, cur);
                if (ret)
                        return log_error_errno(false, ENOMEM, "Failed to add new rule to bpf device program: type %c, major %d, minor %d, access %s, allow %d, global_rule %d",
                                               cur->type,
@@ -3009,25 +3017,25 @@ __cgfsng_ops bool cgfsng_devices_activate(struct cgroup_ops *ops,
                      cur->global_rule);
        }
 
-       ret = bpf_program_finalize(devices);
+       ret = bpf_program_finalize(prog);
        if (ret)
                return log_error_errno(false, ENOMEM, "Failed to finalize bpf program");
 
-       ret = bpf_program_cgroup_attach(devices, BPF_CGROUP_DEVICE,
+       ret = bpf_program_cgroup_attach(prog, BPF_CGROUP_DEVICE,
                                        unified->container_limit_path,
                                        BPF_F_ALLOW_MULTI);
        if (ret)
                return log_error_errno(false, ENOMEM, "Failed to attach bpf program");
 
        /* Replace old bpf program. */
-       devices_old = move_ptr(conf->cgroup2_devices);
-       conf->cgroup2_devices = move_ptr(devices);
-       devices = move_ptr(devices_old);
+       prog_old = move_ptr(ops->cgroup2_devices);
+       ops->cgroup2_devices = move_ptr(prog);
+       prog = move_ptr(prog_old);
 #endif
        return true;
 }
 
-bool __cgfsng_delegate_controllers(struct cgroup_ops *ops, const char *cgroup)
+static bool __cgfsng_delegate_controllers(struct cgroup_ops *ops, const char *cgroup)
 {
        __do_free char *add_controllers = NULL, *base_path = NULL;
        __do_free_string_list char **parts = NULL;
@@ -3086,7 +3094,7 @@ bool __cgfsng_delegate_controllers(struct cgroup_ops *ops, const char *cgroup)
        return true;
 }
 
-__cgfsng_ops bool cgfsng_monitor_delegate_controllers(struct cgroup_ops *ops)
+__cgfsng_ops static bool cgfsng_monitor_delegate_controllers(struct cgroup_ops *ops)
 {
        if (!ops)
                return ret_set_errno(false, ENOENT);
@@ -3094,7 +3102,7 @@ __cgfsng_ops bool cgfsng_monitor_delegate_controllers(struct cgroup_ops *ops)
        return __cgfsng_delegate_controllers(ops, ops->monitor_cgroup);
 }
 
-__cgfsng_ops bool cgfsng_payload_delegate_controllers(struct cgroup_ops *ops)
+__cgfsng_ops static bool cgfsng_payload_delegate_controllers(struct cgroup_ops *ops)
 {
        if (!ops)
                return ret_set_errno(false, ENOENT);
@@ -3135,7 +3143,7 @@ static void cg_unified_delegate(char ***delegate)
        char *token;
        int idx;
 
-       buf = read_file("/sys/kernel/cgroup/delegate");
+       buf = read_file_at(-EBADF, "/sys/kernel/cgroup/delegate");
        if (!buf) {
                for (char **p = standard; p && *p; p++) {
                        idx = append_null_to_list((void ***)delegate);
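For reference, /sys/kernel/cgroup/delegate lists the cgroup2 files the kernel considers safe to hand over to a less privileged owner; on a recent kernel it typically contains something like the following (the exact set varies by kernel version, which is why the hard-coded standard fallback above exists):

	cgroup.procs
	cgroup.threads
	cgroup.subtree_control
	memory.pressure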
@@ -3173,9 +3181,9 @@ static int cg_hybrid_init(struct cgroup_ops *ops, bool relative, bool unprivileg
         * cgroups as our base in that case.
         */
        if (!relative && (geteuid() == 0))
-               basecginfo = read_file("/proc/1/cgroup");
+               basecginfo = read_file_at(-EBADF, "/proc/1/cgroup");
        else
-               basecginfo = read_file("/proc/self/cgroup");
+               basecginfo = read_file_at(-EBADF, "/proc/self/cgroup");
        if (!basecginfo)
                return ret_set_errno(-1, ENOMEM);
 
@@ -3228,7 +3236,7 @@ static int cg_hybrid_init(struct cgroup_ops *ops, bool relative, bool unprivileg
 
                mountpoint = cg_hybrid_get_mountpoint(line);
                if (!mountpoint) {
-                       ERROR("Failed parsing mountpoint from \"%s\"", line);
+                       WARN("Failed parsing mountpoint from \"%s\"", line);
                        continue;
                }
 
@@ -3237,7 +3245,7 @@ static int cg_hybrid_init(struct cgroup_ops *ops, bool relative, bool unprivileg
                else
                        base_cgroup = cg_hybrid_get_current_cgroup(basecginfo, NULL, CGROUP2_SUPER_MAGIC);
                if (!base_cgroup) {
-                       ERROR("Failed to find current cgroup");
+                       WARN("Failed to find current cgroup");
                        continue;
                }
 
@@ -3259,7 +3267,7 @@ static int cg_hybrid_init(struct cgroup_ops *ops, bool relative, bool unprivileg
                                                        "cgroup.controllers",
                                                        NULL);
 
-                       controller_list = cg_unified_get_controllers(cgv2_ctrl_path);
+                       controller_list = cg_unified_get_controllers(-EBADF, cgv2_ctrl_path);
                        free(cgv2_ctrl_path);
                        if (!controller_list) {
                                controller_list = cg_unified_make_empty_controller();
@@ -3302,9 +3310,9 @@ static char *cg_unified_get_current_cgroup(bool relative)
        char *base_cgroup;
 
        if (!relative && (geteuid() == 0))
-               basecginfo = read_file("/proc/1/cgroup");
+               basecginfo = read_file_at(-EBADF, "/proc/1/cgroup");
        else
-               basecginfo = read_file("/proc/self/cgroup");
+               basecginfo = read_file_at(-EBADF, "/proc/self/cgroup");
        if (!basecginfo)
                return NULL;
 
@@ -3323,12 +3331,11 @@ static char *cg_unified_get_current_cgroup(bool relative)
 static int cg_unified_init(struct cgroup_ops *ops, bool relative,
                           bool unprivileged)
 {
-       __do_free char *subtree_path = NULL;
+       __do_close int cgroup_root_fd = -EBADF;
+       __do_free char *base_cgroup = NULL, *controllers_path = NULL;
        int ret;
-       char *mountpoint;
        char **delegatable;
        struct hierarchy *new;
-       char *base_cgroup = NULL;
 
        ret = unified_cgroup_hierarchy();
        if (ret == -ENOMEDIUM)
@@ -3343,14 +3350,18 @@ static int cg_unified_init(struct cgroup_ops *ops, bool relative,
        if (!relative)
                prune_init_scope(base_cgroup);
 
+       cgroup_root_fd = openat(-EBADF, DEFAULT_CGROUP_MOUNTPOINT,
+                               O_NOCTTY | O_CLOEXEC | O_NOFOLLOW | O_DIRECTORY);
+       if (cgroup_root_fd < 0)
+               return -errno;
+
        /*
         * We assume that the cgroup we're currently in has been delegated to
         * us and we are free to further delegate all of the controllers listed
         * in cgroup.controllers further down the hierarchy.
         */
-       mountpoint = must_copy_string(DEFAULT_CGROUP_MOUNTPOINT);
-       subtree_path = must_make_path(mountpoint, base_cgroup, "cgroup.controllers", NULL);
-       delegatable = cg_unified_get_controllers(subtree_path);
+       controllers_path = must_make_path_relative(base_cgroup, "cgroup.controllers", NULL);
+       delegatable = cg_unified_get_controllers(cgroup_root_fd, controllers_path);
        if (!delegatable)
                delegatable = cg_unified_make_empty_controller();
        if (!delegatable[0])
@@ -3363,7 +3374,11 @@ static int cg_unified_init(struct cgroup_ops *ops, bool relative,
         * controllers per container.
         */
 
-       new = add_hierarchy(&ops->hierarchies, delegatable, mountpoint, base_cgroup, CGROUP2_SUPER_MAGIC);
+       new = add_hierarchy(&ops->hierarchies,
+                           delegatable,
+                           must_copy_string(DEFAULT_CGROUP_MOUNTPOINT),
+                           move_ptr(base_cgroup),
+                           CGROUP2_SUPER_MAGIC);
        if (unprivileged)
                cg_unified_delegate(&new->cgroup2_chown);
 
@@ -3423,11 +3438,10 @@ struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf)
 {
        __do_free struct cgroup_ops *cgfsng_ops = NULL;
 
-       cgfsng_ops = malloc(sizeof(struct cgroup_ops));
+       cgfsng_ops = zalloc(sizeof(struct cgroup_ops));
        if (!cgfsng_ops)
                return ret_set_errno(NULL, ENOMEM);
 
-       memset(cgfsng_ops, 0, sizeof(struct cgroup_ops));
        cgfsng_ops->cgroup_layout = CGROUP_LAYOUT_UNKNOWN;
 
        if (cg_init(cgfsng_ops, conf))
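Finally, the commit title refers to zalloc(), a zero-initializing allocation helper that replaces the malloc() plus memset() pair removed in this hunk. A minimal sketch of such a helper, assuming it lives in a shared memory-utility header:

static inline void *zalloc(size_t size)
{
	/* calloc() returns zeroed memory, so no separate memset() is needed. */
	return calloc(1, size);
}

Using calloc() also removes the risk of forgetting the memset() after the allocation, which is precisely the pattern this change eliminates.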