git.proxmox.com Git - mirror_lxc.git/blobdiff - src/lxc/cgroups/cgfsng.c
mainloop: add io_uring support
[mirror_lxc.git] / src / lxc / cgroups / cgfsng.c
index 7d8fec5e850a3ef9e48cfd1b9eb8af159c4bbdff..46754217ccfaa583fe557fac7f6f86a73045d85a 100644 (file)
@@ -425,7 +425,7 @@ static int cgroup_hierarchy_add(struct cgroup_ops *ops, int dfd_mnt, char *mnt,
        int idx;
 
        if (abspath(base_cgroup))
-               return syserrno_set(-EINVAL, "Container base path must be relative to controller mount");
+               return syserror_set(-EINVAL, "Container base path must be relative to controller mount");
 
        new = zalloc(sizeof(*new));
        if (!new)
@@ -685,29 +685,29 @@ static bool cpuset1_initialize(int dfd_base, int dfd_next)
         */
        bytes = lxc_readat(dfd_base, "cgroup.clone_children", &v, 1);
        if (bytes < 0)
-               return syserrno(false, "Failed to read file %d(cgroup.clone_children)", dfd_base);
+               return syserror_ret(false, "Failed to read file %d(cgroup.clone_children)", dfd_base);
 
        /*
        * Initialize cpuset.cpus and make remove any isolated
        * and offline cpus.
         */
        if (!cpuset1_cpus_initialize(dfd_base, dfd_next, v == '1'))
-               return syserrno(false, "Failed to initialize cpuset.cpus");
+               return syserror_ret(false, "Failed to initialize cpuset.cpus");
 
        /* Read cpuset.mems from parent... */
        bytes = lxc_readat(dfd_base, "cpuset.mems", mems, sizeof(mems));
        if (bytes < 0)
-               return syserrno(false, "Failed to read file %d(cpuset.mems)", dfd_base);
+               return syserror_ret(false, "Failed to read file %d(cpuset.mems)", dfd_base);
 
        /* ... and copy to first cgroup in the tree... */
        bytes = lxc_writeat(dfd_next, "cpuset.mems", mems, bytes);
        if (bytes < 0)
-               return syserrno(false, "Failed to write %d(cpuset.mems)", dfd_next);
+               return syserror_ret(false, "Failed to write %d(cpuset.mems)", dfd_next);
 
        /* ... and finally turn on cpuset inheritance. */
        bytes = lxc_writeat(dfd_next, "cgroup.clone_children", "1", 1);
        if (bytes < 0)
-               return syserrno(false, "Failed to write %d(cgroup.clone_children)", dfd_next);
+               return syserror_ret(false, "Failed to write %d(cgroup.clone_children)", dfd_next);
 
        return log_trace(true, "Initialized cpuset in the legacy hierarchy");
 }
@@ -736,15 +736,15 @@ static int __cgroup_tree_create(int dfd_base, const char *path, mode_t mode,
                 * absolute nor walks upwards.
                 */
                if (abspath(cur))
-                       return syserrno_set(-EINVAL, "No absolute paths allowed");
+                       return syserror_set(-EINVAL, "No absolute paths allowed");
 
                if (strnequal(cur, "..", STRLITERALLEN("..")))
-                       return syserrno_set(-EINVAL, "No upward walking paths allowed");
+                       return syserror_set(-EINVAL, "No upward walking paths allowed");
 
                ret = mkdirat(dfd_cur, cur, mode);
                if (ret < 0) {
                        if (errno != EEXIST)
-                               return syserrno(-errno, "Failed to create %d(%s)", dfd_cur, cur);
+                               return syserror("Failed to create %d(%s)", dfd_cur, cur);
 
                        ret = -EEXIST;
                }
@@ -752,12 +752,12 @@ static int __cgroup_tree_create(int dfd_base, const char *path, mode_t mode,
 
                dfd_final = open_at(dfd_cur, cur, PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_BENEATH, 0);
                if (dfd_final < 0)
-                       return syserrno(-errno, "Fail to open%s directory %d(%s)",
+                       return syserror("Fail to open%s directory %d(%s)",
                                        !ret ? " newly created" : "", dfd_base, cur);
                if (dfd_cur != dfd_base)
                        close(dfd_cur);
                else if (cpuset_v1 && !cpuset1_initialize(dfd_base, dfd_final))
-                       return syserrno(-EINVAL, "Failed to initialize cpuset controller in the legacy hierarchy");
+                       return syserror_set(-EINVAL, "Failed to initialize cpuset controller in the legacy hierarchy");
                /*
                 * Leave dfd_final pointing to the last fd we opened so
                 * it will be automatically zapped if we return early.
@@ -768,7 +768,7 @@ static int __cgroup_tree_create(int dfd_base, const char *path, mode_t mode,
        /* The final cgroup must be succesfully creatd by us. */
        if (ret) {
                if (ret != -EEXIST || !eexist_ignore)
-                       return syserrno_set(ret, "Creating the final cgroup %d(%s) failed", dfd_base, path);
+                       return syswarn_set(ret, "Creating the final cgroup %d(%s) failed", dfd_base, path);
        }
 
        return move_fd(dfd_final);
@@ -779,7 +779,6 @@ static bool cgroup_tree_create(struct cgroup_ops *ops, struct lxc_conf *conf,
                               const char *cgroup_leaf, bool payload)
 {
        __do_close int fd_limit = -EBADF, fd_final = -EBADF;
-       __do_free char *path = NULL, *limit_path = NULL;
        bool cpuset_v1 = false;
 
        /*
@@ -792,10 +791,13 @@ static bool cgroup_tree_create(struct cgroup_ops *ops, struct lxc_conf *conf,
                /* With isolation both parts need to not already exist. */
                fd_limit = __cgroup_tree_create(h->dfd_base, cgroup_limit_dir, 0755, cpuset_v1, false);
                if (fd_limit < 0)
-                       return syserrno(false, "Failed to create limiting cgroup %d(%s)", h->dfd_base, cgroup_limit_dir);
+                       return syswarn_ret(false, "Failed to create limiting cgroup %d(%s)", h->dfd_base, cgroup_limit_dir);
+
+               h->path_lim = make_cgroup_path(h, h->at_base, cgroup_limit_dir, NULL);
+               h->dfd_lim = move_fd(fd_limit);
 
                TRACE("Created limit cgroup %d->%d(%s)",
-                     fd_limit, h->dfd_base, cgroup_limit_dir);
+                     h->dfd_lim, h->dfd_base, cgroup_limit_dir);
 
                /*
                 * With isolation the devices legacy cgroup needs to be
@@ -805,46 +807,38 @@ static bool cgroup_tree_create(struct cgroup_ops *ops, struct lxc_conf *conf,
                 */
                if (string_in_list(h->controllers, "devices") &&
                    !ops->setup_limits_legacy(ops, conf, true))
-                       return log_error(false, "Failed to setup legacy device limits");
-
-               limit_path = make_cgroup_path(h, h->at_base, cgroup_limit_dir, NULL);
-               path = must_make_path(limit_path, cgroup_leaf, NULL);
+                       return log_warn(false, "Failed to setup legacy device limits");
 
                /*
                 * If we use a separate limit cgroup, the leaf cgroup, i.e. the
                 * cgroup the container actually resides in, is below fd_limit.
                 */
-               fd_final = __cgroup_tree_create(fd_limit, cgroup_leaf, 0755, cpuset_v1, false);
+               fd_final = __cgroup_tree_create(h->dfd_lim, cgroup_leaf, 0755, cpuset_v1, false);
                if (fd_final < 0) {
                        /* Ensure we don't leave any garbage behind. */
                        if (cgroup_tree_prune(h->dfd_base, cgroup_limit_dir))
                                SYSWARN("Failed to destroy %d(%s)", h->dfd_base, cgroup_limit_dir);
                        else
                                TRACE("Removed cgroup tree %d(%s)", h->dfd_base, cgroup_limit_dir);
+                       return syswarn_ret(false, "Failed to create %s cgroup %d(%s)", payload ? "payload" : "monitor", h->dfd_base, cgroup_limit_dir);
                }
-       } else {
-               path = make_cgroup_path(h, h->at_base, cgroup_limit_dir, NULL);
+               h->dfd_con = move_fd(fd_final);
+               h->path_con = must_make_path(h->path_lim, cgroup_leaf, NULL);
 
+       } else {
                fd_final = __cgroup_tree_create(h->dfd_base, cgroup_limit_dir, 0755, cpuset_v1, false);
-       }
-       if (fd_final < 0)
-               return syserrno(false, "Failed to create %s cgroup %d(%s)", payload ? "payload" : "monitor", h->dfd_base, cgroup_limit_dir);
+               if (fd_final < 0)
+                       return syswarn_ret(false, "Failed to create %s cgroup %d(%s)", payload ? "payload" : "monitor", h->dfd_base, cgroup_limit_dir);
 
-       if (payload) {
-               h->dfd_con = move_fd(fd_final);
-               h->path_con = move_ptr(path);
-
-               if (fd_limit < 0)
+               if (payload) {
+                       h->dfd_con = move_fd(fd_final);
                        h->dfd_lim = h->dfd_con;
-               else
-                       h->dfd_lim = move_fd(fd_limit);
+                       h->path_con = make_cgroup_path(h, h->at_base, cgroup_limit_dir, NULL);
 
-               if (limit_path)
-                       h->path_lim = move_ptr(limit_path);
-               else
                        h->path_lim = h->path_con;
-       } else {
-               h->dfd_mon = move_fd(fd_final);
+               } else {
+                       h->dfd_mon = move_fd(fd_final);
+               }
        }
 
        return true;
@@ -1339,7 +1333,7 @@ static int chown_cgroup_wrapper(void *data)
                int dirfd = arg->hierarchies[i]->dfd_con;
 
                if (dirfd < 0)
-                       return syserrno_set(-EBADF, "Invalid cgroup file descriptor");
+                       return syserror_set(-EBADF, "Invalid cgroup file descriptor");
 
                (void)fchowmodat(dirfd, "", destuid, nsgid, 0775);
 
@@ -1695,8 +1689,8 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
                dfd_mnt_unified = open_at(rootfs->dfd_mnt, DEFAULT_CGROUP_MOUNTPOINT_RELATIVE,
                                          PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_BENEATH_XDEV, 0);
                if (dfd_mnt_unified < 0)
-                       return syserrno(-errno, "Failed to open %d(%s)", rootfs->dfd_mnt,
-                                       DEFAULT_CGROUP_MOUNTPOINT_RELATIVE);
+                       return syserror_ret(false, "Failed to open %d(%s)",
+                                           rootfs->dfd_mnt, DEFAULT_CGROUP_MOUNTPOINT_RELATIVE);
                /*
                 * If cgroup namespaces are supported but the container will
                 * not have CAP_SYS_ADMIN after it has started we need to mount
@@ -1729,7 +1723,7 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
                         */
                        ret = cgroupfs_mount(cgroup_automount_type, ops->unified, rootfs, dfd_mnt_unified, "");
                        if (ret < 0)
-                               return syserrno(false, "Failed to force mount cgroup filesystem in cgroup namespace");
+                               return syserror_ret(false, "Failed to force mount cgroup filesystem in cgroup namespace");
 
                        return log_trace(true, "Force mounted cgroup filesystem in new cgroup namespace");
                } else {
@@ -1760,7 +1754,7 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
                        }
                }
 
-               return syserrno(false, "Failed to mount cgroups");
+               return syserror_ret(false, "Failed to mount cgroups");
        }
 
        /*
@@ -1798,8 +1792,8 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
        dfd_mnt_tmpfs = open_at(rootfs->dfd_mnt, DEFAULT_CGROUP_MOUNTPOINT_RELATIVE,
                                PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_BENEATH_XDEV, 0);
        if (dfd_mnt_tmpfs < 0)
-               return syserrno(-errno, "Failed to open %d(%s)", rootfs->dfd_mnt,
-                               DEFAULT_CGROUP_MOUNTPOINT_RELATIVE);
+               return syserror_ret(false, "Failed to open %d(%s)",
+                                   rootfs->dfd_mnt, DEFAULT_CGROUP_MOUNTPOINT_RELATIVE);
 
        for (int i = 0; ops->hierarchies[i]; i++) {
                __do_free char *hierarchy_mnt = NULL, *path2 = NULL;
@@ -1807,7 +1801,7 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
 
                ret = mkdirat(dfd_mnt_tmpfs, h->at_mnt, 0000);
                if (ret < 0)
-                       return syserrno(false, "Failed to create cgroup at_mnt %d(%s)", dfd_mnt_tmpfs, h->at_mnt);
+                       return syserror_ret(false, "Failed to create cgroup at_mnt %d(%s)", dfd_mnt_tmpfs, h->at_mnt);
 
                if (in_cgroup_ns && wants_force_mount) {
                        /*
@@ -1910,7 +1904,7 @@ __cgfsng_ops static bool cgfsng_criu_get_hierarchies(struct cgroup_ops *ops,
        if (!ops->hierarchies)
                return ret_set_errno(false, ENOENT);
 
-       /* sanity check n */
+       /* consistency check n */
        for (i = 0; i < n; i++)
                if (!ops->hierarchies[i])
                        return ret_set_errno(false, ENOENT);
@@ -1933,7 +1927,7 @@ static int cg_legacy_freeze(struct cgroup_ops *ops)
 }
 
 static int freezer_cgroup_events_cb(int fd, uint32_t events, void *cbdata,
-                                   struct lxc_epoll_descr *descr)
+                                   struct lxc_async_descr *descr)
 {
        __do_free char *line = NULL;
        __do_fclose FILE *f = NULL;
@@ -1966,9 +1960,9 @@ static int cg_unified_freeze_do(struct cgroup_ops *ops, int timeout,
                                const char *wait_error)
 {
        __do_close int fd = -EBADF;
-       call_cleaner(lxc_mainloop_close) struct lxc_epoll_descr *descr_ptr = NULL;
+       call_cleaner(lxc_mainloop_close) struct lxc_async_descr *descr_ptr = NULL;
        int ret;
-       struct lxc_epoll_descr descr;
+       struct lxc_async_descr descr;
        struct hierarchy *h;
 
        h = ops->unified;
@@ -1993,7 +1987,11 @@ static int cg_unified_freeze_do(struct cgroup_ops *ops, int timeout,
                /* automatically cleaned up now */
                descr_ptr = &descr;
 
-               ret = lxc_mainloop_add_handler_events(&descr, fd, EPOLLPRI, freezer_cgroup_events_cb, INT_TO_PTR(state_num));
+               ret = lxc_mainloop_add_handler_events(&descr, fd, EPOLLPRI,
+                                                     freezer_cgroup_events_cb,
+                                                     default_cleanup_handler,
+                                                     INT_TO_PTR(state_num),
+                                                     "freezer_cgroup_events_cb");
                if (ret < 0)
                        return log_error_errno(-1, errno, "Failed to add cgroup.events fd handler to mainloop");
        }
@@ -2211,16 +2209,13 @@ static int cgroup_attach_move_into_leaf(const struct lxc_conf *conf,
                                        int *sk_fd, pid_t pid)
 {
        __do_close int sk = *sk_fd, target_fd0 = -EBADF, target_fd1 = -EBADF;
-       int target_fds[2];
        char pidstr[INTTYPE_TO_STRLEN(int64_t) + 1];
        size_t pidstr_len;
        ssize_t ret;
 
-       ret = lxc_abstract_unix_recv_two_fds(sk, target_fds);
+       ret = lxc_abstract_unix_recv_two_fds(sk, &target_fd0, &target_fd1);
        if (ret < 0)
                return log_error_errno(-1, errno, "Failed to receive target cgroup fd");
-       target_fd0 = target_fds[0];
-       target_fd1 = target_fds[1];
 
        pidstr_len = sprintf(pidstr, INT64_FMT, (int64_t)pid);
 
@@ -2293,7 +2288,7 @@ static int __cg_unified_attach(const struct hierarchy *h,
        ret = cgroup_attach(conf, name, lxcpath, pid);
        if (ret == 0)
                return log_trace(0, "Attached to unified cgroup via command handler");
-       if (ret != -ENOCGROUP2)
+       if (!ERRNO_IS_NOT_SUPPORTED(ret) && ret != -ENOCGROUP2)
                return log_error_errno(ret, errno, "Failed to attach to unified cgroup");
 
        /* Fall back to retrieving the path for the unified cgroup. */
@@ -2363,9 +2358,17 @@ __cgfsng_ops static bool cgfsng_attach(struct cgroup_ops *ops,
                }
 
                path = lxc_cmd_get_cgroup_path(name, lxcpath, h->controllers[0]);
-               /* not running */
-               if (!path)
-                       return false;
+               if (!path) {
+                       /*
+                        * Someone might have created a name=<controller>
+                        * controller after the container has started and so
+                        * the container doesn't make use of this controller.
+                        *
+                        * Link: https://github.com/lxc/lxd/issues/8577
+                        */
+                       TRACE("Skipping unused %s controller", maybe_empty(h->controllers[0]));
+                       continue;
+               }
 
                fullpath = build_full_cgpath_from_monitorpath(h, path, "cgroup.procs");
                ret = lxc_write_to_file(fullpath, pidstr, len, false, 0666);
@@ -2787,7 +2790,7 @@ static int bpf_device_cgroup_prepare(struct cgroup_ops *ops,
        else
                ret = device_cgroup_rule_parse(&device_item, key, val);
        if (ret < 0)
-               return syserrno_set(EINVAL, "Failed to parse device rule %s=%s", key, val);
+               return syserror_set(EINVAL, "Failed to parse device rule %s=%s", key, val);
 
        /*
         * Note that bpf_list_add_device() returns 1 if it altered the device
@@ -2930,20 +2933,20 @@ static bool __cgfsng_delegate_controllers(struct cgroup_ops *ops, const char *cg
                 * absolute nor walks upwards.
                 */
                if (abspath(cur))
-                       return syserrno_set(-EINVAL, "No absolute paths allowed");
+                       return syserror_set(-EINVAL, "No absolute paths allowed");
 
                if (strnequal(cur, "..", STRLITERALLEN("..")))
-                       return syserrno_set(-EINVAL, "No upward walking paths allowed");
+                       return syserror_set(-EINVAL, "No upward walking paths allowed");
 
                ret = lxc_writeat(dfd_cur, "cgroup.subtree_control", add_controllers, full_len);
                if (ret < 0)
-                       return syserrno(-errno, "Could not enable \"%s\" controllers in the unified cgroup %d", add_controllers, dfd_cur);
+                       return syserror("Could not enable \"%s\" controllers in the unified cgroup %d", add_controllers, dfd_cur);
 
                TRACE("Enabled \"%s\" controllers in the unified cgroup %d", add_controllers, dfd_cur);
 
                dfd_final = open_at(dfd_cur, cur, PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_BENEATH, 0);
                if (dfd_final < 0)
-                       return syserrno(-errno, "Fail to open directory %d(%s)", dfd_cur, cur);
+                       return syserror("Fail to open directory %d(%s)", dfd_cur, cur);
                if (dfd_cur != unified->dfd_base)
                        close(dfd_cur);
                /*
@@ -3030,7 +3033,7 @@ static int __list_cgroup_delegate(char ***delegate)
                }
 
                *delegate = move_ptr(list);
-               return syswarn(0, "Failed to read /sys/kernel/cgroup/delegate");
+               return syswarn_ret(0, "Failed to read /sys/kernel/cgroup/delegate");
        }
 
        lxc_iterate_parts(token, buf, " \t\n") {
@@ -3057,13 +3060,13 @@ static bool unified_hierarchy_delegated(int dfd_base, char ***ret_files)
 
        ret = __list_cgroup_delegate(&list);
        if (ret < 0)
-               return syserrno(ret, "Failed to determine unified cgroup delegation requirements");
+               return syserror_ret(ret, "Failed to determine unified cgroup delegation requirements");
 
        for (char *const *s = list; s && *s; s++) {
                if (!faccessat(dfd_base, *s, W_OK, 0) || errno == ENOENT)
                        continue;
 
-               return sysinfo(false, "The %s file is not writable, skipping unified hierarchy", *s);
+               return sysinfo_ret(false, "The %s file is not writable, skipping unified hierarchy", *s);
        }
 
        *ret_files = move_ptr(list);
@@ -3072,12 +3075,64 @@ static bool unified_hierarchy_delegated(int dfd_base, char ***ret_files)
 
 static bool legacy_hierarchy_delegated(int dfd_base)
 {
-       if (faccessat(dfd_base, "cgroup.procs", W_OK, 0) && errno != ENOENT)
-               return sysinfo(false, "The cgroup.procs file is not writable, skipping legacy hierarchy");
+       int ret;
+
+       ret = faccessat(dfd_base, ".", W_OK, 0); /* probe write access to the directory dfd_base refers to */
+       if (ret < 0 && errno != ENOENT) /* a failure with ENOENT alone does not disqualify the hierarchy */
+               return sysinfo_ret(false, "Legacy hierarchy not writable, skipping");
 
        return true;
 }
 
+/**
+ * stable_order - map a co-mounted controller list to its stable mount name
+ *
+ * systemd guarantees that the order of co-mounted controllers is stable, but
+ * /proc/self/cgroup may list them reversed, e.g. on CentOS 7:
+ *
+ *      [root@localhost ~]# cat /proc/self/cgroup
+ *      11:perf_event:/
+ *      10:pids:/
+ *      9:freezer:/
+ * >>>> 8:cpuacct,cpu:/
+ *      7:memory:/
+ *      6:blkio:/
+ *      5:devices:/
+ *      4:hugetlb:/
+ * >>>> 3:net_prio,net_cls:/
+ *      2:cpuset:/
+ *      1:name=systemd:/user.slice/user-0.slice/session-c1.scope
+ *
+ * whereas the mountpoint:
+ *
+ *      | |-/sys/fs/cgroup                    tmpfs         tmpfs      ro,nosuid,nodev,noexec,mode=755
+ *      | | |-/sys/fs/cgroup/systemd          cgroup        cgroup     rw,nosuid,nodev,noexec,relatime,xattr,release_agent=/usr/lib/systemd/systemd-cgroups-agent,name=systemd
+ *      | | |-/sys/fs/cgroup/cpuset           cgroup        cgroup     rw,nosuid,nodev,noexec,relatime,cpuset
+ * >>>> | | |-/sys/fs/cgroup/net_cls,net_prio cgroup        cgroup     rw,nosuid,nodev,noexec,relatime,net_prio,net_cls
+ *      | | |-/sys/fs/cgroup/hugetlb          cgroup        cgroup     rw,nosuid,nodev,noexec,relatime,hugetlb
+ *      | | |-/sys/fs/cgroup/devices          cgroup        cgroup     rw,nosuid,nodev,noexec,relatime,devices
+ *      | | |-/sys/fs/cgroup/blkio            cgroup        cgroup     rw,nosuid,nodev,noexec,relatime,blkio
+ *      | | |-/sys/fs/cgroup/memory           cgroup        cgroup     rw,nosuid,nodev,noexec,relatime,memory
+ * >>>> | | |-/sys/fs/cgroup/cpu,cpuacct      cgroup        cgroup     rw,nosuid,nodev,noexec,relatime,cpuacct,cpu
+ *      | | |-/sys/fs/cgroup/freezer          cgroup        cgroup     rw,nosuid,nodev,noexec,relatime,freezer
+ *      | | |-/sys/fs/cgroup/pids             cgroup        cgroup     rw,nosuid,nodev,noexec,relatime,pids
+ *      | | `-/sys/fs/cgroup/perf_event       cgroup        cgroup     rw,nosuid,nodev,noexec,relatime,perf_event
+ *
+ * Ensure the mountpoint is always checked under the systemd-guaranteed stable
+ * order: swap the two known reversed pairs, pass anything else to unprefix().
+ */
+__attribute__((returns_nonnull)) __attribute__((nonnull))
+static const char *stable_order(const char *controllers)
+{
+       if (strequal(controllers, "cpuacct,cpu"))
+               return "cpu,cpuacct";
+
+       if (strequal(controllers, "net_prio,net_cls"))
+               return "net_cls,net_prio";
+
+       return unprefix(controllers);
+}
+
 static int __initialize_cgroups(struct cgroup_ops *ops, bool relative,
                                bool unprivileged)
 {
@@ -3126,7 +3181,7 @@ static int __initialize_cgroups(struct cgroup_ops *ops, bool relative,
                        }
                        if (dfd_mnt < 0) {
                                if (errno != ENOENT)
-                                       return syserrno(-errno, "Failed to open %d/unified", ops->dfd_mnt);
+                                       return syserror("Failed to open %d/unified", ops->dfd_mnt);
 
                                SYSTRACE("Unified cgroup not mounted");
                                continue;
@@ -3137,8 +3192,15 @@ static int __initialize_cgroups(struct cgroup_ops *ops, bool relative,
                                dfd_base = open_at(dfd_mnt, current_cgroup,
                                                   PROTECT_OPATH_DIRECTORY,
                                                   PROTECT_LOOKUP_BENEATH_XDEV, 0);
-                               if (dfd_base < 0)
-                                       return syserrno(-errno, "Failed to open %d/%s", dfd_mnt, current_cgroup);
+                               if (dfd_base < 0) {
+                                       if (errno != ENOENT)
+                                               return syserror("Failed to open %d/%s",
+                                                               dfd_mnt, current_cgroup);
+
+                                       SYSTRACE("Current cgroup %d/%s does not exist (funky cgroup layout?)",
+                                                dfd_mnt, current_cgroup);
+                                       continue;
+                               }
                                dfd = dfd_base;
                        }
 
@@ -3150,7 +3212,7 @@ static int __initialize_cgroups(struct cgroup_ops *ops, bool relative,
                                TRACE("No controllers are enabled for delegation in the unified hierarchy");
                                controller_list = list_new();
                                if (!controller_list)
-                                       return syserrno(-ENOMEM, "Failed to create empty controller list");
+                                       return syserror_set(-ENOMEM, "Failed to create empty controller list");
                        }
 
                        controllers = strdup(unified_mnt);
@@ -3172,16 +3234,17 @@ static int __initialize_cgroups(struct cgroup_ops *ops, bool relative,
                        *__current_cgroup = '\0';
                        __current_cgroup++;
 
-                       controllers = strdup(unprefix(__controllers));
+                       controllers = strdup(stable_order(__controllers));
                        if (!controllers)
                                return ret_errno(ENOMEM);
 
                        dfd_mnt = open_at(ops->dfd_mnt,
-                                         controllers, PROTECT_OPATH_DIRECTORY,
+                                         controllers,
+                                         PROTECT_OPATH_DIRECTORY,
                                          PROTECT_LOOKUP_ABSOLUTE_XDEV, 0);
                        if (dfd_mnt < 0) {
                                if (errno != ENOENT)
-                                       return syserrno(-errno, "Failed to open %d/%s",
+                                       return syserror("Failed to open %d/%s",
                                                        ops->dfd_mnt, controllers);
 
                                SYSTRACE("%s not mounted", controllers);
@@ -3207,9 +3270,15 @@ static int __initialize_cgroups(struct cgroup_ops *ops, bool relative,
                                dfd_base = open_at(dfd_mnt, current_cgroup,
                                                   PROTECT_OPATH_DIRECTORY,
                                                   PROTECT_LOOKUP_BENEATH_XDEV, 0);
-                               if (dfd_base < 0)
-                                       return syserrno(-errno, "Failed to open %d/%s",
-                                                       dfd_mnt, current_cgroup);
+                               if (dfd_base < 0) {
+                                       if (errno != ENOENT)
+                                               return syserror("Failed to open %d/%s",
+                                                               dfd_mnt, current_cgroup);
+
+                                       SYSTRACE("Current cgroup %d/%s does not exist (funky cgroup layout?)",
+                                                dfd_mnt, current_cgroup);
+                                       continue;
+                               }
                                dfd = dfd_base;
                        }
 
@@ -3223,7 +3292,7 @@ static int __initialize_cgroups(struct cgroup_ops *ops, bool relative,
                         */
                        controller_list = list_add_controllers(__controllers);
                        if (!controller_list)
-                               return syserrno(-ENOMEM, "Failed to create controller list from %s", __controllers);
+                               return syserror_set(-ENOMEM, "Failed to create controller list from %s", __controllers);
 
                        if (skip_hierarchy(ops, controller_list))
                                continue;
@@ -3234,7 +3303,7 @@ static int __initialize_cgroups(struct cgroup_ops *ops, bool relative,
                ret = cgroup_hierarchy_add(ops, dfd_mnt, controllers, dfd,
                                           current_cgroup, controller_list, type);
                if (ret < 0)
-                       return syserrno(ret, "Failed to add %s hierarchy", controllers);
+                       return syserror_ret(ret, "Failed to add %s hierarchy", controllers);
 
                /* Transfer ownership. */
                move_fd(dfd_mnt);
@@ -3258,7 +3327,7 @@ static int __initialize_cgroups(struct cgroup_ops *ops, bool relative,
        }
 
        if (!controllers_available(ops))
-               return syserrno_set(-ENOENT, "One or more requested controllers unavailable or not delegated");
+               return syserror_set(-ENOENT, "One or more requested controllers unavailable or not delegated");
 
        return 0;
 }
@@ -3280,7 +3349,7 @@ static int initialize_cgroups(struct cgroup_ops *ops, struct lxc_conf *conf)
        dfd = open_at(-EBADF, DEFAULT_CGROUP_MOUNTPOINT,
                        PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_ABSOLUTE_XDEV, 0);
        if (dfd < 0)
-               return syserrno(-errno, "Failed to open " DEFAULT_CGROUP_MOUNTPOINT);
+               return syserror("Failed to open " DEFAULT_CGROUP_MOUNTPOINT);
 
        controllers_use = lxc_global_config_value("lxc.cgroup.use");
        if (controllers_use) {
@@ -3307,7 +3376,7 @@ static int initialize_cgroups(struct cgroup_ops *ops, struct lxc_conf *conf)
 
        ret = __initialize_cgroups(ops, conf->cgroup_meta.relative, !lxc_list_empty(&conf->id_map));
        if (ret < 0)
-               return syserrno(ret, "Failed to initialize cgroups");
+               return syserror_ret(ret, "Failed to initialize cgroups");
 
        /* Transfer ownership to cgroup_ops. */
        move_fd(dfd);
@@ -3334,14 +3403,14 @@ __cgfsng_ops static int cgfsng_data_init(struct cgroup_ops *ops)
 
 struct cgroup_ops *cgroup_ops_init(struct lxc_conf *conf)
 {
-       __do_free struct cgroup_ops *cgfsng_ops = NULL;
+       __cleanup_cgroup_ops struct cgroup_ops *cgfsng_ops = NULL;
 
        cgfsng_ops = zalloc(sizeof(struct cgroup_ops));
        if (!cgfsng_ops)
                return ret_set_errno(NULL, ENOMEM);
 
-       cgfsng_ops->cgroup_layout = CGROUP_LAYOUT_UNKNOWN;
-       cgfsng_ops->dfd_mnt = -EBADF;
+       cgfsng_ops->cgroup_layout       = CGROUP_LAYOUT_UNKNOWN;
+       cgfsng_ops->dfd_mnt             = -EBADF;
 
        if (initialize_cgroups(cgfsng_ops, conf))
                return NULL;
@@ -3430,13 +3499,13 @@ static int __cgroup_attach_many(const struct lxc_conf *conf, const char *name,
                else
                        ret = lxc_writeat(dfd_con, "cgroup.procs", pidstr, pidstr_len);
                if (ret)
-                       return syserrno(ret, "Failed to attach to cgroup fd %d", dfd_con);
+                       return syserror_ret(ret, "Failed to attach to cgroup fd %d", dfd_con);
                else
                        TRACE("Attached to cgroup fd %d", dfd_con);
        }
 
        if (idx == 0)
-               return syserrno_set(-ENOENT, "Failed to attach to cgroups");
+               return syserror_set(-ENOENT, "Failed to attach to cgroups");
 
        TRACE("Attached to %s cgroup layout", cgroup_layout_name(ctx->layout));
        return 0;
@@ -3588,9 +3657,9 @@ static int do_cgroup_freeze(int unified_fd,
                            const char *wait_error)
 {
        __do_close int events_fd = -EBADF;
-       call_cleaner(lxc_mainloop_close) struct lxc_epoll_descr *descr_ptr = NULL;
+       call_cleaner(lxc_mainloop_close) struct lxc_async_descr *descr_ptr = NULL;
        int ret;
-       struct lxc_epoll_descr descr = {};
+       struct lxc_async_descr descr = {};
 
        if (timeout != 0) {
                ret = lxc_mainloop_open(&descr);
@@ -3604,7 +3673,11 @@ static int do_cgroup_freeze(int unified_fd,
                if (events_fd < 0)
                        return log_error_errno(-errno, errno, "Failed to open cgroup.events file");
 
-               ret = lxc_mainloop_add_handler_events(&descr, events_fd, EPOLLPRI, freezer_cgroup_events_cb, INT_TO_PTR(state_num));
+               ret = lxc_mainloop_add_handler_events(&descr, events_fd, EPOLLPRI,
+                                                     freezer_cgroup_events_cb,
+                                                     default_cleanup_handler,
+                                                     INT_TO_PTR(state_num),
+                                                     "freezer_cgroup_events_cb");
                if (ret < 0)
                        return log_error_errno(-1, errno, "Failed to add cgroup.events fd handler to mainloop");
        }