mainloop: add io_uring support

[mirror_lxc.git] / src / lxc / cgroups / cgfsng.c
diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c

index 7d8fec5e850a3ef9e48cfd1b9eb8af159c4bbdff..46754217ccfaa583fe557fac7f6f86a73045d85a 100644 (file)
--- a/src/lxc/cgroups/cgfsng.c
+++ b/src/lxc/cgroups/cgfsng.c
@@ -425,7 +425,7 @@ static int cgroup_hierarchy_add(struct cgroup_ops *ops, int dfd_mnt, char *mnt,
         int idx;
  
         if (abspath(base_cgroup))
-               return syserrno_set(-EINVAL, "Container base path must be relative to controller mount");
+               return syserror_set(-EINVAL, "Container base path must be relative to controller mount");
  
         new = zalloc(sizeof(*new));
         if (!new)
@@ -685,29 +685,29 @@ static bool cpuset1_initialize(int dfd_base, int dfd_next)
          */
         bytes = lxc_readat(dfd_base, "cgroup.clone_children", &v, 1);
         if (bytes < 0)
-               return syserrno(false, "Failed to read file %d(cgroup.clone_children)", dfd_base);
+               return syserror_ret(false, "Failed to read file %d(cgroup.clone_children)", dfd_base);
  
         /*
         * Initialize cpuset.cpus and make remove any isolated
         * and offline cpus.
          */
         if (!cpuset1_cpus_initialize(dfd_base, dfd_next, v == '1'))
-               return syserrno(false, "Failed to initialize cpuset.cpus");
+               return syserror_ret(false, "Failed to initialize cpuset.cpus");
  
         /* Read cpuset.mems from parent... */
         bytes = lxc_readat(dfd_base, "cpuset.mems", mems, sizeof(mems));
         if (bytes < 0)
-               return syserrno(false, "Failed to read file %d(cpuset.mems)", dfd_base);
+               return syserror_ret(false, "Failed to read file %d(cpuset.mems)", dfd_base);
  
         /* ... and copy to first cgroup in the tree... */
         bytes = lxc_writeat(dfd_next, "cpuset.mems", mems, bytes);
         if (bytes < 0)
-               return syserrno(false, "Failed to write %d(cpuset.mems)", dfd_next);
+               return syserror_ret(false, "Failed to write %d(cpuset.mems)", dfd_next);
  
         /* ... and finally turn on cpuset inheritance. */
         bytes = lxc_writeat(dfd_next, "cgroup.clone_children", "1", 1);
         if (bytes < 0)
-               return syserrno(false, "Failed to write %d(cgroup.clone_children)", dfd_next);
+               return syserror_ret(false, "Failed to write %d(cgroup.clone_children)", dfd_next);
  
         return log_trace(true, "Initialized cpuset in the legacy hierarchy");
  }
@@ -736,15 +736,15 @@ static int __cgroup_tree_create(int dfd_base, const char *path, mode_t mode,
                  * absolute nor walks upwards.
                  */
                 if (abspath(cur))
-                       return syserrno_set(-EINVAL, "No absolute paths allowed");
+                       return syserror_set(-EINVAL, "No absolute paths allowed");
  
                 if (strnequal(cur, "..", STRLITERALLEN("..")))
-                       return syserrno_set(-EINVAL, "No upward walking paths allowed");
+                       return syserror_set(-EINVAL, "No upward walking paths allowed");
  
                 ret = mkdirat(dfd_cur, cur, mode);
                 if (ret < 0) {
                         if (errno != EEXIST)
-                               return syserrno(-errno, "Failed to create %d(%s)", dfd_cur, cur);
+                               return syserror("Failed to create %d(%s)", dfd_cur, cur);
  
                         ret = -EEXIST;
                 }
@@ -752,12 +752,12 @@ static int __cgroup_tree_create(int dfd_base, const char *path, mode_t mode,
  
                 dfd_final = open_at(dfd_cur, cur, PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_BENEATH, 0);
                 if (dfd_final < 0)
-                       return syserrno(-errno, "Fail to open%s directory %d(%s)",
+                       return syserror("Fail to open%s directory %d(%s)",
                                         !ret ? " newly created" : "", dfd_base, cur);
                 if (dfd_cur != dfd_base)
                         close(dfd_cur);
                 else if (cpuset_v1 && !cpuset1_initialize(dfd_base, dfd_final))
-                       return syserrno(-EINVAL, "Failed to initialize cpuset controller in the legacy hierarchy");
+                       return syserror_set(-EINVAL, "Failed to initialize cpuset controller in the legacy hierarchy");
                 /*
                  * Leave dfd_final pointing to the last fd we opened so
                  * it will be automatically zapped if we return early.
@@ -768,7 +768,7 @@ static int __cgroup_tree_create(int dfd_base, const char *path, mode_t mode,
         /* The final cgroup must be succesfully creatd by us. */
         if (ret) {
                 if (ret != -EEXIST || !eexist_ignore)
-                       return syserrno_set(ret, "Creating the final cgroup %d(%s) failed", dfd_base, path);
+                       return syswarn_set(ret, "Creating the final cgroup %d(%s) failed", dfd_base, path);
         }
  
         return move_fd(dfd_final);
@@ -779,7 +779,6 @@ static bool cgroup_tree_create(struct cgroup_ops *ops, struct lxc_conf *conf,
                                const char *cgroup_leaf, bool payload)
  {
         __do_close int fd_limit = -EBADF, fd_final = -EBADF;
-       __do_free char *path = NULL, *limit_path = NULL;
         bool cpuset_v1 = false;
  
         /*
@@ -792,10 +791,13 @@ static bool cgroup_tree_create(struct cgroup_ops *ops, struct lxc_conf *conf,
                 /* With isolation both parts need to not already exist. */
                 fd_limit = __cgroup_tree_create(h->dfd_base, cgroup_limit_dir, 0755, cpuset_v1, false);
                 if (fd_limit < 0)
-                       return syserrno(false, "Failed to create limiting cgroup %d(%s)", h->dfd_base, cgroup_limit_dir);
+                       return syswarn_ret(false, "Failed to create limiting cgroup %d(%s)", h->dfd_base, cgroup_limit_dir);
+
+               h->path_lim = make_cgroup_path(h, h->at_base, cgroup_limit_dir, NULL);
+               h->dfd_lim = move_fd(fd_limit);
  
                 TRACE("Created limit cgroup %d->%d(%s)",
-                     fd_limit, h->dfd_base, cgroup_limit_dir);
+                     h->dfd_lim, h->dfd_base, cgroup_limit_dir);
  
                 /*
                  * With isolation the devices legacy cgroup needs to be
@@ -805,46 +807,38 @@ static bool cgroup_tree_create(struct cgroup_ops *ops, struct lxc_conf *conf,
                  */
                 if (string_in_list(h->controllers, "devices") &&
                     !ops->setup_limits_legacy(ops, conf, true))
-                       return log_error(false, "Failed to setup legacy device limits");
-
-               limit_path = make_cgroup_path(h, h->at_base, cgroup_limit_dir, NULL);
-               path = must_make_path(limit_path, cgroup_leaf, NULL);
+                       return log_warn(false, "Failed to setup legacy device limits");
  
                 /*
                  * If we use a separate limit cgroup, the leaf cgroup, i.e. the
                  * cgroup the container actually resides in, is below fd_limit.
                  */
-               fd_final = __cgroup_tree_create(fd_limit, cgroup_leaf, 0755, cpuset_v1, false);
+               fd_final = __cgroup_tree_create(h->dfd_lim, cgroup_leaf, 0755, cpuset_v1, false);
                 if (fd_final < 0) {
                         /* Ensure we don't leave any garbage behind. */
                         if (cgroup_tree_prune(h->dfd_base, cgroup_limit_dir))
                                 SYSWARN("Failed to destroy %d(%s)", h->dfd_base, cgroup_limit_dir);
                         else
                                 TRACE("Removed cgroup tree %d(%s)", h->dfd_base, cgroup_limit_dir);
+                       return syswarn_ret(false, "Failed to create %s cgroup %d(%s)", payload ? "payload" : "monitor", h->dfd_base, cgroup_limit_dir);
                 }
-       } else {
-               path = make_cgroup_path(h, h->at_base, cgroup_limit_dir, NULL);
+               h->dfd_con = move_fd(fd_final);
+               h->path_con = must_make_path(h->path_lim, cgroup_leaf, NULL);
  
+       } else {
                 fd_final = __cgroup_tree_create(h->dfd_base, cgroup_limit_dir, 0755, cpuset_v1, false);
-       }
-       if (fd_final < 0)
-               return syserrno(false, "Failed to create %s cgroup %d(%s)", payload ? "payload" : "monitor", h->dfd_base, cgroup_limit_dir);
+               if (fd_final < 0)
+                       return syswarn_ret(false, "Failed to create %s cgroup %d(%s)", payload ? "payload" : "monitor", h->dfd_base, cgroup_limit_dir);
  
-       if (payload) {
-               h->dfd_con = move_fd(fd_final);
-               h->path_con = move_ptr(path);
-
-               if (fd_limit < 0)
+               if (payload) {
+                       h->dfd_con = move_fd(fd_final);
                         h->dfd_lim = h->dfd_con;
-               else
-                       h->dfd_lim = move_fd(fd_limit);
+                       h->path_con = make_cgroup_path(h, h->at_base, cgroup_limit_dir, NULL);
  
-               if (limit_path)
-                       h->path_lim = move_ptr(limit_path);
-               else
                         h->path_lim = h->path_con;
-       } else {
-               h->dfd_mon = move_fd(fd_final);
+               } else {
+                       h->dfd_mon = move_fd(fd_final);
+               }
         }
  
         return true;
@@ -1339,7 +1333,7 @@ static int chown_cgroup_wrapper(void *data)
                 int dirfd = arg->hierarchies[i]->dfd_con;
  
                 if (dirfd < 0)
-                       return syserrno_set(-EBADF, "Invalid cgroup file descriptor");
+                       return syserror_set(-EBADF, "Invalid cgroup file descriptor");
  
                 (void)fchowmodat(dirfd, "", destuid, nsgid, 0775);
  
@@ -1695,8 +1689,8 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
                 dfd_mnt_unified = open_at(rootfs->dfd_mnt, DEFAULT_CGROUP_MOUNTPOINT_RELATIVE,
                                           PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_BENEATH_XDEV, 0);
                 if (dfd_mnt_unified < 0)
-                       return syserrno(-errno, "Failed to open %d(%s)", rootfs->dfd_mnt,
-                                       DEFAULT_CGROUP_MOUNTPOINT_RELATIVE);
+                       return syserror_ret(false, "Failed to open %d(%s)",
+                                           rootfs->dfd_mnt, DEFAULT_CGROUP_MOUNTPOINT_RELATIVE);
                 /*
                  * If cgroup namespaces are supported but the container will
                  * not have CAP_SYS_ADMIN after it has started we need to mount
@@ -1729,7 +1723,7 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
                          */
                         ret = cgroupfs_mount(cgroup_automount_type, ops->unified, rootfs, dfd_mnt_unified, "");
                         if (ret < 0)
-                               return syserrno(false, "Failed to force mount cgroup filesystem in cgroup namespace");
+                               return syserror_ret(false, "Failed to force mount cgroup filesystem in cgroup namespace");
  
                         return log_trace(true, "Force mounted cgroup filesystem in new cgroup namespace");
                 } else {
@@ -1760,7 +1754,7 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
                         }
                 }
  
-               return syserrno(false, "Failed to mount cgroups");
+               return syserror_ret(false, "Failed to mount cgroups");
         }
  
         /*
@@ -1798,8 +1792,8 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
         dfd_mnt_tmpfs = open_at(rootfs->dfd_mnt, DEFAULT_CGROUP_MOUNTPOINT_RELATIVE,
                                 PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_BENEATH_XDEV, 0);
         if (dfd_mnt_tmpfs < 0)
-               return syserrno(-errno, "Failed to open %d(%s)", rootfs->dfd_mnt,
-                               DEFAULT_CGROUP_MOUNTPOINT_RELATIVE);
+               return syserror_ret(false, "Failed to open %d(%s)",
+                                   rootfs->dfd_mnt, DEFAULT_CGROUP_MOUNTPOINT_RELATIVE);
  
         for (int i = 0; ops->hierarchies[i]; i++) {
                 __do_free char *hierarchy_mnt = NULL, *path2 = NULL;
@@ -1807,7 +1801,7 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
  
                 ret = mkdirat(dfd_mnt_tmpfs, h->at_mnt, 0000);
                 if (ret < 0)
-                       return syserrno(false, "Failed to create cgroup at_mnt %d(%s)", dfd_mnt_tmpfs, h->at_mnt);
+                       return syserror_ret(false, "Failed to create cgroup at_mnt %d(%s)", dfd_mnt_tmpfs, h->at_mnt);
  
                 if (in_cgroup_ns && wants_force_mount) {
                         /*
@@ -1910,7 +1904,7 @@ __cgfsng_ops static bool cgfsng_criu_get_hierarchies(struct cgroup_ops *ops,
         if (!ops->hierarchies)
                 return ret_set_errno(false, ENOENT);
  
-       /* sanity check n */
+       /* consistency check n */
         for (i = 0; i < n; i++)
                 if (!ops->hierarchies[i])
                         return ret_set_errno(false, ENOENT);
@@ -1933,7 +1927,7 @@ static int cg_legacy_freeze(struct cgroup_ops *ops)
  }
  
  static int freezer_cgroup_events_cb(int fd, uint32_t events, void *cbdata,
-                                   struct lxc_epoll_descr *descr)
+                                   struct lxc_async_descr *descr)
  {
         __do_free char *line = NULL;
         __do_fclose FILE *f = NULL;
@@ -1966,9 +1960,9 @@ static int cg_unified_freeze_do(struct cgroup_ops *ops, int timeout,
                                 const char *wait_error)
  {
         __do_close int fd = -EBADF;
-       call_cleaner(lxc_mainloop_close) struct lxc_epoll_descr *descr_ptr = NULL;
+       call_cleaner(lxc_mainloop_close) struct lxc_async_descr *descr_ptr = NULL;
         int ret;
-       struct lxc_epoll_descr descr;
+       struct lxc_async_descr descr;
         struct hierarchy *h;
  
         h = ops->unified;
@@ -1993,7 +1987,11 @@ static int cg_unified_freeze_do(struct cgroup_ops *ops, int timeout,
                 /* automatically cleaned up now */
                 descr_ptr = &descr;
  
-               ret = lxc_mainloop_add_handler_events(&descr, fd, EPOLLPRI, freezer_cgroup_events_cb, INT_TO_PTR(state_num));
+               ret = lxc_mainloop_add_handler_events(&descr, fd, EPOLLPRI,
+                                                     freezer_cgroup_events_cb,
+                                                     default_cleanup_handler,
+                                                     INT_TO_PTR(state_num),
+                                                     "freezer_cgroup_events_cb");
                 if (ret < 0)
                         return log_error_errno(-1, errno, "Failed to add cgroup.events fd handler to mainloop");
         }
@@ -2211,16 +2209,13 @@ static int cgroup_attach_move_into_leaf(const struct lxc_conf *conf,
                                         int *sk_fd, pid_t pid)
  {
         __do_close int sk = *sk_fd, target_fd0 = -EBADF, target_fd1 = -EBADF;
-       int target_fds[2];
         char pidstr[INTTYPE_TO_STRLEN(int64_t) + 1];
         size_t pidstr_len;
         ssize_t ret;
  
-       ret = lxc_abstract_unix_recv_two_fds(sk, target_fds);
+       ret = lxc_abstract_unix_recv_two_fds(sk, &target_fd0, &target_fd1);
         if (ret < 0)
                 return log_error_errno(-1, errno, "Failed to receive target cgroup fd");
-       target_fd0 = target_fds[0];
-       target_fd1 = target_fds[1];
  
         pidstr_len = sprintf(pidstr, INT64_FMT, (int64_t)pid);
  
@@ -2293,7 +2288,7 @@ static int __cg_unified_attach(const struct hierarchy *h,
         ret = cgroup_attach(conf, name, lxcpath, pid);
         if (ret == 0)
                 return log_trace(0, "Attached to unified cgroup via command handler");
-       if (ret != -ENOCGROUP2)
+       if (!ERRNO_IS_NOT_SUPPORTED(ret) && ret != -ENOCGROUP2)
                 return log_error_errno(ret, errno, "Failed to attach to unified cgroup");
  
         /* Fall back to retrieving the path for the unified cgroup. */
@@ -2363,9 +2358,17 @@ __cgfsng_ops static bool cgfsng_attach(struct cgroup_ops *ops,
                 }
  
                 path = lxc_cmd_get_cgroup_path(name, lxcpath, h->controllers[0]);
-               /* not running */
-               if (!path)
-                       return false;
+               if (!path) {
+                       /*
+                        * Someone might have created a name=<controller>
+                        * controller after the container has started and so
+                        * the container doesn't make use of this controller.
+                        *
+                        * Link: https://github.com/lxc/lxd/issues/8577
+                        */
+                       TRACE("Skipping unused %s controller", maybe_empty(h->controllers[0]));
+                       continue;
+               }
  
                 fullpath = build_full_cgpath_from_monitorpath(h, path, "cgroup.procs");
                 ret = lxc_write_to_file(fullpath, pidstr, len, false, 0666);
@@ -2787,7 +2790,7 @@ static int bpf_device_cgroup_prepare(struct cgroup_ops *ops,
         else
                 ret = device_cgroup_rule_parse(&device_item, key, val);
         if (ret < 0)
-               return syserrno_set(EINVAL, "Failed to parse device rule %s=%s", key, val);
+               return syserror_set(EINVAL, "Failed to parse device rule %s=%s", key, val);
  
         /*
          * Note that bpf_list_add_device() returns 1 if it altered the device
@@ -2930,20 +2933,20 @@ static bool __cgfsng_delegate_controllers(struct cgroup_ops *ops, const char *cg
                  * absolute nor walks upwards.
                  */
                 if (abspath(cur))
-                       return syserrno_set(-EINVAL, "No absolute paths allowed");
+                       return syserror_set(-EINVAL, "No absolute paths allowed");
  
                 if (strnequal(cur, "..", STRLITERALLEN("..")))
-                       return syserrno_set(-EINVAL, "No upward walking paths allowed");
+                       return syserror_set(-EINVAL, "No upward walking paths allowed");
  
                 ret = lxc_writeat(dfd_cur, "cgroup.subtree_control", add_controllers, full_len);
                 if (ret < 0)
-                       return syserrno(-errno, "Could not enable \"%s\" controllers in the unified cgroup %d", add_controllers, dfd_cur);
+                       return syserror("Could not enable \"%s\" controllers in the unified cgroup %d", add_controllers, dfd_cur);
  
                 TRACE("Enabled \"%s\" controllers in the unified cgroup %d", add_controllers, dfd_cur);
  
                 dfd_final = open_at(dfd_cur, cur, PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_BENEATH, 0);
                 if (dfd_final < 0)
-                       return syserrno(-errno, "Fail to open directory %d(%s)", dfd_cur, cur);
+                       return syserror("Fail to open directory %d(%s)", dfd_cur, cur);
                 if (dfd_cur != unified->dfd_base)
                         close(dfd_cur);
                 /*
@@ -3030,7 +3033,7 @@ static int __list_cgroup_delegate(char ***delegate)
                 }
  
                 *delegate = move_ptr(list);
-               return syswarn(0, "Failed to read /sys/kernel/cgroup/delegate");
+               return syswarn_ret(0, "Failed to read /sys/kernel/cgroup/delegate");
         }
  
         lxc_iterate_parts(token, buf, " \t\n") {
@@ -3057,13 +3060,13 @@ static bool unified_hierarchy_delegated(int dfd_base, char ***ret_files)
  
         ret = __list_cgroup_delegate(&list);
         if (ret < 0)
-               return syserrno(ret, "Failed to determine unified cgroup delegation requirements");
+               return syserror_ret(ret, "Failed to determine unified cgroup delegation requirements");
  
         for (char *const *s = list; s && *s; s++) {
                 if (!faccessat(dfd_base, *s, W_OK, 0) || errno == ENOENT)
                         continue;
  
-               return sysinfo(false, "The %s file is not writable, skipping unified hierarchy", *s);
+               return sysinfo_ret(false, "The %s file is not writable, skipping unified hierarchy", *s);
         }
  
         *ret_files = move_ptr(list);
@@ -3072,12 +3075,64 @@ static bool unified_hierarchy_delegated(int dfd_base, char ***ret_files)
  
  static bool legacy_hierarchy_delegated(int dfd_base)
  {
-       if (faccessat(dfd_base, "cgroup.procs", W_OK, 0) && errno != ENOENT)
-               return sysinfo(false, "The cgroup.procs file is not writable, skipping legacy hierarchy");
+       int ret;
+
+       ret = faccessat(dfd_base, ".", W_OK, 0);
+       if (ret < 0 && errno != ENOENT)
+               return sysinfo_ret(false, "Legacy hierarchy not writable, skipping");
  
         return true;
  }
  
+/**
+ * systemd guarantees that the order of co-mounted controllers is stable. On
+ * some systems /proc/self/cgroup lists the controllers in reverse order though.
+ *
+ * For example, this is how the order is mismatched on CentOS 7:
+ *
+ *      [root@localhost ~]# cat /proc/self/cgroup
+ *      11:perf_event:/
+ *      10:pids:/
+ *      9:freezer:/
+ * >>>> 8:cpuacct,cpu:/
+ *      7:memory:/
+ *      6:blkio:/
+ *      5:devices:/
+ *      4:hugetlb:/
+ * >>>> 3:net_prio,net_cls:/
+ *      2:cpuset:/
+ *      1:name=systemd:/user.slice/user-0.slice/session-c1.scope
+ *
+ * whereas the mountpoints are named:
+ *
+ *      | |-/sys/fs/cgroup                    tmpfs         tmpfs      ro,nosuid,nodev,noexec,mode=755
+ *      | | |-/sys/fs/cgroup/systemd          cgroup        cgroup     rw,nosuid,nodev,noexec,relatime,xattr,release_agent=/usr/lib/systemd/systemd-cgroups-agent,name=systemd
+ *      | | |-/sys/fs/cgroup/cpuset           cgroup        cgroup     rw,nosuid,nodev,noexec,relatime,cpuset
+ * >>>> | | |-/sys/fs/cgroup/net_cls,net_prio cgroup        cgroup     rw,nosuid,nodev,noexec,relatime,net_prio,net_cls
+ *      | | |-/sys/fs/cgroup/hugetlb          cgroup        cgroup     rw,nosuid,nodev,noexec,relatime,hugetlb
+ *      | | |-/sys/fs/cgroup/devices          cgroup        cgroup     rw,nosuid,nodev,noexec,relatime,devices
+ *      | | |-/sys/fs/cgroup/blkio            cgroup        cgroup     rw,nosuid,nodev,noexec,relatime,blkio
+ *      | | |-/sys/fs/cgroup/memory           cgroup        cgroup     rw,nosuid,nodev,noexec,relatime,memory
+ * >>>> | | |-/sys/fs/cgroup/cpu,cpuacct      cgroup        cgroup     rw,nosuid,nodev,noexec,relatime,cpuacct,cpu
+ *      | | |-/sys/fs/cgroup/freezer          cgroup        cgroup     rw,nosuid,nodev,noexec,relatime,freezer
+ *      | | |-/sys/fs/cgroup/pids             cgroup        cgroup     rw,nosuid,nodev,noexec,relatime,pids
+ *      | | `-/sys/fs/cgroup/perf_event       cgroup        cgroup     rw,nosuid,nodev,noexec,relatime,perf_event
+ *
+ * Map the two known reversed pairs to the systemd-guaranteed stable order used
+ * when checking for the mountpoint; other values go through unprefix().
+ */
+__attribute__((returns_nonnull)) __attribute__((nonnull))
+static const char *stable_order(const char *controllers)
+{
+       if (strequal(controllers, "cpuacct,cpu"))
+               return "cpu,cpuacct";
+
+       if (strequal(controllers, "net_prio,net_cls"))
+               return "net_cls,net_prio";
+
+       return unprefix(controllers);
+}
+
  static int __initialize_cgroups(struct cgroup_ops *ops, bool relative,
                                 bool unprivileged)
  {
@@ -3126,7 +3181,7 @@ static int __initialize_cgroups(struct cgroup_ops *ops, bool relative,
                         }
                         if (dfd_mnt < 0) {
                                 if (errno != ENOENT)
-                                       return syserrno(-errno, "Failed to open %d/unified", ops->dfd_mnt);
+                                       return syserror("Failed to open %d/unified", ops->dfd_mnt);
  
                                 SYSTRACE("Unified cgroup not mounted");
                                 continue;
@@ -3137,8 +3192,15 @@ static int __initialize_cgroups(struct cgroup_ops *ops, bool relative,
                                 dfd_base = open_at(dfd_mnt, current_cgroup,
                                                    PROTECT_OPATH_DIRECTORY,
                                                    PROTECT_LOOKUP_BENEATH_XDEV, 0);
-                               if (dfd_base < 0)
-                                       return syserrno(-errno, "Failed to open %d/%s", dfd_mnt, current_cgroup);
+                               if (dfd_base < 0) {
+                                       if (errno != ENOENT)
+                                               return syserror("Failed to open %d/%s",
+                                                               dfd_mnt, current_cgroup);
+
+                                       SYSTRACE("Current cgroup %d/%s does not exist (funky cgroup layout?)",
+                                                dfd_mnt, current_cgroup);
+                                       continue;
+                               }
                                 dfd = dfd_base;
                         }
  
@@ -3150,7 +3212,7 @@ static int __initialize_cgroups(struct cgroup_ops *ops, bool relative,
                                 TRACE("No controllers are enabled for delegation in the unified hierarchy");
                                 controller_list = list_new();
                                 if (!controller_list)
-                                       return syserrno(-ENOMEM, "Failed to create empty controller list");
+                                       return syserror_set(-ENOMEM, "Failed to create empty controller list");
                         }
  
                         controllers = strdup(unified_mnt);
@@ -3172,16 +3234,17 @@ static int __initialize_cgroups(struct cgroup_ops *ops, bool relative,
                         *__current_cgroup = '\0';
                         __current_cgroup++;
  
-                       controllers = strdup(unprefix(__controllers));
+                       controllers = strdup(stable_order(__controllers));
                         if (!controllers)
                                 return ret_errno(ENOMEM);
  
                         dfd_mnt = open_at(ops->dfd_mnt,
-                                         controllers, PROTECT_OPATH_DIRECTORY,
+                                         controllers,
+                                         PROTECT_OPATH_DIRECTORY,
                                           PROTECT_LOOKUP_ABSOLUTE_XDEV, 0);
                         if (dfd_mnt < 0) {
                                 if (errno != ENOENT)
-                                       return syserrno(-errno, "Failed to open %d/%s",
+                                       return syserror("Failed to open %d/%s",
                                                         ops->dfd_mnt, controllers);
  
                                 SYSTRACE("%s not mounted", controllers);
@@ -3207,9 +3270,15 @@ static int __initialize_cgroups(struct cgroup_ops *ops, bool relative,
                                 dfd_base = open_at(dfd_mnt, current_cgroup,
                                                    PROTECT_OPATH_DIRECTORY,
                                                    PROTECT_LOOKUP_BENEATH_XDEV, 0);
-                               if (dfd_base < 0)
-                                       return syserrno(-errno, "Failed to open %d/%s",
-                                                       dfd_mnt, current_cgroup);
+                               if (dfd_base < 0) {
+                                       if (errno != ENOENT)
+                                               return syserror("Failed to open %d/%s",
+                                                               dfd_mnt, current_cgroup);
+
+                                       SYSTRACE("Current cgroup %d/%s does not exist (funky cgroup layout?)",
+                                                dfd_mnt, current_cgroup);
+                                       continue;
+                               }
                                 dfd = dfd_base;
                         }
  
@@ -3223,7 +3292,7 @@ static int __initialize_cgroups(struct cgroup_ops *ops, bool relative,
                          */
                         controller_list = list_add_controllers(__controllers);
                         if (!controller_list)
-                               return syserrno(-ENOMEM, "Failed to create controller list from %s", __controllers);
+                               return syserror_set(-ENOMEM, "Failed to create controller list from %s", __controllers);
  
                         if (skip_hierarchy(ops, controller_list))
                                 continue;
@@ -3234,7 +3303,7 @@ static int __initialize_cgroups(struct cgroup_ops *ops, bool relative,
                 ret = cgroup_hierarchy_add(ops, dfd_mnt, controllers, dfd,
                                            current_cgroup, controller_list, type);
                 if (ret < 0)
-                       return syserrno(ret, "Failed to add %s hierarchy", controllers);
+                       return syserror_ret(ret, "Failed to add %s hierarchy", controllers);
  
                 /* Transfer ownership. */
                 move_fd(dfd_mnt);
@@ -3258,7 +3327,7 @@ static int __initialize_cgroups(struct cgroup_ops *ops, bool relative,
         }
  
         if (!controllers_available(ops))
-               return syserrno_set(-ENOENT, "One or more requested controllers unavailable or not delegated");
+               return syserror_set(-ENOENT, "One or more requested controllers unavailable or not delegated");
  
         return 0;
  }
@@ -3280,7 +3349,7 @@ static int initialize_cgroups(struct cgroup_ops *ops, struct lxc_conf *conf)
         dfd = open_at(-EBADF, DEFAULT_CGROUP_MOUNTPOINT,
                         PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_ABSOLUTE_XDEV, 0);
         if (dfd < 0)
-               return syserrno(-errno, "Failed to open " DEFAULT_CGROUP_MOUNTPOINT);
+               return syserror("Failed to open " DEFAULT_CGROUP_MOUNTPOINT);
  
         controllers_use = lxc_global_config_value("lxc.cgroup.use");
         if (controllers_use) {
@@ -3307,7 +3376,7 @@ static int initialize_cgroups(struct cgroup_ops *ops, struct lxc_conf *conf)
  
         ret = __initialize_cgroups(ops, conf->cgroup_meta.relative, !lxc_list_empty(&conf->id_map));
         if (ret < 0)
-               return syserrno(ret, "Failed to initialize cgroups");
+               return syserror_ret(ret, "Failed to initialize cgroups");
  
         /* Transfer ownership to cgroup_ops. */
         move_fd(dfd);
@@ -3334,14 +3403,14 @@ __cgfsng_ops static int cgfsng_data_init(struct cgroup_ops *ops)
  
  struct cgroup_ops *cgroup_ops_init(struct lxc_conf *conf)
  {
-       __do_free struct cgroup_ops *cgfsng_ops = NULL;
+       __cleanup_cgroup_ops struct cgroup_ops *cgfsng_ops = NULL;
  
         cgfsng_ops = zalloc(sizeof(struct cgroup_ops));
         if (!cgfsng_ops)
                 return ret_set_errno(NULL, ENOMEM);
  
-       cgfsng_ops->cgroup_layout = CGROUP_LAYOUT_UNKNOWN;
-       cgfsng_ops->dfd_mnt = -EBADF;
+       cgfsng_ops->cgroup_layout       = CGROUP_LAYOUT_UNKNOWN;
+       cgfsng_ops->dfd_mnt             = -EBADF;
  
         if (initialize_cgroups(cgfsng_ops, conf))
                 return NULL;
@@ -3430,13 +3499,13 @@ static int __cgroup_attach_many(const struct lxc_conf *conf, const char *name,
                 else
                         ret = lxc_writeat(dfd_con, "cgroup.procs", pidstr, pidstr_len);
                 if (ret)
-                       return syserrno(ret, "Failed to attach to cgroup fd %d", dfd_con);
+                       return syserror_ret(ret, "Failed to attach to cgroup fd %d", dfd_con);
                 else
                         TRACE("Attached to cgroup fd %d", dfd_con);
         }
  
         if (idx == 0)
-               return syserrno_set(-ENOENT, "Failed to attach to cgroups");
+               return syserror_set(-ENOENT, "Failed to attach to cgroups");
  
         TRACE("Attached to %s cgroup layout", cgroup_layout_name(ctx->layout));
         return 0;
@@ -3588,9 +3657,9 @@ static int do_cgroup_freeze(int unified_fd,
                             const char *wait_error)
  {
         __do_close int events_fd = -EBADF;
-       call_cleaner(lxc_mainloop_close) struct lxc_epoll_descr *descr_ptr = NULL;
+       call_cleaner(lxc_mainloop_close) struct lxc_async_descr *descr_ptr = NULL;
         int ret;
-       struct lxc_epoll_descr descr = {};
+       struct lxc_async_descr descr = {};
  
         if (timeout != 0) {
                 ret = lxc_mainloop_open(&descr);
@@ -3604,7 +3673,11 @@ static int do_cgroup_freeze(int unified_fd,
                 if (events_fd < 0)
                         return log_error_errno(-errno, errno, "Failed to open cgroup.events file");
  
-               ret = lxc_mainloop_add_handler_events(&descr, events_fd, EPOLLPRI, freezer_cgroup_events_cb, INT_TO_PTR(state_num));
+               ret = lxc_mainloop_add_handler_events(&descr, events_fd, EPOLLPRI,
+                                                     freezer_cgroup_events_cb,
+                                                     default_cleanup_handler,
+                                                     INT_TO_PTR(state_num),
+                                                     "freezer_cgroup_events_cb");
                 if (ret < 0)
                         return log_error_errno(-1, errno, "Failed to add cgroup.events fd handler to mainloop");
         }