#include "commands_utils.h"
#include "conf.h"
#include "config.h"
+#include "error_utils.h"
#include "log.h"
#include "macro.h"
#include "mainloop.h"
/* Given a handler's cgroup data, return the struct hierarchy for the controller
* @c, or NULL if there is none.
*/
-static struct hierarchy *get_hierarchy(struct cgroup_ops *ops, const char *controller)
+static struct hierarchy *get_hierarchy(const struct cgroup_ops *ops, const char *controller)
{
if (!ops->hierarchies)
return log_trace_errno(NULL, errno, "There are no useable cgroup controllers");
return ret_set_errno(NULL, ENOENT);
}
+/*
+ * Fill in @fd with the cgroup directory file descriptor and metadata of the
+ * hierarchy that provides @fd->controller.
+ *
+ * @ops:   cgroup driver state; searched through get_hierarchy().
+ * @fd:    in:  ->controller names the controller to look up; a non-zero
+ *              ->type restricts the lookup to that filesystem type.
+ *         out: ->layout, ->type, ->fd and, for the unified hierarchy,
+ *              ->utilities are set.
+ * @limit: true selects the hierarchy's limit cgroup (dfd_lim), false selects
+ *         the cgroup of the container itself (dfd_con).
+ *
+ * The descriptor stored in @fd->fd is the hierarchy's own descriptor, not a
+ * duplicate of it.
+ *
+ * Returns 0 on success. Failures are reported through ret_errno() with ENOENT
+ * (no hierarchy for the controller), EINVAL (filesystem type mismatch) or
+ * EBADF (the selected cgroup descriptor is not open).
+ */
+int prepare_cgroup_fd(const struct cgroup_ops *ops, struct cgroup_fd *fd, bool limit)
+{
+	int dfd;
+	const struct hierarchy *h;
+
+	h = get_hierarchy(ops, fd->controller);
+	if (!h)
+		return ret_errno(ENOENT);
+
+	/*
+	 * The client requested that the controller must be in a specific
+	 * cgroup version.
+	 */
+	if (fd->type != 0 && fd->type != h->fs_type)
+		return ret_errno(EINVAL);
+
+	/*
+	 * A request for the limit cgroup must be answered with dfd_lim; the
+	 * two branches used to be swapped.
+	 */
+	if (limit)
+		dfd = h->dfd_lim;
+	else
+		dfd = h->dfd_con;
+	if (dfd < 0)
+		return ret_errno(EBADF);
+
+	fd->layout = ops->cgroup_layout;
+	fd->type = h->fs_type;
+	if (fd->type == UNIFIED_HIERARCHY)
+		fd->utilities = h->utilities;
+	fd->fd = dfd;
+
+	return 0;
+}
+
/* Taken over modified from the kernel sources. */
#define NBITS 32 /* bits in uint32_t */
#define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d))
__do_free_string_list char **list = NULL;
char *it;
- lxc_iterate_parts(it, controllers, " \t\n") {
+ lxc_iterate_parts(it, controllers, ", \t\n") {
int ret;
ret = list_add_string(&list, it);
int idx;
if (abspath(base_cgroup))
- return syserrno_set(-EINVAL, "Container base path must be relative to controller mount");
+ return syserror_set(-EINVAL, "Container base path must be relative to controller mount");
new = zalloc(sizeof(*new));
if (!new)
*/
bytes = lxc_readat(dfd_base, "cgroup.clone_children", &v, 1);
if (bytes < 0)
- return syserrno(false, "Failed to read file %d(cgroup.clone_children)", dfd_base);
+ return syserror_ret(false, "Failed to read file %d(cgroup.clone_children)", dfd_base);
/*
* Initialize cpuset.cpus and make remove any isolated
* and offline cpus.
*/
if (!cpuset1_cpus_initialize(dfd_base, dfd_next, v == '1'))
- return syserrno(false, "Failed to initialize cpuset.cpus");
+ return syserror_ret(false, "Failed to initialize cpuset.cpus");
/* Read cpuset.mems from parent... */
bytes = lxc_readat(dfd_base, "cpuset.mems", mems, sizeof(mems));
if (bytes < 0)
- return syserrno(false, "Failed to read file %d(cpuset.mems)", dfd_base);
+ return syserror_ret(false, "Failed to read file %d(cpuset.mems)", dfd_base);
/* ... and copy to first cgroup in the tree... */
bytes = lxc_writeat(dfd_next, "cpuset.mems", mems, bytes);
if (bytes < 0)
- return syserrno(false, "Failed to write %d(cpuset.mems)", dfd_next);
+ return syserror_ret(false, "Failed to write %d(cpuset.mems)", dfd_next);
/* ... and finally turn on cpuset inheritance. */
bytes = lxc_writeat(dfd_next, "cgroup.clone_children", "1", 1);
if (bytes < 0)
- return syserrno(false, "Failed to write %d(cgroup.clone_children)", dfd_next);
+ return syserror_ret(false, "Failed to write %d(cgroup.clone_children)", dfd_next);
return log_trace(true, "Initialized cpuset in the legacy hierarchy");
}
* absolute nor walks upwards.
*/
if (abspath(cur))
- return syserrno_set(-EINVAL, "No absolute paths allowed");
+ return syserror_set(-EINVAL, "No absolute paths allowed");
if (strnequal(cur, "..", STRLITERALLEN("..")))
- return syserrno_set(-EINVAL, "No upward walking paths allowed");
+ return syserror_set(-EINVAL, "No upward walking paths allowed");
ret = mkdirat(dfd_cur, cur, mode);
if (ret < 0) {
if (errno != EEXIST)
- return syserrno(-errno, "Failed to create %d(%s)", dfd_cur, cur);
+ return syserror("Failed to create %d(%s)", dfd_cur, cur);
ret = -EEXIST;
}
dfd_final = open_at(dfd_cur, cur, PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_BENEATH, 0);
if (dfd_final < 0)
- return syserrno(-errno, "Fail to open%s directory %d(%s)",
+ return syserror("Fail to open%s directory %d(%s)",
!ret ? " newly created" : "", dfd_base, cur);
if (dfd_cur != dfd_base)
close(dfd_cur);
else if (cpuset_v1 && !cpuset1_initialize(dfd_base, dfd_final))
- return syserrno(-EINVAL, "Failed to initialize cpuset controller in the legacy hierarchy");
+ return syserror_set(-EINVAL, "Failed to initialize cpuset controller in the legacy hierarchy");
/*
* Leave dfd_final pointing to the last fd we opened so
* it will be automatically zapped if we return early.
/* The final cgroup must be succesfully creatd by us. */
if (ret) {
if (ret != -EEXIST || !eexist_ignore)
- return syserrno_set(ret, "Creating the final cgroup %d(%s) failed", dfd_base, path);
+ return syswarn_set(ret, "Creating the final cgroup %d(%s) failed", dfd_base, path);
}
return move_fd(dfd_final);
const char *cgroup_leaf, bool payload)
{
__do_close int fd_limit = -EBADF, fd_final = -EBADF;
- __do_free char *path = NULL, *limit_path = NULL;
bool cpuset_v1 = false;
/*
/* With isolation both parts need to not already exist. */
fd_limit = __cgroup_tree_create(h->dfd_base, cgroup_limit_dir, 0755, cpuset_v1, false);
if (fd_limit < 0)
- return syserrno(false, "Failed to create limiting cgroup %d(%s)", h->dfd_base, cgroup_limit_dir);
+ return syswarn_ret(false, "Failed to create limiting cgroup %d(%s)", h->dfd_base, cgroup_limit_dir);
+
+ h->path_lim = make_cgroup_path(h, h->at_base, cgroup_limit_dir, NULL);
+ h->dfd_lim = move_fd(fd_limit);
TRACE("Created limit cgroup %d->%d(%s)",
- fd_limit, h->dfd_base, cgroup_limit_dir);
+ h->dfd_lim, h->dfd_base, cgroup_limit_dir);
/*
* With isolation the devices legacy cgroup needs to be
*/
if (string_in_list(h->controllers, "devices") &&
!ops->setup_limits_legacy(ops, conf, true))
- return log_error(false, "Failed to setup legacy device limits");
-
- limit_path = make_cgroup_path(h, h->at_base, cgroup_limit_dir, NULL);
- path = must_make_path(limit_path, cgroup_leaf, NULL);
+ return log_warn(false, "Failed to setup legacy device limits");
/*
* If we use a separate limit cgroup, the leaf cgroup, i.e. the
* cgroup the container actually resides in, is below fd_limit.
*/
- fd_final = __cgroup_tree_create(fd_limit, cgroup_leaf, 0755, cpuset_v1, false);
+ fd_final = __cgroup_tree_create(h->dfd_lim, cgroup_leaf, 0755, cpuset_v1, false);
if (fd_final < 0) {
/* Ensure we don't leave any garbage behind. */
if (cgroup_tree_prune(h->dfd_base, cgroup_limit_dir))
SYSWARN("Failed to destroy %d(%s)", h->dfd_base, cgroup_limit_dir);
else
TRACE("Removed cgroup tree %d(%s)", h->dfd_base, cgroup_limit_dir);
+ return syswarn_ret(false, "Failed to create %s cgroup %d(%s)", payload ? "payload" : "monitor", h->dfd_base, cgroup_limit_dir);
}
- } else {
- path = make_cgroup_path(h, h->at_base, cgroup_limit_dir, NULL);
+ h->dfd_con = move_fd(fd_final);
+ h->path_con = must_make_path(h->path_lim, cgroup_leaf, NULL);
+ } else {
fd_final = __cgroup_tree_create(h->dfd_base, cgroup_limit_dir, 0755, cpuset_v1, false);
- }
- if (fd_final < 0)
- return syserrno(false, "Failed to create %s cgroup %d(%s)", payload ? "payload" : "monitor", h->dfd_base, cgroup_limit_dir);
-
- if (payload) {
- h->dfd_con = move_fd(fd_final);
- h->path_con = move_ptr(path);
+ if (fd_final < 0)
+ return syswarn_ret(false, "Failed to create %s cgroup %d(%s)", payload ? "payload" : "monitor", h->dfd_base, cgroup_limit_dir);
- if (fd_limit < 0)
+ if (payload) {
+ h->dfd_con = move_fd(fd_final);
h->dfd_lim = h->dfd_con;
- else
- h->dfd_lim = move_fd(fd_limit);
+ h->path_con = make_cgroup_path(h, h->at_base, cgroup_limit_dir, NULL);
- if (limit_path)
- h->path_lim = move_ptr(limit_path);
- else
h->path_lim = h->path_con;
- } else {
- h->dfd_mon = move_fd(fd_final);
+ } else {
+ h->dfd_mon = move_fd(fd_final);
+ }
}
return true;
for (int i = 0; arg->hierarchies[i]; i++) {
int dirfd = arg->hierarchies[i]->dfd_con;
+ if (dirfd < 0)
+ return syserror_set(-EBADF, "Invalid cgroup file descriptor");
+
(void)fchowmodat(dirfd, "", destuid, nsgid, 0775);
/*
return true;
}
-__cgfsng_ops static void cgfsng_payload_finalize(struct cgroup_ops *ops)
+__cgfsng_ops static void cgfsng_finalize(struct cgroup_ops *ops)
{
if (!ops)
return;
for (int i = 0; ops->hierarchies[i]; i++) {
struct hierarchy *h = ops->hierarchies[i];
- /*
- * we don't keep the fds for non-unified hierarchies around
- * mainly because we don't make use of them anymore after the
- * core cgroup setup is done but also because there are quite a
- * lot of them.
- */
- if (!is_unified_hierarchy(h))
- close_prot_errno_disarm(h->dfd_con);
+
+ /* Close all monitor cgroup file descriptors. */
+ close_prot_errno_disarm(h->dfd_mon);
}
+ /* Close the cgroup root file descriptor. */
+ close_prot_errno_disarm(ops->dfd_mnt);
/*
* The checking for freezer support should obviously be done at cgroup
dfd_mnt_unified = open_at(rootfs->dfd_mnt, DEFAULT_CGROUP_MOUNTPOINT_RELATIVE,
PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_BENEATH_XDEV, 0);
if (dfd_mnt_unified < 0)
- return syserrno(-errno, "Failed to open %d(%s)", rootfs->dfd_mnt,
- DEFAULT_CGROUP_MOUNTPOINT_RELATIVE);
+ return syserror_ret(false, "Failed to open %d(%s)",
+ rootfs->dfd_mnt, DEFAULT_CGROUP_MOUNTPOINT_RELATIVE);
/*
* If cgroup namespaces are supported but the container will
* not have CAP_SYS_ADMIN after it has started we need to mount
*/
ret = cgroupfs_mount(cgroup_automount_type, ops->unified, rootfs, dfd_mnt_unified, "");
if (ret < 0)
- return syserrno(false, "Failed to force mount cgroup filesystem in cgroup namespace");
+ return syserror_ret(false, "Failed to force mount cgroup filesystem in cgroup namespace");
return log_trace(true, "Force mounted cgroup filesystem in new cgroup namespace");
} else {
}
}
- return syserrno(false, "Failed to mount cgroups");
+ return syserror_ret(false, "Failed to mount cgroups");
}
/*
dfd_mnt_tmpfs = open_at(rootfs->dfd_mnt, DEFAULT_CGROUP_MOUNTPOINT_RELATIVE,
PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_BENEATH_XDEV, 0);
if (dfd_mnt_tmpfs < 0)
- return syserrno(-errno, "Failed to open %d(%s)", rootfs->dfd_mnt,
- DEFAULT_CGROUP_MOUNTPOINT_RELATIVE);
+ return syserror_ret(false, "Failed to open %d(%s)",
+ rootfs->dfd_mnt, DEFAULT_CGROUP_MOUNTPOINT_RELATIVE);
for (int i = 0; ops->hierarchies[i]; i++) {
__do_free char *hierarchy_mnt = NULL, *path2 = NULL;
ret = mkdirat(dfd_mnt_tmpfs, h->at_mnt, 0000);
if (ret < 0)
- return syserrno(false, "Failed to create cgroup at_mnt %d(%s)", dfd_mnt_tmpfs, h->at_mnt);
+ return syserror_ret(false, "Failed to create cgroup at_mnt %d(%s)", dfd_mnt_tmpfs, h->at_mnt);
if (in_cgroup_ns && wants_force_mount) {
/*
if (!ops->hierarchies)
return ret_set_errno(false, ENOENT);
- /* sanity check n */
+ /* consistency check n */
for (i = 0; i < n; i++)
if (!ops->hierarchies[i])
return ret_set_errno(false, ENOENT);
}
static int freezer_cgroup_events_cb(int fd, uint32_t events, void *cbdata,
- struct lxc_epoll_descr *descr)
+ struct lxc_async_descr *descr)
{
__do_free char *line = NULL;
__do_fclose FILE *f = NULL;
const char *wait_error)
{
__do_close int fd = -EBADF;
- call_cleaner(lxc_mainloop_close) struct lxc_epoll_descr *descr_ptr = NULL;
+ call_cleaner(lxc_mainloop_close) struct lxc_async_descr *descr_ptr = NULL;
int ret;
- struct lxc_epoll_descr descr;
+ struct lxc_async_descr descr;
struct hierarchy *h;
h = ops->unified;
/* automatically cleaned up now */
descr_ptr = &descr;
- ret = lxc_mainloop_add_handler_events(&descr, fd, EPOLLPRI, freezer_cgroup_events_cb, INT_TO_PTR(state_num));
+ ret = lxc_mainloop_add_handler_events(&descr, fd, EPOLLPRI,
+ freezer_cgroup_events_cb,
+ default_cleanup_handler,
+ INT_TO_PTR(state_num),
+ "freezer_cgroup_events_cb");
if (ret < 0)
return log_error_errno(-1, errno, "Failed to add cgroup.events fd handler to mainloop");
}
return cgfsng_get_cgroup_do(ops, controller, false);
}
-__cgfsng_ops static const char *cgfsng_get_limiting_cgroup(struct cgroup_ops *ops,
- const char *controller)
+__cgfsng_ops static const char *cgfsng_get_limit_cgroup(struct cgroup_ops *ops,
+ const char *controller) /* Thin wrapper: forwards @ops and @controller to cgfsng_get_cgroup_do() with its flag argument set to true. */
{
return cgfsng_get_cgroup_do(ops, controller, true);
}
int *sk_fd, pid_t pid)
{
__do_close int sk = *sk_fd, target_fd0 = -EBADF, target_fd1 = -EBADF;
- int target_fds[2];
char pidstr[INTTYPE_TO_STRLEN(int64_t) + 1];
size_t pidstr_len;
ssize_t ret;
- ret = lxc_abstract_unix_recv_fds(sk, target_fds, 2, NULL, 0);
- if (ret <= 0)
+ ret = lxc_abstract_unix_recv_two_fds(sk, &target_fd0, &target_fd1);
+ if (ret < 0)
return log_error_errno(-1, errno, "Failed to receive target cgroup fd");
- target_fd0 = target_fds[0];
- target_fd1 = target_fds[1];
pidstr_len = sprintf(pidstr, INT64_FMT, (int64_t)pid);
ret = cgroup_attach(conf, name, lxcpath, pid);
if (ret == 0)
return log_trace(0, "Attached to unified cgroup via command handler");
- if (ret != -ENOCGROUP2)
+ if (!ERRNO_IS_NOT_SUPPORTED(ret) && ret != -ENOCGROUP2)
return log_error_errno(ret, errno, "Failed to attach to unified cgroup");
/* Fall back to retrieving the path for the unified cgroup. */
}
path = lxc_cmd_get_cgroup_path(name, lxcpath, h->controllers[0]);
- /* not running */
- if (!path)
- return false;
+ if (!path) {
+ /*
+ * Someone might have created a name=<controller>
+ * controller after the container has started and so
+ * the container doesn't make use of this controller.
+ *
+ * Link: https://github.com/lxc/lxd/issues/8577
+ */
+ TRACE("Skipping unused %s controller", maybe_empty(h->controllers[0]));
+ continue;
+ }
fullpath = build_full_cgpath_from_monitorpath(h, path, "cgroup.procs");
ret = lxc_write_to_file(fullpath, pidstr, len, false, 0666);
if (p)
*p = '\0';
- path = lxc_cmd_get_limiting_cgroup_path(name, lxcpath, controller);
+ path = lxc_cmd_get_limit_cgroup_path(name, lxcpath, controller);
/* not running */
if (!path)
return -1;
return 0;
}
- path = lxc_cmd_get_limiting_cgroup_path(name, lxcpath, controller);
+ path = lxc_cmd_get_limit_cgroup_path(name, lxcpath, controller);
/* not running */
if (!path)
return -1;
else
ret = device_cgroup_rule_parse(&device_item, key, val);
if (ret < 0)
- return syserrno_set(EINVAL, "Failed to parse device rule %s=%s", key, val);
+ return syserror_set(EINVAL, "Failed to parse device rule %s=%s", key, val);
/*
* Note that bpf_list_add_device() returns 1 if it altered the device
* absolute nor walks upwards.
*/
if (abspath(cur))
- return syserrno_set(-EINVAL, "No absolute paths allowed");
+ return syserror_set(-EINVAL, "No absolute paths allowed");
if (strnequal(cur, "..", STRLITERALLEN("..")))
- return syserrno_set(-EINVAL, "No upward walking paths allowed");
+ return syserror_set(-EINVAL, "No upward walking paths allowed");
ret = lxc_writeat(dfd_cur, "cgroup.subtree_control", add_controllers, full_len);
if (ret < 0)
- return syserrno(-errno, "Could not enable \"%s\" controllers in the unified cgroup %d", add_controllers, dfd_cur);
+ return syserror("Could not enable \"%s\" controllers in the unified cgroup %d", add_controllers, dfd_cur);
TRACE("Enabled \"%s\" controllers in the unified cgroup %d", add_controllers, dfd_cur);
dfd_final = open_at(dfd_cur, cur, PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_BENEATH, 0);
if (dfd_final < 0)
- return syserrno(-errno, "Fail to open directory %d(%s)", dfd_cur, cur);
+ return syserror("Fail to open directory %d(%s)", dfd_cur, cur);
if (dfd_cur != unified->dfd_base)
close(dfd_cur);
/*
}
*delegate = move_ptr(list);
- return syswarn(0, "Failed to read /sys/kernel/cgroup/delegate");
+ return syswarn_ret(0, "Failed to read /sys/kernel/cgroup/delegate");
}
lxc_iterate_parts(token, buf, " \t\n") {
ret = __list_cgroup_delegate(&list);
if (ret < 0)
- return syserrno(ret, "Failed to determine unified cgroup delegation requirements");
+ return syserror_ret(ret, "Failed to determine unified cgroup delegation requirements");
for (char *const *s = list; s && *s; s++) {
if (!faccessat(dfd_base, *s, W_OK, 0) || errno == ENOENT)
continue;
- return sysinfo(false, "The %s file is not writable, skipping unified hierarchy", *s);
+ return sysinfo_ret(false, "The %s file is not writable, skipping unified hierarchy", *s);
}
*ret_files = move_ptr(list);
static bool legacy_hierarchy_delegated(int dfd_base)
{
- if (faccessat(dfd_base, "cgroup.procs", W_OK, 0) && errno != ENOENT)
- return sysinfo(false, "The cgroup.procs file is not writable, skipping legacy hierarchy");
+ int ret;
+
+ ret = faccessat(dfd_base, ".", W_OK, 0);
+ if (ret < 0 && errno != ENOENT)
+ return sysinfo_ret(false, "Legacy hierarchy not writable, skipping");
return true;
}
+/**
+ * systemd guarantees that the order of co-mounted controllers is stable. On
+ * some systems the order of the controllers might be reversed though.
+ *
+ * For example, this is how the order is mismatched on CentOS 7:
+ *
+ * [root@localhost ~]# cat /proc/self/cgroup
+ * 11:perf_event:/
+ * 10:pids:/
+ * 9:freezer:/
+ * >>>> 8:cpuacct,cpu:/
+ * 7:memory:/
+ * 6:blkio:/
+ * 5:devices:/
+ * 4:hugetlb:/
+ * >>>> 3:net_prio,net_cls:/
+ * 2:cpuset:/
+ * 1:name=systemd:/user.slice/user-0.slice/session-c1.scope
+ *
+ * whereas the mountpoint:
+ *
+ * | |-/sys/fs/cgroup tmpfs tmpfs ro,nosuid,nodev,noexec,mode=755
+ * | | |-/sys/fs/cgroup/systemd cgroup cgroup rw,nosuid,nodev,noexec,relatime,xattr,release_agent=/usr/lib/systemd/systemd-cgroups-agent,name=systemd
+ * | | |-/sys/fs/cgroup/cpuset cgroup cgroup rw,nosuid,nodev,noexec,relatime,cpuset
+ * >>>> | | |-/sys/fs/cgroup/net_cls,net_prio cgroup cgroup rw,nosuid,nodev,noexec,relatime,net_prio,net_cls
+ * | | |-/sys/fs/cgroup/hugetlb cgroup cgroup rw,nosuid,nodev,noexec,relatime,hugetlb
+ * | | |-/sys/fs/cgroup/devices cgroup cgroup rw,nosuid,nodev,noexec,relatime,devices
+ * | | |-/sys/fs/cgroup/blkio cgroup cgroup rw,nosuid,nodev,noexec,relatime,blkio
+ * | | |-/sys/fs/cgroup/memory cgroup cgroup rw,nosuid,nodev,noexec,relatime,memory
+ * >>>> | | |-/sys/fs/cgroup/cpu,cpuacct cgroup cgroup rw,nosuid,nodev,noexec,relatime,cpuacct,cpu
+ * | | |-/sys/fs/cgroup/freezer cgroup cgroup rw,nosuid,nodev,noexec,relatime,freezer
+ * | | |-/sys/fs/cgroup/pids cgroup cgroup rw,nosuid,nodev,noexec,relatime,pids
+ * | | `-/sys/fs/cgroup/perf_event cgroup cgroup rw,nosuid,nodev,noexec,relatime,perf_event
+ *
+ * Ensure that we always use the systemd-guaranteed stable order when checking
+ * for the mountpoint.
+ */
+__attribute__((returns_nonnull)) __attribute__((nonnull))
+static const char *stable_order(const char *controllers)
+{
+	/*
+	 * Co-mounted controller lists whose reported order differs from the
+	 * order used for the mountpoint name, and the order to use instead.
+	 */
+	static const struct {
+		const char *reported;
+		const char *stable;
+	} reordered[] = {
+		{ "cpuacct,cpu",      "cpu,cpuacct"      },
+		{ "net_prio,net_cls", "net_cls,net_prio" },
+	};
+
+	for (size_t i = 0; i < sizeof(reordered) / sizeof(reordered[0]); i++) {
+		if (strequal(controllers, reordered[i].reported))
+			return reordered[i].stable;
+	}
+
+	/* Every other list is handed to unprefix() as it is. */
+	return unprefix(controllers);
+}
+
static int __initialize_cgroups(struct cgroup_ops *ops, bool relative,
bool unprivileged)
{
}
if (dfd_mnt < 0) {
if (errno != ENOENT)
- return syserrno(-errno, "Failed to open %d/unified", ops->dfd_mnt);
+ return syserror("Failed to open %d/unified", ops->dfd_mnt);
SYSTRACE("Unified cgroup not mounted");
continue;
dfd_base = open_at(dfd_mnt, current_cgroup,
PROTECT_OPATH_DIRECTORY,
PROTECT_LOOKUP_BENEATH_XDEV, 0);
- if (dfd_base < 0)
- return syserrno(-errno, "Failed to open %d/%s", dfd_mnt, current_cgroup);
+ if (dfd_base < 0) {
+ if (errno != ENOENT)
+ return syserror("Failed to open %d/%s",
+ dfd_mnt, current_cgroup);
+
+ SYSTRACE("Current cgroup %d/%s does not exist (funky cgroup layout?)",
+ dfd_mnt, current_cgroup);
+ continue;
+ }
dfd = dfd_base;
}
TRACE("No controllers are enabled for delegation in the unified hierarchy");
controller_list = list_new();
if (!controller_list)
- return syserrno(-ENOMEM, "Failed to create empty controller list");
+ return syserror_set(-ENOMEM, "Failed to create empty controller list");
}
controllers = strdup(unified_mnt);
*__current_cgroup = '\0';
__current_cgroup++;
- controllers = strdup(unprefix(__controllers));
+ controllers = strdup(stable_order(__controllers));
if (!controllers)
return ret_errno(ENOMEM);
dfd_mnt = open_at(ops->dfd_mnt,
- controllers, PROTECT_OPATH_DIRECTORY,
+ controllers,
+ PROTECT_OPATH_DIRECTORY,
PROTECT_LOOKUP_ABSOLUTE_XDEV, 0);
if (dfd_mnt < 0) {
if (errno != ENOENT)
- return syserrno(-errno, "Failed to open %d/%s",
+ return syserror("Failed to open %d/%s",
ops->dfd_mnt, controllers);
SYSTRACE("%s not mounted", controllers);
dfd_base = open_at(dfd_mnt, current_cgroup,
PROTECT_OPATH_DIRECTORY,
PROTECT_LOOKUP_BENEATH_XDEV, 0);
- if (dfd_base < 0)
- return syserrno(-errno, "Failed to open %d/%s",
- dfd_mnt, current_cgroup);
+ if (dfd_base < 0) {
+ if (errno != ENOENT)
+ return syserror("Failed to open %d/%s",
+ dfd_mnt, current_cgroup);
+
+ SYSTRACE("Current cgroup %d/%s does not exist (funky cgroup layout?)",
+ dfd_mnt, current_cgroup);
+ continue;
+ }
dfd = dfd_base;
}
*/
controller_list = list_add_controllers(__controllers);
if (!controller_list)
- return syserrno(-ENOMEM, "Failed to create controller list from %s", __controllers);
+ return syserror_set(-ENOMEM, "Failed to create controller list from %s", __controllers);
if (skip_hierarchy(ops, controller_list))
continue;
ret = cgroup_hierarchy_add(ops, dfd_mnt, controllers, dfd,
current_cgroup, controller_list, type);
if (ret < 0)
- return syserrno(ret, "Failed to add %s hierarchy", controllers);
+ return syserror_ret(ret, "Failed to add %s hierarchy", controllers);
/* Transfer ownership. */
move_fd(dfd_mnt);
}
if (!controllers_available(ops))
- return syserrno_set(-ENOENT, "One or more requested controllers unavailable or not delegated");
+ return syserror_set(-ENOENT, "One or more requested controllers unavailable or not delegated");
return 0;
}
dfd = open_at(-EBADF, DEFAULT_CGROUP_MOUNTPOINT,
PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_ABSOLUTE_XDEV, 0);
if (dfd < 0)
- return syserrno(-errno, "Failed to open " DEFAULT_CGROUP_MOUNTPOINT);
+ return syserror("Failed to open " DEFAULT_CGROUP_MOUNTPOINT);
controllers_use = lxc_global_config_value("lxc.cgroup.use");
if (controllers_use) {
ret = __initialize_cgroups(ops, conf->cgroup_meta.relative, !lxc_list_empty(&conf->id_map));
if (ret < 0)
- return syserrno(ret, "Failed to initialize cgroups");
+ return syserror_ret(ret, "Failed to initialize cgroups");
/* Transfer ownership to cgroup_ops. */
move_fd(dfd);
struct cgroup_ops *cgroup_ops_init(struct lxc_conf *conf)
{
- __do_free struct cgroup_ops *cgfsng_ops = NULL;
+ __cleanup_cgroup_ops struct cgroup_ops *cgfsng_ops = NULL;
cgfsng_ops = zalloc(sizeof(struct cgroup_ops));
if (!cgfsng_ops)
return ret_set_errno(NULL, ENOMEM);
- cgfsng_ops->cgroup_layout = CGROUP_LAYOUT_UNKNOWN;
- cgfsng_ops->dfd_mnt = -EBADF;
+ cgfsng_ops->cgroup_layout = CGROUP_LAYOUT_UNKNOWN;
+ cgfsng_ops->dfd_mnt = -EBADF;
if (initialize_cgroups(cgfsng_ops, conf))
return NULL;
cgfsng_ops->payload_delegate_controllers = cgfsng_payload_delegate_controllers;
cgfsng_ops->payload_create = cgfsng_payload_create;
cgfsng_ops->payload_enter = cgfsng_payload_enter;
- cgfsng_ops->payload_finalize = cgfsng_payload_finalize;
+ cgfsng_ops->finalize = cgfsng_finalize;
cgfsng_ops->get_cgroup = cgfsng_get_cgroup;
cgfsng_ops->get = cgfsng_get;
cgfsng_ops->set = cgfsng_set;
cgfsng_ops->chown = cgfsng_chown;
cgfsng_ops->mount = cgfsng_mount;
cgfsng_ops->devices_activate = cgfsng_devices_activate;
- cgfsng_ops->get_limiting_cgroup = cgfsng_get_limiting_cgroup;
+ cgfsng_ops->get_limit_cgroup = cgfsng_get_limit_cgroup;
cgfsng_ops->criu_escape = cgfsng_criu_escape;
cgfsng_ops->criu_num_hierarchies = cgfsng_criu_num_hierarchies;
return move_ptr(cgfsng_ops);
}
-int cgroup_attach(const struct lxc_conf *conf, const char *name,
- const char *lxcpath, pid_t pid)
+static int __unified_attach_fd(const struct lxc_conf *conf, int fd_unified, pid_t pid)
{
- __do_close int unified_fd = -EBADF;
int ret;
- if (!conf || is_empty_string(name) || is_empty_string(lxcpath) || pid <= 0)
- return ret_errno(EINVAL);
-
- unified_fd = lxc_cmd_get_cgroup2_fd(name, lxcpath);
- if (unified_fd < 0)
- return ret_errno(ENOCGROUP2);
-
if (!lxc_list_empty(&conf->id_map)) {
struct userns_exec_unified_attach_data args = {
.conf = conf,
- .unified_fd = unified_fd,
+ .unified_fd = fd_unified,
.pid = pid,
};
cgroup_unified_attach_child_wrapper,
&args);
} else {
- ret = cgroup_attach_leaf(conf, unified_fd, pid);
+ ret = cgroup_attach_leaf(conf, fd_unified, pid);
+ }
+
+ return ret;
+}
+
+/*
+ * Attach @pid to every cgroup file descriptor found in the cgroup context
+ * fetched for container @name under @lxcpath.
+ *
+ * Descriptors for which unified_cgroup_fd() is true are handled by
+ * __unified_attach_fd(); for all others the pid is written to "cgroup.procs".
+ *
+ * Returns 0 once all descriptors were processed. A failure to fetch the
+ * context is reported through ret_errno(ENOSYS), a context without any
+ * descriptor through syserror_set(-ENOENT, ...), and the first failing attach
+ * aborts the loop with that attach's result.
+ */
+static int __cgroup_attach_many(const struct lxc_conf *conf, const char *name,
+				const char *lxcpath, pid_t pid)
+{
+	call_cleaner(put_cgroup_ctx) struct cgroup_ctx *ctx = &(struct cgroup_ctx){};
+	char pidstr[INTTYPE_TO_STRLEN(pid_t)];
+	ssize_t pidstr_len;
+	size_t pos = 0;
+	int ret;
+
+	ret = lxc_cmd_get_cgroup_ctx(name, lxcpath, sizeof(struct cgroup_ctx), ctx);
+	if (ret < 0)
+		return ret_errno(ENOSYS);
+
+	pidstr_len = strnprintf(pidstr, sizeof(pidstr), "%d", pid);
+	if (pidstr_len < 0)
+		return pidstr_len;
+
+	while (pos < ctx->fd_len) {
+		int dfd_con = ctx->fd[pos];
+
+		if (!unified_cgroup_fd(dfd_con))
+			ret = lxc_writeat(dfd_con, "cgroup.procs", pidstr, pidstr_len);
+		else
+			ret = __unified_attach_fd(conf, dfd_con, pid);
+
+		if (ret)
+			return syserror_ret(ret, "Failed to attach to cgroup fd %d", dfd_con);
+
+		TRACE("Attached to cgroup fd %d", dfd_con);
+		pos++;
+	}
+
+	/* Nothing was iterated over: the context carried no descriptors. */
+	if (pos == 0)
+		return syserror_set(-ENOENT, "Failed to attach to cgroups");
+
+	TRACE("Attached to %s cgroup layout", cgroup_layout_name(ctx->layout));
+	return 0;
+}
+
+/*
+ * Attach @pid through the single cgroup2 file descriptor obtained with
+ * lxc_cmd_get_cgroup2_fd() for container @name under @lxcpath.
+ *
+ * Returns ret_errno(EINVAL) for a NULL @conf, an empty @name or @lxcpath, or a
+ * non-positive @pid; ret_errno(ENOSYS) when no descriptor could be obtained;
+ * otherwise the result of __unified_attach_fd().
+ */
+static int __cgroup_attach_unified(const struct lxc_conf *conf, const char *name,
+				   const char *lxcpath, pid_t pid)
+{
+	__do_close int fd_cgroup2 = -EBADF;
+	bool bad_args;
+
+	bad_args = !conf || is_empty_string(name) || is_empty_string(lxcpath) || pid <= 0;
+	if (bad_args)
+		return ret_errno(EINVAL);
+
+	fd_cgroup2 = lxc_cmd_get_cgroup2_fd(name, lxcpath);
+	if (fd_cgroup2 >= 0)
+		return __unified_attach_fd(conf, fd_cgroup2, pid);
+
+	return ret_errno(ENOSYS);
+}
+
+/*
+ * Attach @pid to the cgroups of container @name under @lxcpath.
+ *
+ * __cgroup_attach_many() is tried first. Only when it fails with an error
+ * that ERRNO_IS_NOT_SUPPORTED() recognises is __cgroup_attach_unified() used
+ * as a fallback; any other error is returned unchanged.
+ *
+ * Returns 0 on success, ret_errno(EINVAL) for a NULL @conf, an empty @name or
+ * @lxcpath, or a non-positive @pid, ret_errno(ENOSYS) when the fallback is
+ * not supported either, and otherwise the failing helper's result.
+ */
+int cgroup_attach(const struct lxc_conf *conf, const char *name,
+		  const char *lxcpath, pid_t pid)
+{
+	int ret;
+
+	/*
+	 * Validate up front, as the previous implementation of this function
+	 * did: __cgroup_attach_many() hands @conf to __unified_attach_fd() and
+	 * formats @pid into "cgroup.procs" without checking either of them.
+	 */
+	if (!conf || is_empty_string(name) || is_empty_string(lxcpath) || pid <= 0)
+		return ret_errno(EINVAL);
+
+	ret = __cgroup_attach_many(conf, name, lxcpath, pid);
+	if (ret < 0) {
+		if (!ERRNO_IS_NOT_SUPPORTED(ret))
+			return ret;
+
+		ret = __cgroup_attach_unified(conf, name, lxcpath, pid);
+		if (ret < 0 && ERRNO_IS_NOT_SUPPORTED(ret))
+			return ret_errno(ENOSYS);
}
return ret;
}
/* Connects to command socket therefore isn't callable from command handler. */
-int cgroup_get(const char *name, const char *lxcpath,
- const char *filename, char *buf, size_t len)
+/*
+ * Read the cgroup file @key of container @name under @lxcpath into @buf.
+ *
+ * @key: cgroup file name; the part before the first '.' is taken as the
+ *       controller name and must be shorter than MAX_CGROUP_ROOT_NAMELEN.
+ * @buf: destination buffer; @buf and @len must either both be set or both be
+ *       NULL/0.
+ *
+ * The limit cgroup descriptor is requested with
+ * lxc_cmd_get_limit_cgroup_fd(). When that request fails with an error that
+ * ERRNO_IS_NOT_SUPPORTED() recognises, lxc_cmd_get_limit_cgroup2_fd() is used
+ * instead and the hierarchy is assumed to be the unified one.
+ *
+ * Returns the result of lxc_read_try_buf_at() on success,
+ * ret_errno(EINVAL) for invalid arguments, ret_errno(EOPNOTSUPP) for the
+ * "devices" controller on the unified hierarchy, ret_errno(ENOSYS) when the
+ * fallback request fails, and otherwise the error of the first request.
+ */
+int cgroup_get(const char *name, const char *lxcpath, const char *key, char *buf, size_t len)
{
- __do_close int unified_fd = -EBADF;
- ssize_t ret;
+	__do_close int dfd = -EBADF;
+	struct cgroup_fd fd = {
+		.fd = -EBADF,
+	};
+	size_t len_controller;
+	int ret;
- if (is_empty_string(filename) || is_empty_string(name) ||
- is_empty_string(lxcpath))
+	if (is_empty_string(name) || is_empty_string(lxcpath) ||
+	    is_empty_string(key))
return ret_errno(EINVAL);
if ((buf && !len) || (len && !buf))
return ret_errno(EINVAL);
- unified_fd = lxc_cmd_get_limiting_cgroup2_fd(name, lxcpath);
- if (unified_fd < 0)
- return ret_errno(ENOCGROUP2);
+	len_controller = strcspn(key, ".");
+	len_controller++; /* Don't forget the \0 byte. */
+	if (len_controller >= MAX_CGROUP_ROOT_NAMELEN)
+		return ret_errno(EINVAL);
+	(void)strlcpy(fd.controller, key, len_controller);
- ret = lxc_read_try_buf_at(unified_fd, filename, buf, len);
- if (ret < 0)
- SYSERROR("Failed to read cgroup value");
+	ret = lxc_cmd_get_limit_cgroup_fd(name, lxcpath, sizeof(struct cgroup_fd), &fd);
+	if (ret < 0) {
+		if (!ERRNO_IS_NOT_SUPPORTED(ret))
+			return ret;
+
+		/*
+		 * Every failure of the fallback request is reported as ENOSYS.
+		 *
+		 * NOTE(review): this spot used to re-test @ret, which is known
+		 * to be a "not supported" error here, so that test could never
+		 * trigger and was dropped. If the fallback's own error code
+		 * was meant to be inspected, that needs a separate change.
+		 */
+		dfd = lxc_cmd_get_limit_cgroup2_fd(name, lxcpath);
+		if (dfd < 0)
+			return ret_errno(ENOSYS);
+
+		fd.type = UNIFIED_HIERARCHY;
+		fd.fd = move_fd(dfd);
+	}
+	/* From here on @dfd owns the descriptor and closes it on return. */
+	dfd = move_fd(fd.fd);
+
+	TRACE("Reading %s from %s cgroup hierarchy", key, cgroup_hierarchy_name(fd.type));
+
+	if (fd.type == UNIFIED_HIERARCHY && strequal(fd.controller, "devices"))
+		return ret_errno(EOPNOTSUPP);
+	else
+		ret = lxc_read_try_buf_at(dfd, key, buf, len);
return ret;
}
/* Connects to command socket therefore isn't callable from command handler. */
-int cgroup_set(const char *name, const char *lxcpath,
- const char *filename, const char *value)
+int cgroup_set(const char *name, const char *lxcpath, const char *key, const char *value)
{
- __do_close int unified_fd = -EBADF;
- ssize_t ret;
+ __do_close int dfd = -EBADF;
+ struct cgroup_fd fd = {
+ .fd = -EBADF,
+ };
+ size_t len_controller;
+ int ret;
- if (is_empty_string(filename) || is_empty_string(value) ||
- is_empty_string(name) || is_empty_string(lxcpath))
+ if (is_empty_string(name) || is_empty_string(lxcpath) ||
+ is_empty_string(key) || is_empty_string(value))
return ret_errno(EINVAL);
- unified_fd = lxc_cmd_get_limiting_cgroup2_fd(name, lxcpath);
- if (unified_fd < 0)
- return ret_errno(ENOCGROUP2);
+ len_controller = strcspn(key, ".");
+ len_controller++; /* Don't forget the \0 byte. */
+ if (len_controller >= MAX_CGROUP_ROOT_NAMELEN)
+ return ret_errno(EINVAL);
+ (void)strlcpy(fd.controller, key, len_controller);
+
+ ret = lxc_cmd_get_limit_cgroup_fd(name, lxcpath, sizeof(struct cgroup_fd), &fd);
+ if (ret < 0) {
+ if (!ERRNO_IS_NOT_SUPPORTED(ret))
+ return ret;
- if (strnequal(filename, "devices.", STRLITERALLEN("devices."))) {
+ dfd = lxc_cmd_get_limit_cgroup2_fd(name, lxcpath);
+ if (dfd < 0) {
+ if (!ERRNO_IS_NOT_SUPPORTED(ret))
+ return ret;
+
+ return ret_errno(ENOSYS);
+ }
+ fd.type = UNIFIED_HIERARCHY;
+ fd.fd = move_fd(dfd);
+ }
+ dfd = move_fd(fd.fd);
+
+ TRACE("Setting %s to %s in %s cgroup hierarchy", key, value, cgroup_hierarchy_name(fd.type));
+
+ if (fd.type == UNIFIED_HIERARCHY && strequal(fd.controller, "devices")) {
struct device_item device = {};
- ret = device_cgroup_rule_parse(&device, filename, value);
+ ret = device_cgroup_rule_parse(&device, key, value);
if (ret < 0)
- return log_error_errno(-1, EINVAL, "Failed to parse device string %s=%s", filename, value);
+ return log_error_errno(-1, EINVAL, "Failed to parse device string %s=%s",
+ key, value);
ret = lxc_cmd_add_bpf_device_cgroup(name, lxcpath, &device);
} else {
- ret = lxc_writeat(unified_fd, filename, value, strlen(value));
+ ret = lxc_writeat(dfd, key, value, strlen(value));
}
return ret;
const char *wait_error)
{
__do_close int events_fd = -EBADF;
- call_cleaner(lxc_mainloop_close) struct lxc_epoll_descr *descr_ptr = NULL;
+ call_cleaner(lxc_mainloop_close) struct lxc_async_descr *descr_ptr = NULL;
int ret;
- struct lxc_epoll_descr descr = {};
+ struct lxc_async_descr descr = {};
if (timeout != 0) {
ret = lxc_mainloop_open(&descr);
if (events_fd < 0)
return log_error_errno(-errno, errno, "Failed to open cgroup.events file");
- ret = lxc_mainloop_add_handler_events(&descr, events_fd, EPOLLPRI, freezer_cgroup_events_cb, INT_TO_PTR(state_num));
+ ret = lxc_mainloop_add_handler_events(&descr, events_fd, EPOLLPRI,
+ freezer_cgroup_events_cb,
+ default_cleanup_handler,
+ INT_TO_PTR(state_num),
+ "freezer_cgroup_events_cb");
if (ret < 0)
return log_error_errno(-1, errno, "Failed to add cgroup.events fd handler to mainloop");
}
if (is_empty_string(name) || is_empty_string(lxcpath))
return ret_errno(EINVAL);
- unified_fd = lxc_cmd_get_limiting_cgroup2_fd(name, lxcpath);
+ unified_fd = lxc_cmd_get_limit_cgroup2_fd(name, lxcpath);
if (unified_fd < 0)
return ret_errno(ENOCGROUP2);
if (is_empty_string(name) || is_empty_string(lxcpath))
return ret_errno(EINVAL);
- unified_fd = lxc_cmd_get_limiting_cgroup2_fd(name, lxcpath);
+ unified_fd = lxc_cmd_get_limit_cgroup2_fd(name, lxcpath);
if (unified_fd < 0)
return ret_errno(ENOCGROUP2);