#include "commands.h"
#include "conf.h"
#include "log.h"
+#include "macro.h"
#include "storage/storage.h"
#include "utils.h"
-lxc_log_define(lxc_cgfsng, lxc);
+#ifndef HAVE_STRLCPY
+#include "include/strlcpy.h"
+#endif
-static struct cgroup_ops cgfsng_ops;
+#ifndef HAVE_STRLCAT
+#include "include/strlcat.h"
+#endif
-/* A descriptor for a mounted hierarchy
- *
- * @controllers
- * - legacy hierarchy
- * Either NULL, or a null-terminated list of all the co-mounted controllers.
- * - unified hierarchy
- * Either NULL, or a null-terminated list of all enabled controllers.
- *
- * @mountpoint
- * - The mountpoint we will use.
- * - legacy hierarchy
- * It will be either /sys/fs/cgroup/controller or
- * /sys/fs/cgroup/controllerlist.
- * - unified hierarchy
- * It will either be /sys/fs/cgroup or /sys/fs/cgroup/<mountpoint-name>
- * depending on whether this is a hybrid cgroup layout (mix of legacy and
- * unified hierarchies) or a pure unified cgroup layout.
- *
- * @base_cgroup
- * - The cgroup under which the container cgroup path
- * is created. This will be either the caller's cgroup (if not root), or
- * init's cgroup (if root).
- *
- * @fullcgpath
- * - The full path to the containers cgroup.
- *
- * @version
- * - legacy hierarchy
- * If the hierarchy is a legacy hierarchy this will be set to
- * CGROUP_SUPER_MAGIC.
- * - unified hierarchy
- * If the hierarchy is a legacy hierarchy this will be set to
- * CGROUP2_SUPER_MAGIC.
- */
-struct hierarchy {
- char **controllers;
- char *mountpoint;
- char *base_cgroup;
- char *fullcgpath;
- int version;
-};
-
-/* The cgroup data which is attached to the lxc_handler.
- *
- * @cgroup_pattern
- * - A copy of lxc.cgroup.pattern.
- *
- * @container_cgroup
- * - If not null, the cgroup which was created for the container. For each
- * hierarchy, it is created under the @hierarchy->base_cgroup directory.
- * Relative to the base_cgroup it is the same for all hierarchies.
- *
- * @name
- * - The name of the container.
- *
- * @cgroup_meta
- * - A copy of the container's cgroup information. This overrides
- * @cgroup_pattern.
- *
- * @cgroup_layout
- * - What cgroup layout the container is running with.
- * - CGROUP_LAYOUT_UNKNOWN
- * The cgroup layout could not be determined. This should be treated as an
- * error condition.
- * - CGROUP_LAYOUT_LEGACY
- * The container is running with all controllers mounted into legacy cgroup
- * hierarchies.
- * - CGROUP_LAYOUT_HYBRID
- * The container is running with at least one controller mounted into a
- * legacy cgroup hierarchy and a mountpoint for the unified hierarchy. The
- * unified hierarchy can be empty (no controllers enabled) or non-empty
- * (controllers enabled).
- * - CGROUP_LAYOUT_UNIFIED
- * The container is running on a pure unified cgroup hierarchy. The unified
- * hierarchy can be empty (no controllers enabled) or non-empty (controllers
- * enabled).
- */
-struct cgfsng_handler_data {
- char *cgroup_pattern;
- char *container_cgroup; /* cgroup we created for the container */
- char *name; /* container name */
- /* per-container cgroup information */
- struct lxc_cgroup cgroup_meta;
- cgroup_layout_t cgroup_layout;
-};
-
-/* @hierarchies
- * - A NULL-terminated array of struct hierarchy, one per legacy hierarchy. No
- * duplicates. First sufficient, writeable mounted hierarchy wins.
- */
-struct hierarchy **hierarchies;
-/* Pointer to the unified hierarchy in the null terminated list @hierarchies.
- * This is merely a convenience for hybrid cgroup layouts to easily retrieve the
- * unified hierarchy without iterating throught @hierarchies.
- */
-struct hierarchy *unified;
-/*
- * @cgroup_layout
- * - What cgroup layout the container is running with.
- * - CGROUP_LAYOUT_UNKNOWN
- * The cgroup layout could not be determined. This should be treated as an
- * error condition.
- * - CGROUP_LAYOUT_LEGACY
- * The container is running with all controllers mounted into legacy cgroup
- * hierarchies.
- * - CGROUP_LAYOUT_HYBRID
- * The container is running with at least one controller mounted into a
- * legacy cgroup hierarchy and a mountpoint for the unified hierarchy. The
- * unified hierarchy can be empty (no controllers enabled) or non-empty
- * (controllers enabled).
- * - CGROUP_LAYOUT_UNIFIED
- * The container is running on a pure unified cgroup hierarchy. The unified
- * hierarchy can be empty (no controllers enabled) or non-empty (controllers
- * enabled).
- */
-cgroup_layout_t cgroup_layout;
-/* What controllers is the container supposed to use. */
-char *cgroup_use;
-
-/* @lxc_cgfsng_debug
- * - Whether to print debug info to stdout for the cgfsng driver.
- */
-static bool lxc_cgfsng_debug;
-
-#define CGFSNG_DEBUG(format, ...) \
- do { \
- if (lxc_cgfsng_debug) \
- printf("cgfsng: " format, ##__VA_ARGS__); \
- } while (0)
+lxc_log_define(cgfsng, cgroup);
static void free_string_list(char **clist)
{
len = strlen(entry);
prefixed = must_alloc(len + 6);
- memcpy(prefixed, "name=", sizeof("name="));
- memcpy(prefixed + sizeof("name="), entry, len);
+
+ memcpy(prefixed, "name=", STRLITERALLEN("name="));
+ memcpy(prefixed + STRLITERALLEN("name="), entry, len);
prefixed[len + 5] = '\0';
return prefixed;
}
(*clist)[newentry] = copy;
}
-static void free_handler_data(struct cgfsng_handler_data *d)
-{
- free(d->cgroup_pattern);
- free(d->container_cgroup);
- free(d->name);
- if (d->cgroup_meta.dir)
- free(d->cgroup_meta.dir);
- if (d->cgroup_meta.controllers)
- free(d->cgroup_meta.controllers);
- free(d);
-}
-
/* Given a handler's cgroup data, return the struct hierarchy for the controller
* @controller, or NULL if there is none.
*
* When @controller is NULL the lookup instead returns the first hierarchy whose
* controller list is allocated but empty (the empty unified hierarchy).
* errno is set to ENOENT before the lookup starts.
*/
-struct hierarchy *get_hierarchy(const char *c)
+struct hierarchy *get_hierarchy(struct cgroup_ops *ops, const char *controller)
{
int i;
- if (!hierarchies)
+ errno = ENOENT;
+
+ if (!ops->hierarchies) {
+ TRACE("There are no useable cgroup controllers");
return NULL;
+ }
- for (i = 0; hierarchies[i]; i++) {
- if (!c) {
+ for (i = 0; ops->hierarchies[i]; i++) {
+ if (!controller) {
/* This is the empty unified hierarchy. */
- if (hierarchies[i]->controllers &&
- !hierarchies[i]->controllers[0])
- return hierarchies[i];
+ if (ops->hierarchies[i]->controllers &&
+ !ops->hierarchies[i]->controllers[0])
+ return ops->hierarchies[i];
- return NULL;
+ continue; /* not the empty hierarchy: keep scanning the remaining entries */
}
- if (string_in_list(hierarchies[i]->controllers, c))
- return hierarchies[i];
+ if (string_in_list(ops->hierarchies[i]->controllers, controller))
+ return ops->hierarchies[i];
}
+ if (controller) /* nothing matched: log which kind of lookup failed */
+ WARN("There is no useable %s controller", controller);
+ else
+ WARN("There is no empty unified cgroup hierarchy");
+
return NULL;
}
char *token;
size_t arrlen;
uint32_t *bitarr;
- char *saveptr = NULL;
arrlen = BITS_TO_LONGS(nbits);
bitarr = calloc(arrlen, sizeof(uint32_t));
if (!bitarr)
return NULL;
- for (; (token = strtok_r(buf, ",", &saveptr)); buf = NULL) {
+ lxc_iterate_parts(token, buf, ",") {
errno = 0;
unsigned end, start;
char *range;
int ret;
size_t i;
char **cpulist = NULL;
- char numstr[LXC_NUMSTRLEN64] = {0};
+ char numstr[INTTYPE_TO_STRLEN(size_t)] = {0};
for (i = 0; i <= nbits; i++) {
if (!is_set(i, bitarr))
continue;
- ret = snprintf(numstr, LXC_NUMSTRLEN64, "%zu", i);
- if (ret < 0 || (size_t)ret >= LXC_NUMSTRLEN64) {
+ ret = snprintf(numstr, sizeof(numstr), "%zu", i);
+ if (ret < 0 || (size_t)ret >= sizeof(numstr)) {
lxc_free_array((void **)cpulist, free);
return NULL;
}
/* Get maximum number of cpus found in possible cpuset. */
maxposs = get_max_cpus(posscpus);
- if (maxposs < 0)
+ if (maxposs < 0 || maxposs >= INT_MAX - 1)
goto on_error;
if (!file_exists(__ISOL_CPUS)) {
/* Get maximum number of cpus found in isolated cpuset. */
maxisol = get_max_cpus(isolcpus);
- if (maxisol < 0)
+ if (maxisol < 0 || maxisol >= INT_MAX - 1)
goto on_error;
if (maxposs < maxisol)
*lastslash = oldv;
free(fpath);
fpath = must_make_path(path, "cpuset.cpus", NULL);
- ret = lxc_write_to_file(fpath, cpulist, strlen(cpulist), false);
+ ret = lxc_write_to_file(fpath, cpulist, strlen(cpulist), false, 0666);
if (ret < 0) {
SYSERROR("Failed to write cpu list to \"%s\"", fpath);
goto on_error;
*lastslash = oldv;
fpath = must_make_path(path, file, NULL);
- ret = lxc_write_to_file(fpath, value, len, false);
+ ret = lxc_write_to_file(fpath, value, len, false, 0666);
if (ret < 0)
SYSERROR("Failed to write \"%s\" to file \"%s\"", value, fpath);
free(fpath);
if (slash)
*slash = '\0';
- cgpath = must_make_path(h->mountpoint, h->base_cgroup, cgname, NULL);
+ cgpath = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL);
if (slash)
*slash = '/';
}
}
- clonechildrenpath =
- must_make_path(cgpath, "cgroup.clone_children", NULL);
+ clonechildrenpath = must_make_path(cgpath, "cgroup.clone_children", NULL);
/* unified hierarchy doesn't have clone_children */
if (!file_exists(clonechildrenpath)) {
free(clonechildrenpath);
}
free(cgpath);
- ret = lxc_write_to_file(clonechildrenpath, "1", 1, false);
+ ret = lxc_write_to_file(clonechildrenpath, "1", 1, false, 0666);
if (ret < 0) {
/* Set clone_children so children inherit our settings */
SYSERROR("Failed to write 1 to \"%s\"", clonechildrenpath);
/* Return true if all of the controllers which we require have been found. The
* required list is freezer and anything in lxc.cgroup.use.
*
* @ops->cgroup_use is walked as a NULL-terminated array of controller names;
* when it is NULL only the freezer controller is required.
*/
-static bool all_controllers_found(void)
+static bool all_controllers_found(struct cgroup_ops *ops)
{
- char *p;
- char *saveptr = NULL;
- struct hierarchy **hlist = hierarchies;
+ char **cur;
+ struct hierarchy **hlist = ops->hierarchies;
if (!controller_found(hlist, "freezer")) {
- CGFSNG_DEBUG("No freezer controller mountpoint found\n");
+ ERROR("No freezer controller mountpoint found");
return false;
}
- if (!cgroup_use)
+ if (!ops->cgroup_use)
return true; /* no additional controllers were requested */
- for (p = strtok_r(cgroup_use, ",", &saveptr); p;
- p = strtok_r(NULL, ",", &saveptr)) {
- if (!controller_found(hlist, p)) {
- CGFSNG_DEBUG("No %s controller mountpoint found\n", p);
+ for (cur = ops->cgroup_use; cur && *cur; cur++)
+ if (!controller_found(hlist, *cur)) {
+ ERROR("No %s controller mountpoint found", *cur);
return false; /* the first missing controller ends the check */
}
- }
return true;
}
*/
int i;
char *dup, *p2, *tok;
- char *p = line, *saveptr = NULL, *sep = ",";
+ char *p = line, *sep = ",";
char **aret = NULL;
for (i = 0; i < 4; i++) {
* verify /sys/fs/cgroup/ in this field.
*/
if (strncmp(p, "/sys/fs/cgroup/", 15) != 0) {
- CGFSNG_DEBUG("Found hierarchy not under /sys/fs/cgroup: \"%s\"\n", p);
+ ERROR("Found hierarchy not under /sys/fs/cgroup: \"%s\"", p);
return NULL;
}
p += 15;
p2 = strchr(p, ' ');
if (!p2) {
- CGFSNG_DEBUG("Corrupt mountinfo\n");
+ ERROR("Corrupt mountinfo");
return NULL;
}
*p2 = '\0';
if (type == CGROUP_SUPER_MAGIC) {
- /* strdup() here for v1 hierarchies. Otherwise strtok_r() will
- * destroy mountpoints such as "/sys/fs/cgroup/cpu,cpuacct".
+ /* strdup() here for v1 hierarchies. Otherwise
+ * lxc_iterate_parts() will destroy mountpoints such as
+ * "/sys/fs/cgroup/cpu,cpuacct".
*/
dup = strdup(p);
if (!dup)
return NULL;
- for (tok = strtok_r(dup, sep, &saveptr); tok;
- tok = strtok_r(NULL, sep, &saveptr))
+ lxc_iterate_parts(tok, dup, sep) {
must_append_controller(klist, nlist, &aret, tok);
+ }
free(dup);
}
static char **cg_unified_get_controllers(const char *file)
{
char *buf, *tok;
- char *saveptr = NULL, *sep = " \t\n";
+ char *sep = " \t\n";
char **aret = NULL;
buf = read_file(file);
if (!buf)
return NULL;
- for (tok = strtok_r(buf, sep, &saveptr); tok;
- tok = strtok_r(NULL, sep, &saveptr)) {
+ lxc_iterate_parts(tok, buf, sep) {
int newentry;
char *copy;
return aret;
}
-static struct hierarchy *add_hierarchy(char **clist, char *mountpoint,
- char *base_cgroup, int type)
+/* Allocate a new struct hierarchy, fill it from the arguments and store it in
+ * the slot that append_null_to_list() returns for the list *@h.
+ *
+ * @clist, @mountpoint and @container_base_path are stored as given (the
+ * pointers are not copied); both full-path members start out as NULL.
+ * Returns the new entry.
+ */
+static struct hierarchy *add_hierarchy(struct hierarchy ***h, char **clist, char *mountpoint,
+ char *container_base_path, int type)
{
struct hierarchy *new;
int newentry;
new = must_alloc(sizeof(*new));
new->controllers = clist;
new->mountpoint = mountpoint;
- new->base_cgroup = base_cgroup;
- new->fullcgpath = NULL;
+ new->container_base_path = container_base_path;
+ new->container_full_path = NULL; /* no container cgroup path yet */
+ new->monitor_full_path = NULL; /* no monitor cgroup path yet */
new->version = type;
- newentry = append_null_to_list((void ***)&hierarchies);
- hierarchies[newentry] = new;
+ newentry = append_null_to_list((void ***)h);
+ (*h)[newentry] = new;
return new;
}
*/
static bool controller_in_clist(char *cgline, char *c)
{
- char *tok, *saveptr = NULL, *eol, *tmp;
+ char *tok, *eol, *tmp;
size_t len;
eol = strchr(cgline, ':');
memcpy(tmp, cgline, len);
tmp[len] = '\0';
- for (tok = strtok_r(tmp, ",", &saveptr); tok;
- tok = strtok_r(NULL, ",", &saveptr)) {
+ lxc_iterate_parts(tok, tmp, ",") {
if (strcmp(tok, c) == 0)
return true;
}
return -1;
while (getline(&line, &len, f) != -1) {
- char *p, *p2, *tok, *saveptr = NULL;
+ char *p, *p2, *tok;
p = strchr(line, ':');
if (!p)
continue;
continue;
}
- for (tok = strtok_r(p, ",", &saveptr); tok;
- tok = strtok_r(NULL, ",", &saveptr)) {
+ lxc_iterate_parts(tok, p, ",") {
if (strncmp(tok, "name=", 5) == 0)
must_append_string(nlist, tok);
else
s[--len] = '\0';
}
-static void lxc_cgfsng_print_handler_data(const struct cgfsng_handler_data *d)
-{
- printf("Cgroup information:\n");
- printf(" container name: %s\n", d->name ? d->name : "(null)");
- printf(" lxc.cgroup.use: %s\n", cgroup_use ? cgroup_use : "(null)");
- printf(" lxc.cgroup.pattern: %s\n",
- d->cgroup_pattern ? d->cgroup_pattern : "(null)");
- printf(" lxc.cgroup.dir: %s\n",
- d->cgroup_meta.dir ? d->cgroup_meta.dir : "(null)");
- printf(" cgroup: %s\n",
- d->container_cgroup ? d->container_cgroup : "(null)");
-}
-
-static void lxc_cgfsng_print_hierarchies()
+/* Log every hierarchy in @ops->hierarchies at TRACE level: its index, its
+ * container_base_path, its mountpoint and each entry of its controller list.
+ * Missing strings are printed as "(null)".
+ */
+static void lxc_cgfsng_print_hierarchies(struct cgroup_ops *ops)
{
int i;
struct hierarchy **it;
- if (!hierarchies) {
- printf(" No hierarchies found\n");
+ if (!ops->hierarchies) {
+ TRACE(" No hierarchies found");
return;
}
- printf(" Hierarchies:\n");
- for (i = 0, it = hierarchies; it && *it; it++, i++) {
+ TRACE(" Hierarchies:");
+ for (i = 0, it = ops->hierarchies; it && *it; it++, i++) {
int j;
char **cit;
- printf(" %d: base_cgroup: %s\n", i, (*it)->base_cgroup ? (*it)->base_cgroup : "(null)");
- printf(" mountpoint: %s\n", (*it)->mountpoint ? (*it)->mountpoint : "(null)");
- printf(" controllers:\n");
+ TRACE(" %d: base_cgroup: %s", i, (*it)->container_base_path ? (*it)->container_base_path : "(null)");
+ TRACE(" mountpoint: %s", (*it)->mountpoint ? (*it)->mountpoint : "(null)");
+ TRACE(" controllers:");
for (j = 0, cit = (*it)->controllers; cit && *cit; cit++, j++)
- printf(" %d: %s\n", j, *cit);
+ TRACE(" %d: %s", j, *cit);
}
}
int k;
char **it;
- printf("basecginfo is:\n");
- printf("%s\n", basecginfo);
+ TRACE("basecginfo is:");
+ TRACE("%s", basecginfo);
for (k = 0, it = klist; it && *it; it++, k++)
- printf("kernel subsystem %d: %s\n", k, *it);
+ TRACE("kernel subsystem %d: %s", k, *it);
for (k = 0, it = nlist; it && *it; it++, k++)
- printf("named subsystem %d: %s\n", k, *it);
+ TRACE("named subsystem %d: %s", k, *it);
}
-static void lxc_cgfsng_print_debuginfo(const struct cgfsng_handler_data *d)
+/* Destroy the container cgroup in every hierarchy of @hierarchies.
+ *
+ * Does nothing when @container_cgroup or @hierarchies is NULL. Hierarchies
+ * without a container_full_path are skipped. A failing recursive_destroy() is
+ * only logged; the stored path is freed and reset to NULL either way.
+ * Always returns 0.
+ */
+static int cgroup_rmdir(struct hierarchy **hierarchies,
+ const char *container_cgroup)
{
- lxc_cgfsng_print_handler_data(d);
- lxc_cgfsng_print_hierarchies();
+ int i;
+
+ if (!container_cgroup || !hierarchies)
+ return 0;
+
+ for (i = 0; hierarchies[i]; i++) {
+ int ret;
+ struct hierarchy *h = hierarchies[i];
+
+ if (!h->container_full_path)
+ continue;
+
+ ret = recursive_destroy(h->container_full_path);
+ if (ret < 0)
+ WARN("Failed to destroy \"%s\"", h->container_full_path);
+
+ free(h->container_full_path);
+ h->container_full_path = NULL;
+ }
+
+ return 0;
}
-/* At startup, parse_hierarchies finds all the info we need about cgroup
- * mountpoints and current cgroups, and stores it in @d.
- */
-static bool cg_hybrid_init(void)
+struct generic_userns_exec_data { /* argument bundle passed to cgroup_rmdir_wrapper() as void * */
+ struct hierarchy **hierarchies; /* forwarded to cgroup_rmdir() */
+ const char *container_cgroup; /* forwarded to cgroup_rmdir() */
+ struct lxc_conf *conf; /* source of root_nsuid_map/root_nsgid_map and init_uid/init_gid */
+ uid_t origuid; /* target uid in parent namespace */
+ char *path; /* not read by cgroup_rmdir_wrapper() */
+};
+
+/* Switch to the container's ids and then remove the container cgroups via
+ * cgroup_rmdir(). Handed to userns_exec_1() by cgfsng_payload_destroy().
+ *
+ * @data is a struct generic_userns_exec_data. The target uid (gid) is 0 when
+ * the config has a root_nsuid_map (root_nsgid_map), otherwise init_uid
+ * (init_gid). Returns -1 when switching ids fails, else the result of
+ * cgroup_rmdir().
+ */
+static int cgroup_rmdir_wrapper(void *data)
{
int ret;
- char *basecginfo;
- bool will_escape;
- FILE *f;
- size_t len = 0;
- char *line = NULL;
- char **klist = NULL, **nlist = NULL;
+ struct generic_userns_exec_data *arg = data;
+ uid_t nsuid = (arg->conf->root_nsuid_map != NULL) ? 0 : arg->conf->init_uid;
+ gid_t nsgid = (arg->conf->root_nsgid_map != NULL) ? 0 : arg->conf->init_gid;
- /* Root spawned containers escape the current cgroup, so use init's
- * cgroups as our base in that case.
- */
- will_escape = (geteuid() == 0);
- if (will_escape)
- basecginfo = read_file("/proc/1/cgroup");
- else
- basecginfo = read_file("/proc/self/cgroup");
- if (!basecginfo)
- return false;
+ ret = setresgid(nsgid, nsgid, nsgid);
+ if (ret < 0) {
+ SYSERROR("Failed to setresgid(%d, %d, %d)", (int)nsgid,
+ (int)nsgid, (int)nsgid);
+ return -1;
+ }
- ret = get_existing_subsystems(&klist, &nlist);
+ ret = setresuid(nsuid, nsuid, nsuid);
if (ret < 0) {
- CGFSNG_DEBUG("Failed to retrieve available legacy cgroup controllers\n");
- free(basecginfo);
- return false;
+ SYSERROR("Failed to setresuid(%d, %d, %d)", (int)nsuid,
+ (int)nsuid, (int)nsuid);
+ return -1;
}
- f = fopen("/proc/self/mountinfo", "r");
- if (!f) {
- CGFSNG_DEBUG("Failed to open \"/proc/self/mountinfo\"\n");
- free(basecginfo);
- return false;
+ ret = setgroups(0, NULL); /* NOTE(review): runs after the uid switch; confirm this ordering is intended */
+ if (ret < 0 && errno != EPERM) { /* an EPERM failure is tolerated */
+ SYSERROR("Failed to setgroups(0, NULL)");
+ return -1;
}
- if (lxc_cgfsng_debug)
- lxc_cgfsng_print_basecg_debuginfo(basecginfo, klist, nlist);
+ return cgroup_rmdir(arg->hierarchies, arg->container_cgroup);
+}
- while (getline(&line, &len, f) != -1) {
- int type;
- bool writeable;
- struct hierarchy *new;
- char *base_cgroup = NULL, *mountpoint = NULL;
- char **controller_list = NULL;
+/* Remove the cgroups that were created for the container payload.
+ *
+ * When the config carries a non-empty id map the removal runs through
+ * userns_exec_1() with cgroup_rmdir_wrapper(); otherwise cgroup_rmdir() is
+ * called directly. A failure is only logged.
+ */
+__cgfsng_ops static void cgfsng_payload_destroy(struct cgroup_ops *ops,
+ struct lxc_handler *handler)
+{
+ int ret;
+ struct generic_userns_exec_data wrap; /* wrap.path stays unset; the wrapper does not read it */
- type = get_cgroup_version(line);
- if (type == 0)
- continue;
+ wrap.origuid = 0;
+ wrap.container_cgroup = ops->container_cgroup;
+ wrap.hierarchies = ops->hierarchies;
+ wrap.conf = handler->conf;
- if (type == CGROUP2_SUPER_MAGIC && unified)
- continue;
+ if (handler->conf && !lxc_list_empty(&handler->conf->id_map))
+ ret = userns_exec_1(handler->conf, cgroup_rmdir_wrapper, &wrap,
+ "cgroup_rmdir_wrapper");
+ else
+ ret = cgroup_rmdir(ops->hierarchies, ops->container_cgroup);
+ if (ret < 0) {
+ WARN("Failed to destroy cgroups");
+ return;
+ }
+}
- if (cgroup_layout == CGROUP_LAYOUT_UNKNOWN) {
- if (type == CGROUP2_SUPER_MAGIC)
- cgroup_layout = CGROUP_LAYOUT_UNIFIED;
- else if (type == CGROUP_SUPER_MAGIC)
- cgroup_layout = CGROUP_LAYOUT_LEGACY;
- } else if (cgroup_layout == CGROUP_LAYOUT_UNIFIED) {
- if (type == CGROUP_SUPER_MAGIC)
- cgroup_layout = CGROUP_LAYOUT_HYBRID;
- } else if (cgroup_layout == CGROUP_LAYOUT_LEGACY) {
- if (type == CGROUP2_SUPER_MAGIC)
- cgroup_layout = CGROUP_LAYOUT_HYBRID;
- }
+/* Remove the monitor cgroup in every hierarchy.
+ *
+ * For each hierarchy that has a monitor_full_path, handler->monitor_pid is
+ * written to the cgroup.procs file of the PIVOT_CGROUP cgroup below
+ * <mountpoint>/<container_base_path>[/<cgroup_meta.dir>], and afterwards
+ * monitor_full_path is destroyed recursively. A failing step skips to the
+ * next hierarchy. Does nothing when there are no hierarchies or the pid
+ * cannot be formatted.
+ */
+__cgfsng_ops static void cgfsng_monitor_destroy(struct cgroup_ops *ops,
+ struct lxc_handler *handler)
+{
+ int len;
+ char *pivot_path;
+ struct lxc_conf *conf = handler->conf;
+ char pidstr[INTTYPE_TO_STRLEN(pid_t)];
- controller_list = cg_hybrid_get_controllers(klist, nlist, line, type);
- if (!controller_list && type == CGROUP_SUPER_MAGIC)
- continue;
+ if (!ops->hierarchies)
+ return;
- if (type == CGROUP_SUPER_MAGIC)
- if (controller_list_is_dup(hierarchies, controller_list))
- goto next;
+ len = snprintf(pidstr, sizeof(pidstr), "%d", handler->monitor_pid);
+ if (len < 0 || (size_t)len >= sizeof(pidstr))
+ return;
- mountpoint = cg_hybrid_get_mountpoint(line);
- if (!mountpoint) {
- CGFSNG_DEBUG("Failed parsing mountpoint from \"%s\"\n", line);
- goto next;
- }
+ for (int i = 0; ops->hierarchies[i]; i++) {
+ int ret;
+ struct hierarchy *h = ops->hierarchies[i];
- if (type == CGROUP_SUPER_MAGIC)
- base_cgroup = cg_hybrid_get_current_cgroup(basecginfo, controller_list[0], CGROUP_SUPER_MAGIC);
+ if (!h->monitor_full_path)
+ continue;
+
+ if (conf && conf->cgroup_meta.dir)
+ pivot_path = must_make_path(h->mountpoint,
+ h->container_base_path,
+ conf->cgroup_meta.dir,
+ PIVOT_CGROUP,
+ "cgroup.procs", NULL);
else
- base_cgroup = cg_hybrid_get_current_cgroup(basecginfo, NULL, CGROUP2_SUPER_MAGIC);
- if (!base_cgroup) {
- CGFSNG_DEBUG("Failed to find current cgroup\n");
+ pivot_path = must_make_path(h->mountpoint,
+ h->container_base_path,
+ PIVOT_CGROUP,
+ "cgroup.procs", NULL);
+
+ ret = mkdir_p(pivot_path, 0755); /* NOTE(review): pivot_path ends in "cgroup.procs"; confirm how mkdir_p treats the last component */
+ if (ret < 0 && errno != EEXIST)
goto next;
- }
- trim(base_cgroup);
- prune_init_scope(base_cgroup);
- if (type == CGROUP2_SUPER_MAGIC)
- writeable = test_writeable_v2(mountpoint, base_cgroup);
- else
- writeable = test_writeable_v1(mountpoint, base_cgroup);
- if (!writeable)
+ /* Move ourselves into the pivot cgroup to delete our own
+ * cgroup.
+ */
+ ret = lxc_write_to_file(pivot_path, pidstr, len, false, 0666);
+ if (ret != 0)
goto next;
- if (type == CGROUP2_SUPER_MAGIC) {
- char *cgv2_ctrl_path;
+ ret = recursive_destroy(h->monitor_full_path);
+ if (ret < 0)
+ WARN("Failed to destroy \"%s\"", h->monitor_full_path);
- cgv2_ctrl_path = must_make_path(mountpoint, base_cgroup,
- "cgroup.controllers",
- NULL);
+ next:
+ free(pivot_path);
+ }
+}
- controller_list = cg_unified_get_controllers(cgv2_ctrl_path);
- free(cgv2_ctrl_path);
- if (!controller_list)
- controller_list = cg_unified_make_empty_controller();
- }
+/* Enable the detected controllers along the path @cgname in a cgroup2
+ * hierarchy.
+ *
+ * Returns true right away when @h is not a CGROUP2_SUPER_MAGIC hierarchy or
+ * has no controller list. Otherwise a string of the form "+ctrl1 +ctrl2 ..."
+ * is built from @h->controllers and written to the cgroup.subtree_control
+ * file of every '/'-separated component of @cgname except the last one,
+ * starting below <mountpoint>/<container_base_path>.
+ * Returns false when splitting @cgname or any write fails.
+ */
+static bool cg_unified_create_cgroup(struct hierarchy *h, char *cgname)
+{
+ size_t i, parts_len;
+ char **it;
+ size_t full_len = 0;
+ char *add_controllers = NULL, *cgroup = NULL;
+ char **parts = NULL;
+ bool bret = false;
- new = add_hierarchy(controller_list, mountpoint, base_cgroup, type);
- if (type == CGROUP2_SUPER_MAGIC && !unified)
- unified = new;
+ if (h->version != CGROUP2_SUPER_MAGIC)
+ return true;
- continue;
+ if (!h->controllers)
+ return true;
- next:
- free_string_list(controller_list);
- free(mountpoint);
- free(base_cgroup);
+ /* For now we simply enable all controllers that we have detected by
+ * creating a string like "+memory +pids +cpu +io".
+ * TODO: In the near future we might want to support "-<controller>"
+ * etc. but whether supporting semantics like this make sense will need
+ * some thinking.
+ */
+ for (it = h->controllers; it && *it; it++) {
+ full_len += strlen(*it) + 2; /* room for the leading '+' and a separator */
+ add_controllers = must_realloc(add_controllers, full_len + 1);
+
+ if (h->controllers[0] == *it)
+ add_controllers[0] = '\0'; /* first round: start from an empty string */
+
+ (void)strlcat(add_controllers, "+", full_len + 1);
+ (void)strlcat(add_controllers, *it, full_len + 1);
+
+ if ((it + 1) && *(it + 1)) /* separator only when another controller follows */
+ (void)strlcat(add_controllers, " ", full_len + 1);
}
- free_string_list(klist);
- free_string_list(nlist);
+ parts = lxc_string_split(cgname, '/');
+ if (!parts)
+ goto on_error;
- free(basecginfo);
+ parts_len = lxc_array_len((void **)parts);
+ if (parts_len > 0)
+ parts_len--; /* the last component is not written to */
- fclose(f);
- free(line);
+ cgroup = must_make_path(h->mountpoint, h->container_base_path, NULL);
+ for (i = 0; i < parts_len; i++) {
+ int ret;
+ char *target;
- if (lxc_cgfsng_debug) {
- printf("Writable cgroup hierarchies:\n");
- lxc_cgfsng_print_hierarchies();
+ cgroup = must_append_path(cgroup, parts[i], NULL);
+ target = must_make_path(cgroup, "cgroup.subtree_control", NULL);
+ ret = lxc_write_to_file(target, add_controllers, full_len, false, 0666); /* NOTE(review): full_len also counts a separator for the last controller, so it is one more than the string length; confirm the extra byte is intended */
+ free(target);
+ if (ret < 0) {
+ SYSERROR("Could not enable \"%s\" controllers in the "
+ "unified cgroup \"%s\"", add_controllers, cgroup);
+ goto on_error;
+ }
}
- /* verify that all controllers in cgroup.use and all crucial
- * controllers are accounted for
- */
- if (!all_controllers_found())
- return false;
+ bret = true;
- return true;
+on_error:
+ lxc_free_array((void **)parts, free);
+ free(add_controllers);
+ free(cgroup);
+ return bret;
}
-static int cg_is_pure_unified(void)
+/* Create the monitor cgroup @cgname in hierarchy @h.
+ *
+ * The full path <mountpoint>/<container_base_path>/<cgname> is stored in
+ * @h->monitor_full_path, replacing whatever pointer was there before. A
+ * directory that already exists counts as success. Otherwise the legacy
+ * cpuset handling runs, the directory is created with mode 0755 and the
+ * result of cg_unified_create_cgroup() is returned.
+ */
+static bool monitor_create_path_for_hierarchy(struct hierarchy *h, char *cgname)
{
-
int ret;
- struct statfs fs;
- ret = statfs("/sys/fs/cgroup", &fs);
- if (ret < 0)
- return -ENOMEDIUM;
+ h->monitor_full_path = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL);
+ if (dir_exists(h->monitor_full_path))
+ return true;
- if (is_fs_type(&fs, CGROUP2_SUPER_MAGIC))
- return CGROUP2_SUPER_MAGIC;
+ if (!cg_legacy_handle_cpuset_hierarchy(h, cgname)) {
+ ERROR("Failed to handle legacy cpuset controller");
+ return false;
+ }
- return 0;
+ ret = mkdir_p(h->monitor_full_path, 0755);
+ if (ret < 0) {
+ ERROR("Failed to create cgroup \"%s\"", h->monitor_full_path);
+ return false;
+ }
+
+ return cg_unified_create_cgroup(h, cgname);
}
-/* Get current cgroup from /proc/self/cgroup for the cgroupfs v2 hierarchy. */
-static char *cg_unified_get_current_cgroup(void)
+/* Create the container cgroup @cgname in hierarchy @h.
+ *
+ * The full path <mountpoint>/<container_base_path>/<cgname> is stored in
+ * @h->container_full_path. Unlike the monitor variant, a directory that
+ * already exists is an error. Otherwise the legacy cpuset handling runs, the
+ * directory is created with mode 0755 and the result of
+ * cg_unified_create_cgroup() is returned. On failure the stored path is left
+ * in place for the caller to release.
+ */
+static bool container_create_path_for_hierarchy(struct hierarchy *h, char *cgname)
{
- char *basecginfo, *base_cgroup;
- bool will_escape;
- char *copy = NULL;
+ int ret;
- will_escape = (geteuid() == 0);
- if (will_escape)
- basecginfo = read_file("/proc/1/cgroup");
- else
- basecginfo = read_file("/proc/self/cgroup");
- if (!basecginfo)
- return NULL;
-
- base_cgroup = strstr(basecginfo, "0::/");
- if (!base_cgroup)
- goto cleanup_on_err;
+ h->container_full_path = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL);
+ if (dir_exists(h->container_full_path)) {
+ ERROR("The cgroup \"%s\" already existed", h->container_full_path);
+ return false;
+ }
- base_cgroup = base_cgroup + 3;
- copy = copy_to_eol(base_cgroup);
- if (!copy)
- goto cleanup_on_err;
+ if (!cg_legacy_handle_cpuset_hierarchy(h, cgname)) {
+ ERROR("Failed to handle legacy cpuset controller");
+ return false;
+ }
-cleanup_on_err:
- free(basecginfo);
- if (copy)
- trim(copy);
+ ret = mkdir_p(h->container_full_path, 0755);
+ if (ret < 0) {
+ ERROR("Failed to create cgroup \"%s\"", h->container_full_path);
+ return false;
+ }
- return copy;
+ return cg_unified_create_cgroup(h, cgname);
}
-static int cg_unified_init(void)
+/* Undo a path that was created for @h during a failed creation attempt.
+ *
+ * @monitor selects @h->monitor_full_path (true) or @h->container_full_path
+ * (false). The directory is removed with rmdir(), the stored string is freed
+ * and the member is reset to NULL. A failing rmdir() is only logged.
+ * @cgname is not used.
+ */
+static void remove_path_for_hierarchy(struct hierarchy *h, char *cgname, bool monitor)
{
int ret;
- char *mountpoint, *subtree_path;
- char **delegatable;
- char *base_cgroup = NULL;
+ char *full_path;
- ret = cg_is_pure_unified();
- if (ret == -ENOMEDIUM)
- return -ENOMEDIUM;
+ if (monitor)
+ full_path = h->monitor_full_path;
+ else
+ full_path = h->container_full_path;
- if (ret != CGROUP2_SUPER_MAGIC)
- return 0;
+ ret = rmdir(full_path);
+ if (ret < 0)
+ SYSERROR("Failed to rmdir(\"%s\") from failed creation attempt", full_path);
- base_cgroup = cg_unified_get_current_cgroup();
- if (!base_cgroup)
- return -EINVAL;
- prune_init_scope(base_cgroup);
+ free(full_path);
- /* We assume that we have already been given controllers to delegate
- * further down the hierarchy. If not it is up to the user to delegate
- * them to us.
- */
- mountpoint = must_copy_string("/sys/fs/cgroup");
- subtree_path = must_make_path(mountpoint, base_cgroup,
- "cgroup.subtree_control", NULL);
- delegatable = cg_unified_get_controllers(subtree_path);
- free(subtree_path);
- if (!delegatable)
- delegatable = cg_unified_make_empty_controller();
- if (!delegatable[0])
- CGFSNG_DEBUG("No controllers are enabled for delegation\n");
+ if (monitor)
+ h->monitor_full_path = NULL;
+ else
+ h->container_full_path = NULL;
+}
- /* TODO: If the user requested specific controllers via lxc.cgroup.use
- * we should verify here. The reason I'm not doing it right is that I'm
- * not convinced that lxc.cgroup.use will be the future since it is a
- * global property. I much rather have an option that lets you request
- * controllers per container.
- */
+/* Create the cgroup for the monitor process in every hierarchy.
+ *
+ * The base name is <cgroup_meta.dir>/<monitor_pattern>/<name> when
+ * cgroup_meta.dir is set, otherwise <monitor_pattern>/<name>. The plain name
+ * is tried first; whenever creation fails in some hierarchy, the paths already
+ * created for that attempt are removed again and the next suffix "-1", "-2",
+ * ..., "-999" is tried.
+ *
+ * Returns true as soon as one name was created in all hierarchies. Returns
+ * false when @handler has no config, the name cannot be built, or every
+ * candidate name failed.
+ */
+__cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops,
+ struct lxc_handler *handler)
+{
+ char *monitor_cgroup, *offset, *tmp;
+ int i;
+ int idx = 0;
+ size_t len;
+ bool bret = false;
+ struct lxc_conf *conf = handler->conf;
- add_hierarchy(delegatable, mountpoint, base_cgroup, CGROUP2_SUPER_MAGIC);
- unified = hierarchies[0];
+ if (!conf)
+ return bret;
- cgroup_layout = CGROUP_LAYOUT_UNIFIED;
- return CGROUP2_SUPER_MAGIC;
-}
+ if (conf->cgroup_meta.dir)
+ tmp = lxc_string_join("/",
+ (const char *[]){conf->cgroup_meta.dir,
+ ops->monitor_pattern,
+ handler->name, NULL},
+ false);
+ else
+ tmp = must_make_path(ops->monitor_pattern, handler->name, NULL);
+ if (!tmp)
+ return bret;
-static bool cg_init(void)
-{
- int ret;
- const char *tmp;
+ len = strlen(tmp) + 5; /* leave room for -NNN\0 */
+ monitor_cgroup = must_alloc(len);
+ (void)strlcpy(monitor_cgroup, tmp, len);
+ free(tmp);
+ offset = monitor_cgroup + len - 5; /* points at the terminator of the base name */
- errno = 0;
- tmp = lxc_global_config_value("lxc.cgroup.use");
- if (!cgroup_use && errno != 0) { /* lxc.cgroup.use can be NULL */
- CGFSNG_DEBUG("Failed to retrieve list of cgroups to use\n");
- return false;
- }
- cgroup_use = must_copy_string(tmp);
+ do {
+ if (idx) {
+ int ret = snprintf(offset, 5, "-%d", idx);
+ if (ret < 0 || (size_t)ret >= 5)
+ goto on_error;
+ }
- ret = cg_unified_init();
- if (ret < 0)
- return false;
+ for (i = 0; ops->hierarchies[i]; i++) {
+ if (!monitor_create_path_for_hierarchy(ops->hierarchies[i], monitor_cgroup)) {
+ ERROR("Failed to create cgroup \"%s\"", ops->hierarchies[i]->monitor_full_path);
+ /* Release the monitor path that the failed attempt just allocated. */
+ free(ops->hierarchies[i]->monitor_full_path);
+ ops->hierarchies[i]->monitor_full_path = NULL;
- if (ret == CGROUP2_SUPER_MAGIC)
- return true;
+ for (int j = 0; j < i; j++)
+ remove_path_for_hierarchy(ops->hierarchies[j], monitor_cgroup, true);
+
+ idx++;
+ break;
+ }
+ }
+ /* ops->hierarchies[i] is only non-NULL here when the for loop was left
+ * through the failure path, so a pass that created every hierarchy ends
+ * the retry loop instead of repeating with the same suffix forever.
+ */
+ } while (ops->hierarchies[i] && idx > 0 && idx < 1000);
+
+ if (idx < 1000)
+ bret = true;
+
+on_error:
+ free(monitor_cgroup);
- return cg_hybrid_init();
+ return bret;
}
-static void *cgfsng_init(struct lxc_handler *handler)
+/* Try to create the same cgroup in all hierarchies. Start with cgroup_pattern;
+ * next cgroup_pattern-1, -2, ..., -999.
+ *
+ * The base name is <cgroup_meta.dir>/<name> when cgroup_meta.dir is set,
+ * otherwise ops->cgroup_pattern with "%n" replaced by the container name.
+ * Whenever creation fails in some hierarchy, the paths already created for
+ * that attempt are removed again before the next suffix is tried.
+ *
+ * On success the chosen name is stored in ops->container_cgroup and true is
+ * returned. Returns false when a container cgroup was already recorded, the
+ * handler has no config, the name cannot be expanded, or all 1000 candidate
+ * names failed.
+ */
+__cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops,
+ struct lxc_handler *handler)
{
- const char *cgroup_pattern;
- struct cgfsng_handler_data *d;
+ int i;
+ size_t len;
+ char *container_cgroup, *offset, *tmp;
+ int idx = 0;
+ struct lxc_conf *conf = handler->conf;
- d = must_alloc(sizeof(*d));
- memset(d, 0, sizeof(*d));
+ if (ops->container_cgroup) {
+ WARN("cgfsng_create called a second time: %s", ops->container_cgroup);
+ return false;
+ }
- /* copy container name */
- d->name = must_copy_string(handler->name);
+ if (!conf)
+ return false;
- /* copy per-container cgroup information */
- d->cgroup_meta.dir = NULL;
- d->cgroup_meta.controllers = NULL;
- if (handler->conf) {
- d->cgroup_meta.dir = must_copy_string(handler->conf->cgroup_meta.dir);
- d->cgroup_meta.controllers = must_copy_string(handler->conf->cgroup_meta.controllers);
+ if (conf->cgroup_meta.dir)
+ tmp = lxc_string_join("/", (const char *[]){conf->cgroup_meta.dir, handler->name, NULL}, false);
+ else
+ tmp = lxc_string_replace("%n", handler->name, ops->cgroup_pattern);
+ if (!tmp) {
+ ERROR("Failed expanding cgroup name pattern");
+ return false;
}
- /* copy system-wide cgroup information */
- cgroup_pattern = lxc_global_config_value("lxc.cgroup.pattern");
- if (!cgroup_pattern) {
- /* lxc.cgroup.pattern is only NULL on error. */
- ERROR("Failed to retrieve cgroup pattern");
+ len = strlen(tmp) + 5; /* leave room for -NNN\0 */
+ container_cgroup = must_alloc(len);
+ (void)strlcpy(container_cgroup, tmp, len);
+ free(tmp);
+ offset = container_cgroup + len - 5; /* points at the terminator of the base name */
+
+again:
+ if (idx == 1000) {
+ ERROR("Too many conflicting cgroup names");
goto out_free;
}
- d->cgroup_pattern = must_copy_string(cgroup_pattern);
- d->cgroup_layout = cgroup_layout;
- if (d->cgroup_layout == CGROUP_LAYOUT_LEGACY)
- TRACE("Running with legacy cgroup layout");
- else if (d->cgroup_layout == CGROUP_LAYOUT_HYBRID)
- TRACE("Running with hybrid cgroup layout");
- else if (d->cgroup_layout == CGROUP_LAYOUT_UNIFIED)
- TRACE("Running with unified cgroup layout");
- else
- WARN("Running with unknown cgroup layout");
+ if (idx) {
+ int ret;
+
+ ret = snprintf(offset, 5, "-%d", idx);
+ if (ret < 0 || (size_t)ret >= 5) { /* result is only inspected for the compiler workaround below */
+ FILE *f = fopen("/dev/null", "w");
+ if (f) {
+ fprintf(f, "Workaround for GCC7 bug: "
+ "https://gcc.gnu.org/bugzilla/"
+ "show_bug.cgi?id=78969");
+ fclose(f);
+ }
+ }
+ }
+
+ for (i = 0; ops->hierarchies[i]; i++) {
+ if (!container_create_path_for_hierarchy(ops->hierarchies[i], container_cgroup)) {
+ ERROR("Failed to create cgroup \"%s\"", ops->hierarchies[i]->container_full_path);
+ free(ops->hierarchies[i]->container_full_path);
+ ops->hierarchies[i]->container_full_path = NULL;
+ for (int j = 0; j < i; j++)
+ remove_path_for_hierarchy(ops->hierarchies[j], container_cgroup, false);
+ idx++;
+ goto again;
+ }
+ }
- if (lxc_cgfsng_debug)
- lxc_cgfsng_print_debuginfo(d);
+ ops->container_cgroup = container_cgroup; /* ownership of the name moves to ops */
- return d;
+ return true;
out_free:
- free_handler_data(d);
- return NULL;
+ free(container_cgroup);
+
+ return false;
}
-static int recursive_destroy(char *dirname)
+__cgfsng_ops static bool __do_cgroup_enter(struct cgroup_ops *ops, pid_t pid,
+ bool monitor)
{
- int ret;
- struct dirent *direntp;
- DIR *dir;
- int r = 0;
-
- dir = opendir(dirname);
- if (!dir)
- return -1;
-
- while ((direntp = readdir(dir))) {
- char *pathname;
- struct stat mystat;
+ int len;
+ char pidstr[INTTYPE_TO_STRLEN(pid_t)];
- if (!strcmp(direntp->d_name, ".") ||
- !strcmp(direntp->d_name, ".."))
- continue;
+ len = snprintf(pidstr, sizeof(pidstr), "%d", pid);
+ if (len < 0 || (size_t)len >= sizeof(pidstr))
+ return false;
- pathname = must_make_path(dirname, direntp->d_name, NULL);
+ for (int i = 0; ops->hierarchies[i]; i++) {
+ int ret;
+ char *path;
- ret = lstat(pathname, &mystat);
- if (ret < 0) {
- if (!r)
- WARN("Failed to stat \"%s\"", pathname);
- r = -1;
- goto next;
+ if (monitor)
+ path = must_make_path(ops->hierarchies[i]->monitor_full_path,
+ "cgroup.procs", NULL);
+ else
+ path = must_make_path(ops->hierarchies[i]->container_full_path,
+ "cgroup.procs", NULL);
+ ret = lxc_write_to_file(path, pidstr, len, false, 0666);
+ if (ret != 0) {
+ SYSERROR("Failed to enter cgroup \"%s\"", path);
+ free(path);
+ return false;
}
-
- if (!S_ISDIR(mystat.st_mode))
- goto next;
-
- ret = recursive_destroy(pathname);
- if (ret < 0)
- r = -1;
- next:
- free(pathname);
- }
-
- ret = rmdir(dirname);
- if (ret < 0) {
- if (!r)
- WARN("%s - Failed to delete \"%s\"", strerror(errno), dirname);
- r = -1;
- }
-
- ret = closedir(dir);
- if (ret < 0) {
- if (!r)
- WARN("%s - Failed to delete \"%s\"", strerror(errno), dirname);
- r = -1;
+ free(path);
}
- return r;
+ return true;
}
-static int cgroup_rmdir(char *container_cgroup)
+__cgfsng_ops static bool cgfsng_monitor_enter(struct cgroup_ops *ops, pid_t pid)
{
- int i;
-
- if (!container_cgroup || !hierarchies)
- return 0;
+ return __do_cgroup_enter(ops, pid, true);
+}
- for (i = 0; hierarchies[i]; i++) {
- int ret;
- struct hierarchy *h = hierarchies[i];
+static bool cgfsng_payload_enter(struct cgroup_ops *ops, pid_t pid)
+{
+ return __do_cgroup_enter(ops, pid, false);
+}
- if (!h->fullcgpath)
- continue;
+static int chowmod(char *path, uid_t chown_uid, gid_t chown_gid,
+ mode_t chmod_mode)
+{
+ int ret;
- ret = recursive_destroy(h->fullcgpath);
- if (ret < 0)
- WARN("Failed to destroy \"%s\"", h->fullcgpath);
+ ret = chown(path, chown_uid, chown_gid);
+ if (ret < 0) {
+ SYSWARN("Failed to chown(%s, %d, %d)", path, (int)chown_uid, (int)chown_gid);
+ return -1;
+ }
- free(h->fullcgpath);
- h->fullcgpath = NULL;
+ ret = chmod(path, chmod_mode);
+ if (ret < 0) {
+ SYSWARN("Failed to chmod(%s, %d)", path, (int)chmod_mode);
+ return -1;
}
return 0;
}
-struct generic_userns_exec_data {
- struct cgfsng_handler_data *d;
- struct lxc_conf *conf;
- uid_t origuid; /* target uid in parent namespace */
- char *path;
-};
-
-static int cgroup_rmdir_wrapper(void *data)
+/* chgrp the container cgroups to container group. We leave
+ * the container owner as cgroup owner. So we must make the
+ * directories 775 so that the container can create sub-cgroups.
+ *
+ * Also chown the tasks and cgroup.procs files. Those may not
+ * exist depending on kernel version.
+ */
+static int chown_cgroup_wrapper(void *data)
{
- int ret;
+ int i, ret;
+ uid_t destuid;
struct generic_userns_exec_data *arg = data;
uid_t nsuid = (arg->conf->root_nsuid_map != NULL) ? 0 : arg->conf->init_uid;
gid_t nsgid = (arg->conf->root_nsgid_map != NULL) ? 0 : arg->conf->init_gid;
ret = setresgid(nsgid, nsgid, nsgid);
if (ret < 0) {
- SYSERROR("Failed to setresgid(%d, %d, %d)", (int)nsgid,
- (int)nsgid, (int)nsgid);
+ SYSERROR("Failed to setresgid(%d, %d, %d)",
+ (int)nsgid, (int)nsgid, (int)nsgid);
return -1;
}
ret = setresuid(nsuid, nsuid, nsuid);
if (ret < 0) {
- SYSERROR("Failed to setresuid(%d, %d, %d)", (int)nsuid,
- (int)nsuid, (int)nsuid);
+ SYSERROR("Failed to setresuid(%d, %d, %d)",
+ (int)nsuid, (int)nsuid, (int)nsuid);
return -1;
}
return -1;
}
- return cgroup_rmdir(arg->d->container_cgroup);
-}
-
-static void cgfsng_destroy(void *hdata, struct lxc_conf *conf)
-{
- int ret;
- struct cgfsng_handler_data *d = hdata;
- struct generic_userns_exec_data wrap;
+ destuid = get_ns_uid(arg->origuid);
+ if (destuid == LXC_INVALID_UID)
+ destuid = 0;
- if (!d)
- return;
+ for (i = 0; arg->hierarchies[i]; i++) {
+ char *fullpath;
+ char *path = arg->hierarchies[i]->container_full_path;
- wrap.origuid = 0;
- wrap.d = hdata;
- wrap.conf = conf;
+ ret = chowmod(path, destuid, nsgid, 0775);
+ if (ret < 0)
+ return -1;
- if (conf && !lxc_list_empty(&conf->id_map))
- ret = userns_exec_1(conf, cgroup_rmdir_wrapper, &wrap,
- "cgroup_rmdir_wrapper");
- else
- ret = cgroup_rmdir(d->container_cgroup);
- if (ret < 0) {
- WARN("Failed to destroy cgroups");
- return;
- }
+ /* Failures to chown() these are inconvenient but not
+ * detrimental. We leave these owned by the container launcher,
+ * so that container root can write to the files to attach. We
+ * chmod() them 664 so that container systemd can write to the
+ * files (which systemd in wily insists on doing).
+ */
- free_handler_data(d);
-}
-
-struct cgroup_ops *cgfsng_ops_init(void)
-{
- if (getenv("LXC_DEBUG_CGFSNG"))
- lxc_cgfsng_debug = true;
-
- if (!cg_init())
- return NULL;
-
- return &cgfsng_ops;
-}
-
-static bool cg_unified_create_cgroup(struct hierarchy *h, char *cgname)
-{
- size_t i, parts_len;
- char **it;
- size_t full_len = 0;
- char *add_controllers = NULL, *cgroup = NULL;
- char **parts = NULL;
- bool bret = false;
-
- if (h->version != CGROUP2_SUPER_MAGIC)
- return true;
-
- if (!h->controllers)
- return true;
+ if (arg->hierarchies[i]->version == CGROUP_SUPER_MAGIC) {
+ fullpath = must_make_path(path, "tasks", NULL);
+ (void)chowmod(fullpath, destuid, nsgid, 0664);
+ free(fullpath);
+ }
- /* For now we simply enable all controllers that we have detected by
- * creating a string like "+memory +pids +cpu +io".
- * TODO: In the near future we might want to support "-<controller>"
- * etc. but whether supporting semantics like this make sense will need
- * some thinking.
- */
- for (it = h->controllers; it && *it; it++) {
- full_len += strlen(*it) + 2;
- add_controllers = must_realloc(add_controllers, full_len + 1);
- if (h->controllers[0] == *it)
- add_controllers[0] = '\0';
- strcat(add_controllers, "+");
- strcat(add_controllers, *it);
- if ((it + 1) && *(it + 1))
- strcat(add_controllers, " ");
- }
+ fullpath = must_make_path(path, "cgroup.procs", NULL);
+ (void)chowmod(fullpath, destuid, nsgid, 0664);
+ free(fullpath);
- parts = lxc_string_split(cgname, '/');
- if (!parts)
- goto on_error;
- parts_len = lxc_array_len((void **)parts);
- if (parts_len > 0)
- parts_len--;
+ if (arg->hierarchies[i]->version != CGROUP2_SUPER_MAGIC)
+ continue;
- cgroup = must_make_path(h->mountpoint, h->base_cgroup, NULL);
- for (i = 0; i < parts_len; i++) {
- int ret;
- char *target;
+ fullpath = must_make_path(path, "cgroup.subtree_control", NULL);
+ (void)chowmod(fullpath, destuid, nsgid, 0664);
+ free(fullpath);
- cgroup = must_append_path(cgroup, parts[i], NULL);
- target = must_make_path(cgroup, "cgroup.subtree_control", NULL);
- ret = lxc_write_to_file(target, add_controllers, full_len, false);
- free(target);
- if (ret < 0) {
- SYSERROR("Could not enable \"%s\" controllers in the "
- "unified cgroup \"%s\"", add_controllers, cgroup);
- goto on_error;
- }
+ fullpath = must_make_path(path, "cgroup.threads", NULL);
+ (void)chowmod(fullpath, destuid, nsgid, 0664);
+ free(fullpath);
}
- bret = true;
-
-on_error:
- lxc_free_array((void **)parts, free);
- free(add_controllers);
- free(cgroup);
- return bret;
+ return 0;
}
-static bool create_path_for_hierarchy(struct hierarchy *h, char *cgname)
+__cgfsng_ops static bool cgfsng_chown(struct cgroup_ops *ops,
+ struct lxc_conf *conf)
{
- int ret;
+ struct generic_userns_exec_data wrap;
- h->fullcgpath = must_make_path(h->mountpoint, h->base_cgroup, cgname, NULL);
- if (dir_exists(h->fullcgpath)) {
- ERROR("The cgroup \"%s\" already existed", h->fullcgpath);
- return false;
- }
+ if (lxc_list_empty(&conf->id_map))
+ return true;
- if (!cg_legacy_handle_cpuset_hierarchy(h, cgname)) {
- ERROR("Failed to handle legacy cpuset controller");
- return false;
- }
+ wrap.origuid = geteuid();
+ wrap.path = NULL;
+ wrap.hierarchies = ops->hierarchies;
+ wrap.conf = conf;
- ret = mkdir_p(h->fullcgpath, 0755);
- if (ret < 0) {
- ERROR("Failed to create cgroup \"%s\"", h->fullcgpath);
+ if (userns_exec_1(conf, chown_cgroup_wrapper, &wrap,
+ "chown_cgroup_wrapper") < 0) {
+ ERROR("Error requesting cgroup chown in new user namespace");
return false;
}
- return cg_unified_create_cgroup(h, cgname);
+ return true;
}
-static void remove_path_for_hierarchy(struct hierarchy *h, char *cgname)
+/* cgroup-full:* is done, no need to create subdirs */
+static bool cg_mount_needs_subdirs(int type)
{
- int ret;
-
- ret = rmdir(h->fullcgpath);
- if (ret < 0)
- SYSERROR("Failed to rmdir(\"%s\") from failed creation attempt", h->fullcgpath);
+ if (type >= LXC_AUTO_CGROUP_FULL_RO)
+ return false;
- free(h->fullcgpath);
- h->fullcgpath = NULL;
+ return true;
}
-/* Try to create the same cgroup in all hierarchies. Start with cgroup_pattern;
- * next cgroup_pattern-1, -2, ..., -999.
+/* After $rootfs/sys/fs/cgroup/controller/the/cg/path has been created,
+ * remount controller ro if needed and bindmount the cgroupfs onto
+ * controller/the/cg/path.
*/
-static inline bool cgfsng_create(void *hdata)
+static int cg_legacy_mount_controllers(int type, struct hierarchy *h,
+ char *controllerpath, char *cgpath,
+ const char *container_cgroup)
{
- int i;
- size_t len;
- char *container_cgroup, *offset, *tmp;
- int idx = 0;
- struct cgfsng_handler_data *d = hdata;
-
- if (!d)
- return false;
+ int ret, remount_flags;
+ char *sourcepath;
+ int flags = MS_BIND;
- if (d->container_cgroup) {
- WARN("cgfsng_create called a second time");
- return false;
- }
+ if (type == LXC_AUTO_CGROUP_RO || type == LXC_AUTO_CGROUP_MIXED) {
+ ret = mount(controllerpath, controllerpath, "cgroup", MS_BIND, NULL);
+ if (ret < 0) {
+ SYSERROR("Failed to bind mount \"%s\" onto \"%s\"",
+ controllerpath, controllerpath);
+ return -1;
+ }
- if (d->cgroup_meta.dir)
- tmp = lxc_string_join("/", (const char *[]){d->cgroup_meta.dir, d->name, NULL}, false);
- else
- tmp = lxc_string_replace("%n", d->name, d->cgroup_pattern);
- if (!tmp) {
- ERROR("Failed expanding cgroup name pattern");
- return false;
- }
- len = strlen(tmp) + 5; /* leave room for -NNN\0 */
- container_cgroup = must_alloc(len);
- strcpy(container_cgroup, tmp);
- free(tmp);
- offset = container_cgroup + len - 5;
+ remount_flags = add_required_remount_flags(controllerpath,
+ controllerpath,
+ flags | MS_REMOUNT);
+ ret = mount(controllerpath, controllerpath, "cgroup",
+ remount_flags | MS_REMOUNT | MS_BIND | MS_RDONLY,
+ NULL);
+ if (ret < 0) {
+ SYSERROR("Failed to remount \"%s\" ro", controllerpath);
+ return -1;
+ }
-again:
- if (idx == 1000) {
- ERROR("Too many conflicting cgroup names");
- goto out_free;
+ INFO("Remounted %s read-only", controllerpath);
}
- if (idx) {
- int ret;
+ sourcepath = must_make_path(h->mountpoint, h->container_base_path,
+ container_cgroup, NULL);
+ if (type == LXC_AUTO_CGROUP_RO)
+ flags |= MS_RDONLY;
- ret = snprintf(offset, 5, "-%d", idx);
- if (ret < 0 || (size_t)ret >= 5) {
- FILE *f = fopen("/dev/null", "w");
- if (f) {
- fprintf(f, "Workaround for GCC7 bug: "
- "https://gcc.gnu.org/bugzilla/"
- "show_bug.cgi?id=78969");
- fclose(f);
- }
- }
+ ret = mount(sourcepath, cgpath, "cgroup", flags, NULL);
+ if (ret < 0) {
+ SYSERROR("Failed to mount \"%s\" onto \"%s\"", h->controllers[0], cgpath);
+ free(sourcepath);
+ return -1;
}
+ INFO("Mounted \"%s\" onto \"%s\"", h->controllers[0], cgpath);
- for (i = 0; hierarchies[i]; i++) {
- if (!create_path_for_hierarchy(hierarchies[i], container_cgroup)) {
- int j;
- ERROR("Failed to create cgroup \"%s\"", hierarchies[i]->fullcgpath);
- free(hierarchies[i]->fullcgpath);
- hierarchies[i]->fullcgpath = NULL;
- for (j = 0; j < i; j++)
- remove_path_for_hierarchy(hierarchies[j], container_cgroup);
- idx++;
- goto again;
+ if (flags & MS_RDONLY) {
+ remount_flags = add_required_remount_flags(sourcepath, cgpath,
+ flags | MS_REMOUNT);
+ ret = mount(sourcepath, cgpath, "cgroup", remount_flags, NULL);
+ if (ret < 0) {
+ SYSERROR("Failed to remount \"%s\" ro", cgpath);
+ free(sourcepath);
+ return -1;
}
+ INFO("Remounted %s read-only", cgpath);
}
- d->container_cgroup = container_cgroup;
-
- return true;
-
-out_free:
- free(container_cgroup);
-
- return false;
+ free(sourcepath);
+ INFO("Completed second stage cgroup automounts for \"%s\"", cgpath);
+ return 0;
}
-static bool cgfsng_enter(void *hdata, pid_t pid)
+/* __cg_mount_direct
+ *
+ * Mount cgroup hierarchies directly without using bind-mounts. The main
+ * use cases are mounting cgroup hierarchies in cgroup namespaces and mounting
+ * cgroups for the LXC_AUTO_CGROUP_FULL option.
+ */
+static int __cg_mount_direct(int type, struct hierarchy *h,
+ const char *controllerpath)
{
- int i, len;
- char pidstr[25];
-
- len = snprintf(pidstr, 25, "%d", pid);
- if (len < 0 || len >= 25)
- return false;
-
- for (i = 0; hierarchies[i]; i++) {
- int ret;
- char *fullpath;
-
- fullpath = must_make_path(hierarchies[i]->fullcgpath,
- "cgroup.procs", NULL);
- ret = lxc_write_to_file(fullpath, pidstr, len, false);
- if (ret != 0) {
- SYSERROR("Failed to enter cgroup \"%s\"", fullpath);
- free(fullpath);
- return false;
- }
- free(fullpath);
- }
+ int ret;
+ char *controllers = NULL;
+ char *fstype = "cgroup2";
+ unsigned long flags = 0;
- return true;
-}
+ flags |= MS_NOSUID;
+ flags |= MS_NOEXEC;
+ flags |= MS_NODEV;
+ flags |= MS_RELATIME;
-static int chowmod(char *path, uid_t chown_uid, gid_t chown_gid,
- mode_t chmod_mode)
-{
- int ret;
+ if (type == LXC_AUTO_CGROUP_RO || type == LXC_AUTO_CGROUP_FULL_RO)
+ flags |= MS_RDONLY;
- ret = chown(path, chown_uid, chown_gid);
- if (ret < 0) {
- WARN("%s - Failed to chown(%s, %d, %d)", strerror(errno), path,
- (int)chown_uid, (int)chown_gid);
- return -1;
+ if (h->version != CGROUP2_SUPER_MAGIC) {
+ controllers = lxc_string_join(",", (const char **)h->controllers, false);
+ if (!controllers)
+ return -ENOMEM;
+ fstype = "cgroup";
}
- ret = chmod(path, chmod_mode);
+ ret = mount("cgroup", controllerpath, fstype, flags, controllers);
+ free(controllers);
if (ret < 0) {
- WARN("%s - Failed to chmod(%s, %d)", strerror(errno), path,
- (int)chmod_mode);
+ SYSERROR("Failed to mount \"%s\" with cgroup filesystem type %s", controllerpath, fstype);
return -1;
}
+ DEBUG("Mounted \"%s\" with cgroup filesystem type %s", controllerpath, fstype);
return 0;
}
-/* chgrp the container cgroups to container group. We leave
- * the container owner as cgroup owner. So we must make the
- * directories 775 so that the container can create sub-cgroups.
- *
- * Also chown the tasks and cgroup.procs files. Those may not
- * exist depending on kernel version.
- */
-static int chown_cgroup_wrapper(void *data)
+static inline int cg_mount_in_cgroup_namespace(int type, struct hierarchy *h,
+ const char *controllerpath)
{
- int i, ret;
- uid_t destuid;
- struct generic_userns_exec_data *arg = data;
- uid_t nsuid = (arg->conf->root_nsuid_map != NULL) ? 0 : arg->conf->init_uid;
- gid_t nsgid = (arg->conf->root_nsgid_map != NULL) ? 0 : arg->conf->init_gid;
+ return __cg_mount_direct(type, h, controllerpath);
+}
- ret = setresgid(nsgid, nsgid, nsgid);
- if (ret < 0) {
- SYSERROR("Failed to setresgid(%d, %d, %d)",
- (int)nsgid, (int)nsgid, (int)nsgid);
- return -1;
- }
+static inline int cg_mount_cgroup_full(int type, struct hierarchy *h,
+ const char *controllerpath)
+{
+ if (type < LXC_AUTO_CGROUP_FULL_RO || type > LXC_AUTO_CGROUP_FULL_MIXED)
+ return 0;
- ret = setresuid(nsuid, nsuid, nsuid);
- if (ret < 0) {
- SYSERROR("Failed to setresuid(%d, %d, %d)",
- (int)nsuid, (int)nsuid, (int)nsuid);
- return -1;
- }
-
- ret = setgroups(0, NULL);
- if (ret < 0 && errno != EPERM) {
- SYSERROR("Failed to setgroups(0, NULL)");
- return -1;
- }
-
- destuid = get_ns_uid(arg->origuid);
-
- for (i = 0; hierarchies[i]; i++) {
- char *fullpath;
- char *path = hierarchies[i]->fullcgpath;
-
- ret = chowmod(path, destuid, nsgid, 0775);
- if (ret < 0)
- return -1;
-
- /* Failures to chown() these are inconvenient but not
- * detrimental We leave these owned by the container launcher,
- * so that container root can write to the files to attach. We
- * chmod() them 664 so that container systemd can write to the
- * files (which systemd in wily insists on doing).
- */
-
- if (hierarchies[i]->version == CGROUP_SUPER_MAGIC) {
- fullpath = must_make_path(path, "tasks", NULL);
- (void)chowmod(fullpath, destuid, nsgid, 0664);
- free(fullpath);
- }
-
- fullpath = must_make_path(path, "cgroup.procs", NULL);
- (void)chowmod(fullpath, destuid, 0, 0664);
- free(fullpath);
-
- if (hierarchies[i]->version != CGROUP2_SUPER_MAGIC)
- continue;
-
- fullpath = must_make_path(path, "cgroup.subtree_control", NULL);
- (void)chowmod(fullpath, destuid, nsgid, 0664);
- free(fullpath);
-
- fullpath = must_make_path(path, "cgroup.threads", NULL);
- (void)chowmod(fullpath, destuid, nsgid, 0664);
- free(fullpath);
- }
-
- return 0;
-}
-
-static bool cgfsng_chown(void *hdata, struct lxc_conf *conf)
-{
- struct cgfsng_handler_data *d = hdata;
- struct generic_userns_exec_data wrap;
-
- if (!d)
- return false;
-
- if (lxc_list_empty(&conf->id_map))
- return true;
-
- wrap.origuid = geteuid();
- wrap.path = NULL;
- wrap.d = d;
- wrap.conf = conf;
-
- if (userns_exec_1(conf, chown_cgroup_wrapper, &wrap,
- "chown_cgroup_wrapper") < 0) {
- ERROR("Error requesting cgroup chown in new user namespace");
- return false;
- }
-
- return true;
-}
-
-/* We've safe-mounted a tmpfs as parent, so we don't need to protect against
- * symlinks any more - just use mount.
- *
- * mount cgroup-full if requested
- */
-static int mount_cgroup_full(int type, struct hierarchy *h, char *dest,
- char *container_cgroup)
-{
- int ret;
- char *rwpath, *source;
-
- if (type < LXC_AUTO_CGROUP_FULL_RO || type > LXC_AUTO_CGROUP_FULL_MIXED)
- return 0;
-
- ret = mount(h->mountpoint, dest, "cgroup", MS_BIND, NULL);
- if (ret < 0) {
- SYSERROR("Failed to bind mount cgroup \"%s\" onto \"%s\"",
- h->mountpoint, dest);
- return -1;
- }
-
- if (type != LXC_AUTO_CGROUP_FULL_RW) {
- unsigned long flags = MS_BIND | MS_NOSUID | MS_NOEXEC | MS_NODEV |
- MS_REMOUNT | MS_RDONLY;
-
- ret = mount(NULL, dest, "cgroup", flags, NULL);
- if (ret < 0) {
- SYSERROR("Failed to remount cgroup \"%s\" read-only", dest);
- return -1;
- }
- }
-
- INFO("Bind mounted \"%s\" onto \"%s\"", h->mountpoint, dest);
- if (type != LXC_AUTO_CGROUP_FULL_MIXED)
- return 0;
-
- /* mount just the container path rw */
- source = must_make_path(h->mountpoint, h->base_cgroup, container_cgroup, NULL);
- rwpath = must_make_path(dest, h->base_cgroup, container_cgroup, NULL);
- ret = mount(source, rwpath, "cgroup", MS_BIND, NULL);
- if (ret < 0)
- WARN("%s - Failed to mount cgroup \"%s\" read-write",
- strerror(errno), rwpath);
-
- TRACE("Mounted cgroup \"%s\" read-write", rwpath);
- free(rwpath);
- free(source);
- return 0;
-}
-
-/* cgroup-full:* is done, no need to create subdirs */
-static bool cg_mount_needs_subdirs(int type)
-{
- if (type >= LXC_AUTO_CGROUP_FULL_RO)
- return false;
-
- return true;
-}
-
-/* After $rootfs/sys/fs/container/controller/the/cg/path has been created,
- * remount controller ro if needed and bindmount the cgroupfs onto
- * controll/the/cg/path.
- */
-static int do_secondstage_mounts_if_needed(int type, struct hierarchy *h,
- char *controllerpath, char *cgpath,
- const char *container_cgroup)
-{
- int ret, remount_flags;
- char *sourcepath;
- int flags = MS_BIND;
-
- if (type == LXC_AUTO_CGROUP_RO || type == LXC_AUTO_CGROUP_MIXED) {
- ret = mount(controllerpath, controllerpath, "cgroup", MS_BIND, NULL);
- if (ret < 0) {
- SYSERROR("Failed to bind mount \"%s\" onto \"%s\"",
- controllerpath, controllerpath);
- return -1;
- }
-
- remount_flags = add_required_remount_flags(controllerpath,
- controllerpath,
- flags | MS_REMOUNT);
- ret = mount(controllerpath, controllerpath, "cgroup",
- MS_REMOUNT | MS_BIND | MS_RDONLY, NULL);
- if (ret < 0) {
- SYSERROR("Failed to remount \"%s\" ro", controllerpath);
- return -1;
- }
-
- INFO("Remounted %s read-only", controllerpath);
- }
-
- sourcepath = must_make_path(h->mountpoint, h->base_cgroup,
- container_cgroup, NULL);
- if (type == LXC_AUTO_CGROUP_RO)
- flags |= MS_RDONLY;
-
- ret = mount(sourcepath, cgpath, "cgroup", flags, NULL);
- if (ret < 0) {
- SYSERROR("Failed to mount \"%s\" onto \"%s\"", h->controllers[0], cgpath);
- free(sourcepath);
- return -1;
- }
- INFO("Mounted \"%s\" onto \"%s\"", h->controllers[0], cgpath);
-
- if (flags & MS_RDONLY) {
- remount_flags = add_required_remount_flags(sourcepath, cgpath,
- flags | MS_REMOUNT);
- ret = mount(sourcepath, cgpath, "cgroup", remount_flags, NULL);
- if (ret < 0) {
- SYSERROR("Failed to remount \"%s\" ro", cgpath);
- free(sourcepath);
- return -1;
- }
- INFO("Remounted %s read-only", cgpath);
- }
-
- free(sourcepath);
- INFO("Completed second stage cgroup automounts for \"%s\"", cgpath);
- return 0;
-}
-
-static int cg_mount_in_cgroup_namespace(int type, struct hierarchy *h,
- const char *controllerpath)
-{
- int ret;
- char *controllers = NULL;
- char *fstype = "cgroup2";
- unsigned long flags = 0;
-
- flags |= MS_NOSUID;
- flags |= MS_NOEXEC;
- flags |= MS_NODEV;
- flags |= MS_RELATIME;
-
- if (type == LXC_AUTO_CGROUP_RO || type == LXC_AUTO_CGROUP_FULL_RO)
- flags |= MS_RDONLY;
-
- if (h->version != CGROUP2_SUPER_MAGIC) {
- controllers = lxc_string_join(",", (const char **)h->controllers, false);
- if (!controllers)
- return -ENOMEM;
- fstype = "cgroup";
- }
-
- ret = mount("cgroup", controllerpath, fstype, flags, controllers);
- free(controllers);
- if (ret < 0) {
- SYSERROR("Failed to mount %s with cgroup filesystem type %s", controllerpath, fstype);
- return -1;
- }
-
- DEBUG("Mounted %s with cgroup filesystem type %s", controllerpath, fstype);
- return 0;
+ return __cg_mount_direct(type, h, controllerpath);
}
-static bool cgfsng_mount(void *hdata, const char *root, int type)
+__cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
+ struct lxc_handler *handler,
+ const char *root, int type)
{
int i, ret;
char *tmpfspath = NULL;
bool has_cgns = false, retval = false, wants_force_mount = false;
- struct lxc_handler *handler = hdata;
- struct cgfsng_handler_data *d = handler->cgroup_data;
if ((type & LXC_AUTO_CGROUP_MASK) == 0)
return true;
/* Mount tmpfs */
tmpfspath = must_make_path(root, "/sys/fs/cgroup", NULL);
- ret = safe_mount("cgroup_root", tmpfspath, "tmpfs",
+ ret = safe_mount(NULL, tmpfspath, "tmpfs",
MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME,
"size=10240k,mode=755", root);
if (ret < 0)
goto on_error;
- for (i = 0; hierarchies[i]; i++) {
+ for (i = 0; ops->hierarchies[i]; i++) {
char *controllerpath, *path2;
- struct hierarchy *h = hierarchies[i];
+ struct hierarchy *h = ops->hierarchies[i];
char *controller = strrchr(h->mountpoint, '/');
if (!controller)
continue;
}
- ret = mount_cgroup_full(type, h, controllerpath, d->container_cgroup);
+ ret = cg_mount_cgroup_full(type, h, controllerpath);
if (ret < 0) {
free(controllerpath);
goto on_error;
continue;
}
- path2 = must_make_path(controllerpath, h->base_cgroup,
- d->container_cgroup, NULL);
+ path2 = must_make_path(controllerpath, h->container_base_path,
+ ops->container_cgroup, NULL);
ret = mkdir_p(path2, 0755);
if (ret < 0) {
free(controllerpath);
goto on_error;
}
- ret = do_secondstage_mounts_if_needed(type, h, controllerpath,
- path2, d->container_cgroup);
+ ret = cg_legacy_mount_controllers(type, h, controllerpath,
+ path2, ops->container_cgroup);
free(controllerpath);
free(path2);
if (ret < 0)
while ((direntp = readdir(dir))) {
struct stat mystat;
- if (!direntp)
- break;
-
if (!strcmp(direntp->d_name, ".") ||
!strcmp(direntp->d_name, ".."))
continue;
return count;
}
-static int cgfsng_nrtasks(void *hdata)
+__cgfsng_ops static int cgfsng_nrtasks(struct cgroup_ops *ops)
{
int count;
char *path;
- struct cgfsng_handler_data *d = hdata;
- if (!d || !d->container_cgroup || !hierarchies)
+ if (!ops->container_cgroup || !ops->hierarchies)
return -1;
- path = must_make_path(hierarchies[0]->fullcgpath, NULL);
+ path = must_make_path(ops->hierarchies[0]->container_full_path, NULL);
count = recursive_count_nrtasks(path);
free(path);
return count;
}
/* Only root needs to escape to the cgroup of its init. */
-static bool cgfsng_escape()
+__cgfsng_ops static bool cgfsng_escape(const struct cgroup_ops *ops,
+ struct lxc_conf *conf)
{
int i;
- if (geteuid())
+ if (conf->cgroup_meta.relative || geteuid())
return true;
- for (i = 0; hierarchies[i]; i++) {
+ for (i = 0; ops->hierarchies[i]; i++) {
int ret;
char *fullpath;
- fullpath = must_make_path(hierarchies[i]->mountpoint,
- hierarchies[i]->base_cgroup,
+ fullpath = must_make_path(ops->hierarchies[i]->mountpoint,
+ ops->hierarchies[i]->container_base_path,
"cgroup.procs", NULL);
- ret = lxc_write_to_file(fullpath, "0", 2, false);
+ ret = lxc_write_to_file(fullpath, "0", 2, false, 0666);
if (ret != 0) {
SYSERROR("Failed to escape to cgroup \"%s\"", fullpath);
free(fullpath);
return true;
}
-static int cgfsng_num_hierarchies(void)
+__cgfsng_ops static int cgfsng_num_hierarchies(struct cgroup_ops *ops)
{
int i;
- for (i = 0; hierarchies[i]; i++)
+ for (i = 0; ops->hierarchies[i]; i++)
;
return i;
}
-static bool cgfsng_get_hierarchies(int n, char ***out)
+__cgfsng_ops static bool cgfsng_get_hierarchies(struct cgroup_ops *ops, int n, char ***out)
{
int i;
/* sanity check n */
for (i = 0; i < n; i++)
- if (!hierarchies[i])
+ if (!ops->hierarchies[i])
return false;
- *out = hierarchies[i]->controllers;
+ *out = ops->hierarchies[i]->controllers;
return true;
}
/* TODO: If the unified cgroup hierarchy grows a freezer controller this needs
* to be adapted.
*/
-static bool cgfsng_unfreeze(void *hdata)
+__cgfsng_ops static bool cgfsng_unfreeze(struct cgroup_ops *ops)
{
int ret;
char *fullpath;
struct hierarchy *h;
- h = get_hierarchy("freezer");
+ h = get_hierarchy(ops, "freezer");
if (!h)
return false;
- fullpath = must_make_path(h->fullcgpath, "freezer.state", NULL);
- ret = lxc_write_to_file(fullpath, THAWED, THAWED_LEN, false);
+ fullpath = must_make_path(h->container_full_path, "freezer.state", NULL);
+ ret = lxc_write_to_file(fullpath, THAWED, THAWED_LEN, false, 0666);
free(fullpath);
if (ret < 0)
return false;
return true;
}
-static const char *cgfsng_get_cgroup(void *hdata, const char *subsystem)
+__cgfsng_ops static const char *cgfsng_get_cgroup(struct cgroup_ops *ops,
+ const char *controller)
{
struct hierarchy *h;
- h = get_hierarchy(subsystem);
- if (!h)
+ h = get_hierarchy(ops, controller);
+ if (!h) {
+ WARN("Failed to find hierarchy for controller \"%s\"",
+ controller ? controller : "(null)");
return NULL;
+ }
- return h->fullcgpath ? h->fullcgpath + strlen(h->mountpoint) : NULL;
+ return h->container_full_path ? h->container_full_path + strlen(h->mountpoint) : NULL;
}
/* Given a cgroup path returned from lxc_cmd_get_cgroup_path, build a full path,
base_path = must_make_path(h->mountpoint, container_cgroup, NULL);
full_path = must_make_path(base_path, "cgroup.procs", NULL);
/* cgroup is populated */
- ret = lxc_write_to_file(full_path, pidstr, pidstr_len, false);
+ ret = lxc_write_to_file(full_path, pidstr, pidstr_len, false, 0666);
if (ret < 0 && errno != EBUSY)
goto on_error;
free(full_path);
- len = strlen(base_path) + sizeof("/lxc-1000") - 1 +
- sizeof("/cgroup-procs") - 1;
+ len = strlen(base_path) + STRLITERALLEN("/lxc-1000") +
+ STRLITERALLEN("/cgroup-procs");
full_path = must_alloc(len + 1);
do {
if (idx)
if (ret < 0 && errno != EEXIST)
goto on_error;
- strcat(full_path, "/cgroup.procs");
- ret = lxc_write_to_file(full_path, pidstr, len, false);
+ (void)strlcat(full_path, "/cgroup.procs", len + 1);
+ ret = lxc_write_to_file(full_path, pidstr, len, false, 0666);
if (ret == 0)
goto on_success;
return fret;
}
-static bool cgfsng_attach(const char *name, const char *lxcpath, pid_t pid)
+__cgfsng_ops static bool cgfsng_attach(struct cgroup_ops *ops, const char *name,
+ const char *lxcpath, pid_t pid)
{
int i, len, ret;
- char pidstr[25];
+ char pidstr[INTTYPE_TO_STRLEN(pid_t)];
- len = snprintf(pidstr, 25, "%d", pid);
- if (len < 0 || len >= 25)
+ len = snprintf(pidstr, sizeof(pidstr), "%d", pid);
+ if (len < 0 || (size_t)len >= sizeof(pidstr))
return false;
- for (i = 0; hierarchies[i]; i++) {
+ for (i = 0; ops->hierarchies[i]; i++) {
char *path;
char *fullpath = NULL;
- struct hierarchy *h = hierarchies[i];
+ struct hierarchy *h = ops->hierarchies[i];
if (h->version == CGROUP2_SUPER_MAGIC) {
ret = __cg_unified_attach(h, name, lxcpath, pidstr, len,
continue;
fullpath = build_full_cgpath_from_monitorpath(h, path, "cgroup.procs");
- ret = lxc_write_to_file(fullpath, pidstr, len, false);
+ free(path);
+ ret = lxc_write_to_file(fullpath, pidstr, len, false, 0666);
if (ret < 0) {
SYSERROR("Failed to attach %d to %s", (int)pid, fullpath);
free(fullpath);
* don't have a cgroup_data set up, so we ask the running container through the
* commands API for the cgroup path.
*/
-static int cgfsng_get(const char *filename, char *value, size_t len,
- const char *name, const char *lxcpath)
+__cgfsng_ops static int cgfsng_get(struct cgroup_ops *ops, const char *filename,
+ char *value, size_t len, const char *name,
+ const char *lxcpath)
{
int ret = -1;
size_t controller_len;
controller_len = strlen(filename);
controller = alloca(controller_len + 1);
- strcpy(controller, filename);
+ (void)strlcpy(controller, filename, controller_len + 1);
+
p = strchr(controller, '.');
if (p)
*p = '\0';
if (!path)
return -1;
- h = get_hierarchy(controller);
+ h = get_hierarchy(ops, controller);
if (h) {
char *fullpath;
* don't have a cgroup_data set up, so we ask the running container through the
* commands API for the cgroup path.
*/
-static int cgfsng_set(const char *filename, const char *value, const char *name,
- const char *lxcpath)
+__cgfsng_ops static int cgfsng_set(struct cgroup_ops *ops,
+ const char *filename, const char *value,
+ const char *name, const char *lxcpath)
{
int ret = -1;
size_t controller_len;
controller_len = strlen(filename);
controller = alloca(controller_len + 1);
- strcpy(controller, filename);
+ (void)strlcpy(controller, filename, controller_len + 1);
+
p = strchr(controller, '.');
if (p)
*p = '\0';
if (!path)
return -1;
- h = get_hierarchy(controller);
+ h = get_hierarchy(ops, controller);
if (h) {
char *fullpath;
fullpath = build_full_cgpath_from_monitorpath(h, path, filename);
- ret = lxc_write_to_file(fullpath, value, strlen(value), false);
+ ret = lxc_write_to_file(fullpath, value, strlen(value), false, 0666);
free(fullpath);
}
free(path);
return ret;
}
-/*
- * Called from setup_limits - here we have the container's cgroup_data because
- * we created the cgroups
+/* Called from setup_limits - here we have the container's cgroup_data because
+ * we created the cgroups.
*/
-static int cg_legacy_set_data(const char *filename, const char *value,
- struct cgfsng_handler_data *d)
+static int cg_legacy_set_data(struct cgroup_ops *ops, const char *filename,
+ const char *value)
{
- char *fullpath, *p;
size_t len;
+ char *fullpath, *p;
/* "b|c <2^64-1>:<2^64-1> r|w|m" = 47 chars max */
char converted_value[50];
struct hierarchy *h;
len = strlen(filename);
controller = alloca(len + 1);
- strcpy(controller, filename);
+ (void)strlcpy(controller, filename, len + 1);
+
p = strchr(controller, '.');
if (p)
*p = '\0';
value = converted_value;
}
- h = get_hierarchy(controller);
+ h = get_hierarchy(ops, controller);
if (!h) {
ERROR("Failed to setup limits for the \"%s\" controller. "
"The controller seems to be unused by \"cgfsng\" cgroup "
return -ENOENT;
}
- fullpath = must_make_path(h->fullcgpath, filename, NULL);
- ret = lxc_write_to_file(fullpath, value, strlen(value), false);
+ fullpath = must_make_path(h->container_full_path, filename, NULL);
+ ret = lxc_write_to_file(fullpath, value, strlen(value), false, 0666);
free(fullpath);
return ret;
}
-static bool __cg_legacy_setup_limits(void *hdata,
+static bool __cg_legacy_setup_limits(struct cgroup_ops *ops,
struct lxc_list *cgroup_settings,
bool do_devices)
{
- struct cgfsng_handler_data *d = hdata;
- struct lxc_list *iterator, *sorted_cgroup_settings, *next;
+ struct lxc_list *iterator, *next, *sorted_cgroup_settings;
struct lxc_cgroup *cg;
bool ret = false;
cg = iterator->elem;
if (do_devices == !strncmp("devices", cg->subsystem, 7)) {
- if (cg_legacy_set_data(cg->subsystem, cg->value, d)) {
+ if (cg_legacy_set_data(ops, cg->subsystem, cg->value)) {
if (do_devices && (errno == EACCES || errno == EPERM)) {
- WARN("Error setting %s to %s for %s",
- cg->subsystem, cg->value, d->name);
+ WARN("Failed to set \"%s\" to \"%s\"",
+ cg->subsystem, cg->value);
continue;
}
- SYSERROR("Error setting %s to %s for %s",
- cg->subsystem, cg->value, d->name);
+ WARN("Failed to set \"%s\" to \"%s\"",
+ cg->subsystem, cg->value);
goto out;
}
- DEBUG("cgroup '%s' set to '%s'", cg->subsystem, cg->value);
+ DEBUG("Set controller \"%s\" set to \"%s\"",
+ cg->subsystem, cg->value);
}
}
return ret;
}
-static bool __cg_unified_setup_limits(void *hdata,
+static bool __cg_unified_setup_limits(struct cgroup_ops *ops,
struct lxc_list *cgroup_settings)
{
struct lxc_list *iterator;
- struct hierarchy *h = unified;
+ struct hierarchy *h = ops->unified;
if (lxc_list_empty(cgroup_settings))
return true;
char *fullpath;
struct lxc_cgroup *cg = iterator->elem;
- fullpath = must_make_path(h->fullcgpath, cg->subsystem, NULL);
- ret = lxc_write_to_file(fullpath, cg->value, strlen(cg->value), false);
+ fullpath = must_make_path(h->container_full_path, cg->subsystem, NULL);
+ ret = lxc_write_to_file(fullpath, cg->value, strlen(cg->value), false, 0666);
free(fullpath);
if (ret < 0) {
- SYSERROR("Failed to set \"%s\" to \"%s\"", cg->subsystem, cg->value);
+ SYSERROR("Failed to set \"%s\" to \"%s\"",
+ cg->subsystem, cg->value);
return false;
}
TRACE("Set \"%s\" to \"%s\"", cg->subsystem, cg->value);
return true;
}
-static bool cgfsng_setup_limits(void *hdata, struct lxc_conf *conf,
- bool do_devices)
+__cgfsng_ops static bool cgfsng_setup_limits(struct cgroup_ops *ops,
+ struct lxc_conf *conf,
+ bool do_devices)
{
bool bret;
- bret = __cg_legacy_setup_limits(hdata, &conf->cgroup, do_devices);
+ bret = __cg_legacy_setup_limits(ops, &conf->cgroup, do_devices);
if (!bret)
return false;
- return __cg_unified_setup_limits(hdata, &conf->cgroup2);
-}
-
-static struct cgroup_ops cgfsng_ops = {
- .init = cgfsng_init,
- .destroy = cgfsng_destroy,
- .create = cgfsng_create,
- .enter = cgfsng_enter,
- .escape = cgfsng_escape,
- .num_hierarchies = cgfsng_num_hierarchies,
- .get_hierarchies = cgfsng_get_hierarchies,
- .get_cgroup = cgfsng_get_cgroup,
- .get = cgfsng_get,
- .set = cgfsng_set,
- .unfreeze = cgfsng_unfreeze,
- .setup_limits = cgfsng_setup_limits,
- .name = "cgroupfs-ng",
- .attach = cgfsng_attach,
- .chown = cgfsng_chown,
- .mount_cgroup = cgfsng_mount,
- .nrtasks = cgfsng_nrtasks,
- .driver = CGFSNG,
-
- /* unsupported */
- .create_legacy = NULL,
-};
+ return __cg_unified_setup_limits(ops, &conf->cgroup2);
+}
+
+/* Check whether every entry of @controllers also appears in ops->cgroup_use.
+ *
+ * @ops         - cgroup driver state; only ops->cgroup_use is read.
+ * @controllers - NULL-terminated list of controller names, may be NULL.
+ *
+ * Returns true when ops->cgroup_use is NULL, when @controllers is NULL or
+ * empty, or when each controller name is found in ops->cgroup_use. Returns
+ * false as soon as one controller is not listed.
+ */
+static bool cgroup_use_wants_controllers(const struct cgroup_ops *ops,
+					 char **controllers)
+{
+	char **ctrl, **wanted;
+
+	if (!ops->cgroup_use)
+		return true;
+
+	if (!controllers)
+		return true;
+
+	for (ctrl = controllers; *ctrl; ctrl++) {
+		for (wanted = ops->cgroup_use; *wanted; wanted++)
+			if (strcmp(*wanted, *ctrl) == 0)
+				break;
+
+		/* Reached the terminator without a match. */
+		if (!*wanted)
+			return false;
+	}
+
+	return true;
+}
+
+/* Scan /proc/self/mountinfo for cgroup mounts and record every writable
+ * hierarchy in @ops.
+ *
+ * @ops      - receives the discovered hierarchies (ops->hierarchies), the
+ *             detected layout (ops->cgroup_layout) and, for the first usable
+ *             cgroup2 mount, ops->unified.
+ * @relative - when false and the effective uid is 0, init's cgroups
+ *             (/proc/1/cgroup) serve as the base; otherwise the caller's own
+ *             cgroups (/proc/self/cgroup) are used.
+ *
+ * Returns false if the base cgroup info, the list of existing subsystems or
+ * the mount table cannot be read, or if all_controllers_found() rejects the
+ * result; true otherwise.
+ */
+static bool cg_hybrid_init(struct cgroup_ops *ops, bool relative)
+{
+	int ret;
+	char *basecginfo;
+	FILE *f;
+	size_t len = 0;
+	char *line = NULL;
+	char **klist = NULL, **nlist = NULL;
+
+	/* Root spawned containers escape the current cgroup, so use init's
+	 * cgroups as our base in that case.
+	 */
+	if (!relative && (geteuid() == 0))
+		basecginfo = read_file("/proc/1/cgroup");
+	else
+		basecginfo = read_file("/proc/self/cgroup");
+	if (!basecginfo)
+		return false;
+
+	ret = get_existing_subsystems(&klist, &nlist);
+	if (ret < 0) {
+		ERROR("Failed to retrieve available legacy cgroup controllers");
+		free(basecginfo);
+		return false;
+	}
+
+	f = fopen("/proc/self/mountinfo", "r");
+	if (!f) {
+		ERROR("Failed to open \"/proc/self/mountinfo\"");
+		/* The subsystem lists were already populated above; release
+		 * them here as well so this error path does not leak.
+		 */
+		free_string_list(klist);
+		free_string_list(nlist);
+		free(basecginfo);
+		return false;
+	}
+
+	lxc_cgfsng_print_basecg_debuginfo(basecginfo, klist, nlist);
+
+	while (getline(&line, &len, f) != -1) {
+		int type;
+		bool writeable;
+		struct hierarchy *new;
+		char *base_cgroup = NULL, *mountpoint = NULL;
+		char **controller_list = NULL;
+
+		/* Lines that are not cgroup mounts report type 0. */
+		type = get_cgroup_version(line);
+		if (type == 0)
+			continue;
+
+		/* Only the first usable cgroup2 mount is recorded. */
+		if (type == CGROUP2_SUPER_MAGIC && ops->unified)
+			continue;
+
+		/* Track the layout: seeing both versions makes it hybrid. */
+		if (ops->cgroup_layout == CGROUP_LAYOUT_UNKNOWN) {
+			if (type == CGROUP2_SUPER_MAGIC)
+				ops->cgroup_layout = CGROUP_LAYOUT_UNIFIED;
+			else if (type == CGROUP_SUPER_MAGIC)
+				ops->cgroup_layout = CGROUP_LAYOUT_LEGACY;
+		} else if (ops->cgroup_layout == CGROUP_LAYOUT_UNIFIED) {
+			if (type == CGROUP_SUPER_MAGIC)
+				ops->cgroup_layout = CGROUP_LAYOUT_HYBRID;
+		} else if (ops->cgroup_layout == CGROUP_LAYOUT_LEGACY) {
+			if (type == CGROUP2_SUPER_MAGIC)
+				ops->cgroup_layout = CGROUP_LAYOUT_HYBRID;
+		}
+
+		controller_list = cg_hybrid_get_controllers(klist, nlist, line, type);
+		if (!controller_list && type == CGROUP_SUPER_MAGIC)
+			continue;
+
+		if (type == CGROUP_SUPER_MAGIC)
+			if (controller_list_is_dup(ops->hierarchies, controller_list))
+				goto next;
+
+		mountpoint = cg_hybrid_get_mountpoint(line);
+		if (!mountpoint) {
+			ERROR("Failed parsing mountpoint from \"%s\"", line);
+			goto next;
+		}
+
+		if (type == CGROUP_SUPER_MAGIC)
+			base_cgroup = cg_hybrid_get_current_cgroup(basecginfo, controller_list[0], CGROUP_SUPER_MAGIC);
+		else
+			base_cgroup = cg_hybrid_get_current_cgroup(basecginfo, NULL, CGROUP2_SUPER_MAGIC);
+		if (!base_cgroup) {
+			ERROR("Failed to find current cgroup");
+			goto next;
+		}
+
+		trim(base_cgroup);
+		prune_init_scope(base_cgroup);
+		if (type == CGROUP2_SUPER_MAGIC)
+			writeable = test_writeable_v2(mountpoint, base_cgroup);
+		else
+			writeable = test_writeable_v1(mountpoint, base_cgroup);
+		if (!writeable)
+			goto next;
+
+		if (type == CGROUP2_SUPER_MAGIC) {
+			char *cgv2_ctrl_path;
+
+			cgv2_ctrl_path = must_make_path(mountpoint, base_cgroup,
+							"cgroup.controllers",
+							NULL);
+
+			/* The list parsed from the mountinfo line has not been
+			 * handed to anyone for cgroup2 mounts; release it
+			 * before it is replaced so it is not leaked.
+			 */
+			if (controller_list)
+				free_string_list(controller_list);
+
+			controller_list = cg_unified_get_controllers(cgv2_ctrl_path);
+			free(cgv2_ctrl_path);
+			if (!controller_list) {
+				controller_list = cg_unified_make_empty_controller();
+				TRACE("No controllers are enabled for "
+				      "delegation in the unified hierarchy");
+			}
+		}
+
+		/* Exclude all controllers that cgroup use does not want. */
+		if (!cgroup_use_wants_controllers(ops, controller_list))
+			goto next;
+
+		/* controller_list, mountpoint and base_cgroup are handed to
+		 * add_hierarchy() and must not be freed here.
+		 */
+		new = add_hierarchy(&ops->hierarchies, controller_list, mountpoint, base_cgroup, type);
+		if (type == CGROUP2_SUPER_MAGIC && !ops->unified)
+			ops->unified = new;
+
+		continue;
+
+	next:
+		free_string_list(controller_list);
+		free(mountpoint);
+		free(base_cgroup);
+	}
+
+	free_string_list(klist);
+	free_string_list(nlist);
+
+	free(basecginfo);
+
+	fclose(f);
+	free(line);
+
+	TRACE("Writable cgroup hierarchies:");
+	lxc_cgfsng_print_hierarchies(ops);
+
+	/* verify that all controllers in cgroup.use and all crucial
+	 * controllers are accounted for
+	 */
+	if (!all_controllers_found(ops))
+		return false;
+
+	return true;
+}
+
+/* Probe the filesystem mounted at /sys/fs/cgroup.
+ *
+ * Returns -ENOMEDIUM when statfs() on that path fails, CGROUP2_SUPER_MAGIC
+ * when is_fs_type() reports a cgroup2 filesystem, and 0 otherwise.
+ */
+static int cg_is_pure_unified(void)
+{
+	struct statfs fs;
+
+	if (statfs("/sys/fs/cgroup", &fs) < 0)
+		return -ENOMEDIUM;
+
+	return is_fs_type(&fs, CGROUP2_SUPER_MAGIC) ? CGROUP2_SUPER_MAGIC : 0;
+}
+
+/* Look up the cgroup2 entry ("0::/...") in the cgroup file of either init or
+ * the calling process.
+ *
+ * @relative - when false and the effective uid is 0, /proc/1/cgroup is read;
+ *             otherwise /proc/self/cgroup is read.
+ *
+ * Returns the path following "0::" (copied up to the end of the line and
+ * passed through trim()), or NULL if the file cannot be read, contains no
+ * such entry, or the copy fails. The caller owns the returned string.
+ */
+static char *cg_unified_get_current_cgroup(bool relative)
+{
+	char *cginfo, *entry;
+	char *result = NULL;
+
+	if (relative || (geteuid() != 0))
+		cginfo = read_file("/proc/self/cgroup");
+	else
+		cginfo = read_file("/proc/1/cgroup");
+	if (!cginfo)
+		return NULL;
+
+	/* Skip the "0::" prefix so the copy starts at the leading '/'. */
+	entry = strstr(cginfo, "0::/");
+	if (entry)
+		result = copy_to_eol(entry + 3);
+
+	free(cginfo);
+	if (result)
+		trim(result);
+
+	return result;
+}
+
+/* Set up @ops for a pure cgroup2 layout, i.e. when /sys/fs/cgroup itself is
+ * a cgroup2 filesystem.
+ *
+ * @ops      - receives the single unified hierarchy; ops->unified and
+ *             ops->cgroup_layout are updated on success.
+ * @relative - forwarded to cg_unified_get_current_cgroup() to select whose
+ *             cgroup is used as the base.
+ *
+ * Returns -ENOMEDIUM if /sys/fs/cgroup cannot be inspected, 0 if it is not a
+ * cgroup2 filesystem (the caller then falls back to the hybrid/legacy scan),
+ * -EINVAL if the current cgroup cannot be determined, and
+ * CGROUP2_SUPER_MAGIC once the hierarchy has been registered.
+ */
+static int cg_unified_init(struct cgroup_ops *ops, bool relative)
+{
+	int ret;
+	char *mountpoint, *subtree_path;
+	char **delegatable;
+	struct hierarchy *new;
+	char *base_cgroup = NULL;
+
+	ret = cg_is_pure_unified();
+	if (ret == -ENOMEDIUM)
+		return -ENOMEDIUM;
+
+	if (ret != CGROUP2_SUPER_MAGIC)
+		return 0;
+
+	base_cgroup = cg_unified_get_current_cgroup(relative);
+	if (!base_cgroup)
+		return -EINVAL;
+	prune_init_scope(base_cgroup);
+
+	/* We assume that we have already been given controllers to delegate
+	 * further down the hierarchy. If not it is up to the user to delegate
+	 * them to us.
+	 */
+	mountpoint = must_copy_string("/sys/fs/cgroup");
+	subtree_path = must_make_path(mountpoint, base_cgroup,
+				      "cgroup.subtree_control", NULL);
+	delegatable = cg_unified_get_controllers(subtree_path);
+	free(subtree_path);
+	if (!delegatable)
+		delegatable = cg_unified_make_empty_controller();
+	if (!delegatable[0])
+		TRACE("No controllers are enabled for delegation");
+
+	/* TODO: If the user requested specific controllers via lxc.cgroup.use
+	 * we should verify here. The reason I'm not doing it right is that I'm
+	 * not convinced that lxc.cgroup.use will be the future since it is a
+	 * global property. I much rather have an option that lets you request
+	 * controllers per container.
+	 */
+
+	new = add_hierarchy(&ops->hierarchies, delegatable, mountpoint, base_cgroup, CGROUP2_SUPER_MAGIC);
+
+	/* Record the unified hierarchy just like the hybrid scan does, so that
+	 * code reading ops->unified (e.g. the cgroup2 limit setup) also works
+	 * on a pure cgroup2 layout.
+	 */
+	ops->unified = new;
+	ops->cgroup_layout = CGROUP_LAYOUT_UNIFIED;
+	return CGROUP2_SUPER_MAGIC;
+}
+
+/* Populate @ops with the cgroup hierarchies available on this system.
+ *
+ * The comma-separated global "lxc.cgroup.use" value, if set, is split and
+ * appended to ops->cgroup_use first. Then a pure cgroup2 setup is attempted;
+ * if the system is not pure cgroup2 the hybrid/legacy scan is used instead.
+ *
+ * @ops  - cgroup driver state to fill in.
+ * @conf - container configuration; only conf->cgroup_meta.relative is read.
+ *
+ * Returns true if either initialisation path succeeded, false otherwise.
+ */
+static bool cg_init(struct cgroup_ops *ops, struct lxc_conf *conf)
+{
+	int unified;
+	const char *use;
+	bool relative = conf->cgroup_meta.relative;
+
+	use = lxc_global_config_value("lxc.cgroup.use");
+	if (use) {
+		char *buf, *cursor, *token;
+
+		/* Tokenise a private copy; the global value stays untouched. */
+		buf = must_copy_string(use);
+		cursor = buf;
+
+		lxc_iterate_parts(token, cursor, ",") {
+			must_append_string(&ops->cgroup_use, token);
+		}
+
+		free(buf);
+	}
+
+	unified = cg_unified_init(ops, relative);
+	if (unified < 0)
+		return false;
+
+	/* Pure cgroup2 is complete at this point; anything else needs the
+	 * hybrid/legacy scan.
+	 */
+	return unified == CGROUP2_SUPER_MAGIC || cg_hybrid_init(ops, relative);
+}
+
+/* Copy the system-wide cgroup naming patterns into @ops.
+ *
+ * Sets ops->cgroup_pattern to a copy of the global "lxc.cgroup.pattern"
+ * value and ops->monitor_pattern to MONITOR_CGROUP.
+ *
+ * Returns false (after logging an error) when the global pattern cannot be
+ * retrieved, true otherwise.
+ */
+__cgfsng_ops static bool cgfsng_data_init(struct cgroup_ops *ops)
+{
+	const char *pattern = lxc_global_config_value("lxc.cgroup.pattern");
+
+	if (pattern) {
+		ops->cgroup_pattern = must_copy_string(pattern);
+		ops->monitor_pattern = MONITOR_CGROUP;
+		return true;
+	}
+
+	/* lxc.cgroup.pattern is only NULL on error. */
+	ERROR("Failed to retrieve cgroup pattern");
+	return false;
+}
+
+/* Allocate and initialise the cgfsng cgroup driver.
+ *
+ * @conf - container configuration, forwarded to cg_init().
+ *
+ * Returns a heap-allocated cgroup_ops with all cgfsng callbacks installed,
+ * or NULL if the allocation or cg_init() fails. The caller owns the result.
+ *
+ * NOTE(review): when cg_init() fails only the struct itself is freed here;
+ * anything cg_init() already attached to it (e.g. ops->cgroup_use) looks
+ * like it is leaked - confirm and release it if so.
+ */
+struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf)
+{
+	struct cgroup_ops *ops;
+
+	/* calloc() hands back zeroed memory, so every member starts out 0. */
+	ops = calloc(1, sizeof(*ops));
+	if (!ops)
+		return NULL;
+
+	ops->cgroup_layout = CGROUP_LAYOUT_UNKNOWN;
+
+	if (!cg_init(ops, conf)) {
+		free(ops);
+		return NULL;
+	}
+
+	/* Driver identification. */
+	ops->driver = "cgfsng";
+	ops->version = "1.0.0";
+
+	/* Lifecycle callbacks. */
+	ops->data_init = cgfsng_data_init;
+	ops->monitor_create = cgfsng_monitor_create;
+	ops->monitor_enter = cgfsng_monitor_enter;
+	ops->monitor_destroy = cgfsng_monitor_destroy;
+	ops->payload_create = cgfsng_payload_create;
+	ops->payload_enter = cgfsng_payload_enter;
+	ops->payload_destroy = cgfsng_payload_destroy;
+
+	/* Query and control callbacks. */
+	ops->escape = cgfsng_escape;
+	ops->num_hierarchies = cgfsng_num_hierarchies;
+	ops->get_hierarchies = cgfsng_get_hierarchies;
+	ops->get_cgroup = cgfsng_get_cgroup;
+	ops->get = cgfsng_get;
+	ops->set = cgfsng_set;
+	ops->unfreeze = cgfsng_unfreeze;
+	ops->setup_limits = cgfsng_setup_limits;
+	ops->attach = cgfsng_attach;
+	ops->chown = cgfsng_chown;
+	ops->mount = cgfsng_mount;
+	ops->nrtasks = cgfsng_nrtasks;
+
+	return ops;
+}