]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/cgroups/cgfsng.c
Merge pull request #2166 from brauner/2018-02-15/fix_cgfsng_chown
[mirror_lxc.git] / src / lxc / cgroups / cgfsng.c
CommitLineData
ccb4cabe
SH
1/*
2 * lxc: linux Container library
3 *
4 * Copyright © 2016 Canonical Ltd.
5 *
6 * Authors:
7 * Serge Hallyn <serge.hallyn@ubuntu.com>
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24/*
25 * cgfs-ng.c: this is a new, simplified implementation of a filesystem
26 * cgroup backend. The original cgfs.c was designed to be as flexible
27 * as possible. It would try to find cgroup filesystems no matter where
28 * or how you had them mounted, and deduce the most usable mount for
0e7ff52c 29 * each controller.
ccb4cabe
SH
30 *
31 * This new implementation assumes that cgroup filesystems are mounted
32 * under /sys/fs/cgroup/clist where clist is either the controller, or
33 * a comma-separated list of controllers.
34 */
a54694f8 35
ccb4cabe 36#include "config.h"
a54694f8
CB
37
38#include <ctype.h>
39#include <dirent.h>
40#include <errno.h>
41#include <grp.h>
42#include <stdint.h>
ccb4cabe
SH
43#include <stdio.h>
44#include <stdlib.h>
a54694f8 45#include <string.h>
ccb4cabe 46#include <unistd.h>
a54694f8 47#include <sys/types.h>
ccb4cabe 48
c8bf519d 49#include <linux/types.h>
50#include <linux/kdev_t.h>
51
b635e92d 52#include "caps.h"
ccb4cabe 53#include "cgroup.h"
6328fd9c 54#include "cgroup_utils.h"
ccb4cabe 55#include "commands.h"
43654d34 56#include "conf.h"
a54694f8 57#include "log.h"
43654d34 58#include "storage/storage.h"
a54694f8 59#include "utils.h"
ccb4cabe
SH
60
61lxc_log_define(lxc_cgfsng, lxc);
62
63static struct cgroup_ops cgfsng_ops;
64
ccb4cabe
SH
/*
 * Descriptor for one mounted cgroup hierarchy.
 * @controllers: either NULL, or a NULL-terminated list of all the
 *               co-mounted controllers
 * @mountpoint:  the mountpoint we will use; either
 *               /sys/fs/cgroup/controller or /sys/fs/cgroup/controllerlist
 * @base_cgroup: the cgroup under which the container cgroup path is
 *               created: the caller's cgroup (if not root), or init's
 *               cgroup (if root)
 * @fullcgpath:  set to NULL by add_hierarchy(); presumably the full path of
 *               the container's cgroup once it exists - TODO confirm
 * @version:     the filesystem type value handed to add_hierarchy()
 */
struct hierarchy {
	char **controllers;
	char *mountpoint;
	char *base_cgroup;
	char *fullcgpath;
	int version;
};
82
83/*
84 * The cgroup data which is attached to the lxc_handler.
43654d34
CB
85 * @cgroup_pattern : A copy of the lxc.cgroup.pattern
86 * @container_cgroup : If not null, the cgroup which was created for the
87 * container. For each hierarchy, it is created under the
88 * @hierarchy->base_cgroup directory. Relative to the
89 * base_cgroup it is the same for all hierarchies.
90 * @name : The name of the container.
91 * @cgroup_meta : A copy of the container's cgroup information. This
92 * overrides @cgroup_pattern.
ccb4cabe
SH
93 */
94struct cgfsng_handler_data {
ccb4cabe 95 char *cgroup_pattern;
1a0e70ac
CB
96 char *container_cgroup; /* cgroup we created for the container */
97 char *name; /* container name */
43654d34
CB
98 /* per-container cgroup information */
99 struct lxc_cgroup cgroup_meta;
d6337a5f 100 cgroup_layout_t cgroup_layout;
ccb4cabe
SH
101};
102
457ca9aa
SH
103/*
104 * @hierarchies - a NULL-terminated array of struct hierarchy, one per
d6337a5f
CB
105 * legacy hierarchy. No duplicates. First sufficient, writeable
106 * mounted hierarchy wins
457ca9aa
SH
107 */
108struct hierarchy **hierarchies;
d6337a5f
CB
109struct hierarchy *unified;
110cgroup_layout_t cgroup_layout;
457ca9aa
SH
111
112/*
113 * @cgroup_use - a copy of the lxc.cgroup.use
114 */
115char *cgroup_use;
116
e4aeecf5
CB
117/*
118 * @lxc_cgfsng_debug - whether to print debug info to stdout for the cgfsng
119 * driver
120 */
121static bool lxc_cgfsng_debug;
122
65d78313
MPS
123#define CGFSNG_DEBUG(format, ...) do { \
124 if (lxc_cgfsng_debug) \
125 printf("cgfsng: " format, ##__VA_ARGS__); \
126} while(0)
127
ccb4cabe
SH
/*
 * Free every string of the NULL-terminated array @clist and then the array
 * itself. A NULL @clist is accepted and ignored.
 */
static void free_string_list(char **clist)
{
	char **it;

	if (!clist)
		return;

	for (it = clist; *it; it++)
		free(*it);

	free(clist);
}
138
ccb4cabe
SH
/* Allocate @sz bytes by delegating to must_realloc(). Do not fail. */
static void *must_alloc(size_t sz)
{
	void *mem = must_realloc(NULL, sz);

	return mem;
}
144
ccb4cabe
SH
/*
 * Special case: return a newly allocated copy of @entry with "name="
 * prepended, i.e. turn "systemd" into "name=systemd". Do not fail.
 */
static char *must_prefix_named(char *entry)
{
	/* "name=" is 5 bytes, plus the entry, plus the terminating NUL. */
	size_t size = strlen(entry) + 6;
	char *prefixed = must_alloc(size);

	snprintf(prefixed, size, "name=%s", entry);
	return prefixed;
}
159
/*
 * Grow the NULL-terminated pointer array *@list by one slot. Do not fail.
 *
 * The new last element is set to NULL so the list stays terminated once the
 * caller has filled in the freed-up slot. Returns the index of that slot (the
 * second-to-last entry); the caller is expected to store into it.
 * *@list may be NULL, in which case a fresh two-element array is created.
 */
static int append_null_to_list(void ***list)
{
	int count = 0;

	if (*list)
		while ((*list)[count])
			count++;

	*list = must_realloc(*list, (count + 2) * sizeof(void **));
	(*list)[count + 1] = NULL;
	return count;
}
177
/*
 * Return true if @entry compares equal to one of the strings of the
 * NULL-terminated array @list. A NULL @list contains nothing.
 */
static bool string_in_list(char **list, const char *entry)
{
	char **it;

	for (it = list; it && *it; it++) {
		if (!strcmp(*it, entry))
			return true;
	}

	return false;
}
195
/*
 * Append a copy of @entry to the controller list *@clist. Do not fail.
 * *@clist must be NULL the first time we are called; the last entry will
 * always be NULL.
 *
 * Named subsystems are handled here as well:
 *  - an entry already starting with "name=" is copied verbatim;
 *  - an entry listed in @klist (kernel subsystems) is copied verbatim;
 *  - anything else is stored with a "name=" prefix.
 * An entry present in both @klist and @nlist is ambiguous and is refused:
 * an error is logged and *@clist is left untouched. (TODO - we could work
 * around this in some cases by remounting to be unambiguous, or by comparing
 * mountpoint contents with the current cgroup.)
 */
static void must_append_controller(char **klist, char **nlist, char ***clist, char *entry)
{
	bool is_kernel = string_in_list(klist, entry);
	int slot;

	if (is_kernel && string_in_list(nlist, entry)) {
		ERROR("Refusing to use ambiguous controller \"%s\"", entry);
		ERROR("It is both a named and kernel subsystem");
		return;
	}

	slot = append_null_to_list((void ***)clist);

	if (strncmp(entry, "name=", 5) == 0 || is_kernel)
		(*clist)[slot] = must_copy_string(entry);
	else
		(*clist)[slot] = must_prefix_named(entry);
}
231
ccb4cabe
SH
232static void free_handler_data(struct cgfsng_handler_data *d)
233{
ccb4cabe
SH
234 free(d->cgroup_pattern);
235 free(d->container_cgroup);
236 free(d->name);
43654d34
CB
237 if (d->cgroup_meta.dir)
238 free(d->cgroup_meta.dir);
239 if (d->cgroup_meta.controllers)
240 free(d->cgroup_meta.controllers);
ccb4cabe
SH
241 free(d);
242}
243
244/*
245 * Given a handler's cgroup data, return the struct hierarchy for the
246 * controller @c, or NULL if there is none.
247 */
457ca9aa 248struct hierarchy *get_hierarchy(const char *c)
ccb4cabe
SH
249{
250 int i;
251
457ca9aa 252 if (!hierarchies)
ccb4cabe 253 return NULL;
d6337a5f 254
457ca9aa 255 for (i = 0; hierarchies[i]; i++) {
d6337a5f
CB
256 if (!c) {
257 /* This is the empty unified hierarchy. */
258 if (hierarchies[i]->controllers &&
259 !hierarchies[i]->controllers[0])
260 return hierarchies[i];
261
262 return NULL;
263 }
264
457ca9aa
SH
265 if (string_in_list(hierarchies[i]->controllers, c))
266 return hierarchies[i];
ccb4cabe 267 }
d6337a5f 268
ccb4cabe
SH
269 return NULL;
270}
271
a54694f8
CB
/* Buffers built by append_line() grow in steps of BATCH_SIZE bytes. */
#define BATCH_SIZE 50

/*
 * Make sure *@mem can hold @newlen bytes, given that it currently serves
 * @oldlen bytes. Reallocates only when *@mem is NULL or when @newlen falls
 * into a later batch than @oldlen. Do not fail.
 */
static void batch_realloc(char **mem, size_t oldlen, size_t newlen)
{
	int have = (oldlen / BATCH_SIZE) + 1;
	int need = (newlen / BATCH_SIZE) + 1;

	if (*mem && need <= have)
		return;

	*mem = must_realloc(*mem, need * BATCH_SIZE);
}
282
/*
 * Append the @newlen bytes of @new, plus its terminating NUL, to *@dest,
 * which currently holds @oldlen bytes. *@dest is grown as needed.
 */
static void append_line(char **dest, size_t oldlen, char *new, size_t newlen)
{
	/* Room for the old text, the new text and the NUL. */
	batch_realloc(dest, oldlen, oldlen + newlen + 1);

	memcpy(&(*dest)[oldlen], new, newlen + 1);
}
291
/*
 * Slurp in a whole file.
 *
 * Returns a NUL-terminated heap buffer holding the content of @fnam, which
 * the caller must free. Returns NULL if the file cannot be opened or if no
 * line could be read from it (e.g. it is empty).
 */
static char *read_file(const char *fnam)
{
	FILE *f;
	char *line = NULL, *buf = NULL;
	size_t len = 0, fulllen = 0;
	ssize_t linelen; /* getline() returns ssize_t, not int */

	f = fopen(fnam, "r");
	if (!f)
		return NULL;

	while ((linelen = getline(&line, &len, f)) != -1) {
		append_line(&buf, fulllen, line, linelen);
		fulllen += linelen;
	}

	fclose(f);
	free(line);
	return buf;
}
311
/* Taken over modified from the kernel sources. */
#define NBITS 32 /* bits in uint32_t */
#define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d))
#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, NBITS)

/*
 * The helpers below treat @bitarr as a bitmap made of 32-bit words. The mask
 * is built from an unsigned 32-bit one: shifting a signed int into bit 31 is
 * undefined behaviour. The caller guarantees that @bit lies inside @bitarr.
 */

/* Set bit number @bit in @bitarr. */
static void set_bit(unsigned bit, uint32_t *bitarr)
{
	bitarr[bit / NBITS] |= (UINT32_C(1) << (bit % NBITS));
}

/* Clear bit number @bit in @bitarr. */
static void clear_bit(unsigned bit, uint32_t *bitarr)
{
	bitarr[bit / NBITS] &= ~(UINT32_C(1) << (bit % NBITS));
}

/* Return true if bit number @bit is set in @bitarr. */
static bool is_set(unsigned bit, uint32_t *bitarr)
{
	return (bitarr[bit / NBITS] & (UINT32_C(1) << (bit % NBITS))) != 0;
}
331
/*
 * Create a cpumask from a cpulist, i.e. turn
 *
 *	0,2-3
 *
 * into the bit array
 *
 *	1 0 1 1
 *
 * @buf is tokenized in place (its commas are overwritten) and @nbits is the
 * number of bits the mask must hold. Returns a heap-allocated mask that the
 * caller must free, or NULL if the allocation fails, a range is reversed, or
 * a cpu number is not below @nbits.
 */
static uint32_t *lxc_cpumask(char *buf, size_t nbits)
{
	char *tok;
	char *state = NULL;
	uint32_t *mask;

	mask = calloc(BITS_TO_LONGS(nbits), sizeof(uint32_t));
	if (!mask)
		return NULL;

	tok = strtok_r(buf, ",", &state);
	while (tok) {
		unsigned first, last, cpu;
		char *dash;

		errno = 0;
		first = strtoul(tok, NULL, 0);
		last = first;

		/* "a-b" denotes a range, a bare "a" a single cpu. */
		dash = strchr(tok, '-');
		if (dash)
			last = strtoul(dash + 1, NULL, 0);

		if (first > last || last >= nbits) {
			free(mask);
			return NULL;
		}

		for (cpu = first; cpu <= last; cpu++)
			set_bit(cpu, mask);

		tok = strtok_r(NULL, ",", &state);
	}

	return mask;
}
373
a54694f8
CB
374/* Turn cpumask into simple, comma-separated cpulist. */
375static char *lxc_cpumask_to_cpulist(uint32_t *bitarr, size_t nbits)
376{
377 size_t i;
378 int ret;
eab15c1e 379 char numstr[LXC_NUMSTRLEN64] = {0};
a54694f8
CB
380 char **cpulist = NULL;
381
382 for (i = 0; i <= nbits; i++) {
383 if (is_set(i, bitarr)) {
eab15c1e
CB
384 ret = snprintf(numstr, LXC_NUMSTRLEN64, "%zu", i);
385 if (ret < 0 || (size_t)ret >= LXC_NUMSTRLEN64) {
a54694f8
CB
386 lxc_free_array((void **)cpulist, free);
387 return NULL;
388 }
389 if (lxc_append_string(&cpulist, numstr) < 0) {
390 lxc_free_array((void **)cpulist, free);
391 return NULL;
392 }
393 }
394 }
395 return lxc_string_join(",", (const char **)cpulist, false);
396}
397
/*
 * Return the last number mentioned in the cpulist string @cpulist (e.g. 3 for
 * "0,2-3"), i.e. the number following the right-most ',' or '-', or the
 * leading number when the string contains neither. For an ascending list
 * this is the highest cpu number.
 *
 * Returns -1 if strtoul() reports an error through errno.
 */
static ssize_t get_max_cpus(char *cpulist)
{
	char *comma, *dash;
	char *last = cpulist;
	size_t cpus = 0;

	comma = strrchr(cpulist, ',');
	if (comma)
		last = comma + 1;

	/* Both pointers lie within @cpulist, so comparing them is valid. */
	dash = strrchr(cpulist, '-');
	if (dash && dash + 1 > last)
		last = dash + 1;

	errno = 0;
	cpus = strtoul(last, NULL, 0);
	if (errno != 0)
		return -1;

	return cpus;
}
432
#define __ISOL_CPUS "/sys/devices/system/cpu/isolated"

/*
 * Initialize @path/cpuset.cpus from the parent cgroup's cpuset.cpus with any
 * isolated cpus (as listed in __ISOL_CPUS) removed.
 *
 * @path:           cgroup directory; its parent is found by cutting at the
 *                  last '/'. The string is only modified temporarily.
 * @am_initialized: true if the cgroup was already set up by someone else
 *                  (the caller passes whether cgroup.clone_children is '1').
 *                  In that case the file is only rewritten when isolated
 *                  cpus had to be removed.
 *
 * Returns true on success, false on any error.
 */
static bool cg_legacy_filter_and_set_cpus(char *path, bool am_initialized)
{
	char *lastslash, *fpath, oldv;
	int ret;
	ssize_t i;

	ssize_t maxposs = 0, maxisol = 0;
	char *cpulist = NULL, *posscpus = NULL, *isolcpus = NULL;
	uint32_t *possmask = NULL, *isolmask = NULL;
	bool bret = false, flipped_bit = false, have_isolated = true;

	lastslash = strrchr(path, '/');
	if (!lastslash) { /* bug... this shouldn't be possible */
		ERROR("Invalid path: %s.", path);
		return bret;
	}

	/* Build the parent's file name, then restore @path right away so that
	 * no exit path hands a truncated string back to the caller.
	 */
	oldv = *lastslash;
	*lastslash = '\0';
	fpath = must_make_path(path, "cpuset.cpus", NULL);
	*lastslash = oldv;

	posscpus = read_file(fpath);
	if (!posscpus) {
		SYSERROR("Could not read file: %s.\n", fpath);
		goto on_error;
	}

	/* Get maximum number of cpus found in possible cpuset. */
	maxposs = get_max_cpus(posscpus);
	if (maxposs < 0)
		goto on_error;

	if (!file_exists(__ISOL_CPUS)) {
		/* This system doesn't expose isolated cpus. */
		DEBUG("Path: "__ISOL_CPUS" to read isolated cpus from does not exist.\n");
		have_isolated = false;
	} else {
		isolcpus = read_file(__ISOL_CPUS);
		if (!isolcpus) {
			SYSERROR("Could not read file "__ISOL_CPUS);
			goto on_error;
		}

		if (!isdigit((unsigned char)isolcpus[0])) {
			DEBUG("No isolated cpus detected.");
			have_isolated = false;
		}
	}

	if (!have_isolated) {
		cpulist = posscpus;
		/* No isolated cpus but we weren't already initialized by
		 * someone. We should simply copy the parents cpuset.cpus
		 * values.
		 */
		if (!am_initialized) {
			DEBUG("Copying cpuset of parent cgroup.");
			goto copy_parent;
		}
		/* No isolated cpus but we were already initialized by someone.
		 * Nothing more to do for us.
		 */
		goto on_success;
	}

	/* Get maximum number of cpus found in isolated cpuset. */
	maxisol = get_max_cpus(isolcpus);
	if (maxisol < 0)
		goto on_error;

	/* From here on maxposs is the number of bits both masks hold. */
	if (maxposs < maxisol)
		maxposs = maxisol;
	maxposs++;

	possmask = lxc_cpumask(posscpus, maxposs);
	if (!possmask) {
		ERROR("Could not create cpumask for all possible cpus.\n");
		goto on_error;
	}

	isolmask = lxc_cpumask(isolcpus, maxposs);
	if (!isolmask) {
		ERROR("Could not create cpumask for all isolated cpus.\n");
		goto on_error;
	}

	/* Valid bits are 0 .. maxposs - 1; bit maxposs is outside the masks. */
	for (i = 0; i < maxposs; i++) {
		if (is_set(i, isolmask) && is_set(i, possmask)) {
			flipped_bit = true;
			clear_bit(i, possmask);
		}
	}

	if (!flipped_bit) {
		DEBUG("No isolated cpus present in cpuset.");
		/* Already initialized and nothing to remove: leave it alone. */
		if (am_initialized)
			goto on_success;
		/* Otherwise the cgroup still needs the parent's cpus. posscpus
		 * was tokenized by lxc_cpumask(), so rebuild the list from the
		 * mask below.
		 */
	} else {
		DEBUG("Removed isolated cpus from cpuset.");
	}

	cpulist = lxc_cpumask_to_cpulist(possmask, maxposs);
	if (!cpulist) {
		ERROR("Could not create cpu list.\n");
		goto on_error;
	}

copy_parent:
	free(fpath);
	fpath = must_make_path(path, "cpuset.cpus", NULL);
	ret = lxc_write_to_file(fpath, cpulist, strlen(cpulist), false);
	if (ret < 0) {
		SYSERROR("Could not write cpu list to: %s.\n", fpath);
		goto on_error;
	}

on_success:
	bret = true;

on_error:
	free(fpath);

	free(isolcpus);
	free(isolmask);

	/* cpulist may alias posscpus; make sure it is freed only once. */
	if (posscpus != cpulist)
		free(posscpus);
	free(possmask);

	free(cpulist);
	return bret;
}
570
e3a3fecf
SH
/*
 * Copy contents of parent(@path)/@file to @path/@file.
 *
 * The parent directory is found by cutting @path at its last '/'; @path is
 * only modified temporarily. Returns true if the value was read and written
 * successfully.
 */
static bool copy_parent_file(char *path, char *file)
{
	char *lastslash, *value = NULL, *fpath, oldv;
	int len = 0;
	int ret;

	lastslash = strrchr(path, '/');
	if (!lastslash) { /* bug... this shouldn't be possible */
		ERROR("cgfsng:copy_parent_file: bad path %s", path);
		return false;
	}

	/* Build the parent's file name, then restore @path right away so the
	 * error path cannot leave it truncated.
	 */
	oldv = *lastslash;
	*lastslash = '\0';
	fpath = must_make_path(path, file, NULL);
	*lastslash = oldv;

	/* A first call with a NULL buffer yields the length to read. */
	len = lxc_read_from_file(fpath, NULL, 0);
	if (len <= 0)
		goto bad;

	value = must_alloc(len + 1);
	if (lxc_read_from_file(fpath, value, len) != len)
		goto bad;
	/* Terminate: the value is printed with %s if the write fails. */
	value[len] = '\0';

	free(fpath);
	fpath = must_make_path(path, file, NULL);
	ret = lxc_write_to_file(fpath, value, len, false);
	if (ret < 0)
		SYSERROR("Unable to write %s to %s", value, fpath);
	free(fpath);
	free(value);
	return ret >= 0;

bad:
	SYSERROR("Error reading '%s'", fpath);
	free(fpath);
	free(value);
	return false;
}
608
609/*
610 * Initialize the cpuset hierarchy in first directory of @gname and
611 * set cgroup.clone_children so that children inherit settings.
612 * Since the h->base_path is populated by init or ourselves, we know
613 * it is already initialized.
614 */
a3926f6a 615static bool cg_legacy_handle_cpuset_hierarchy(struct hierarchy *h, char *cgname)
e3a3fecf
SH
616{
617 char *cgpath, *clonechildrenpath, v, *slash;
618
619 if (!string_in_list(h->controllers, "cpuset"))
620 return true;
621
622 if (*cgname == '/')
623 cgname++;
624 slash = strchr(cgname, '/');
625 if (slash)
626 *slash = '\0';
627
628 cgpath = must_make_path(h->mountpoint, h->base_cgroup, cgname, NULL);
629 if (slash)
630 *slash = '/';
631 if (mkdir(cgpath, 0755) < 0 && errno != EEXIST) {
632 SYSERROR("Failed to create '%s'", cgpath);
633 free(cgpath);
634 return false;
635 }
6f9584d8 636
e3a3fecf 637 clonechildrenpath = must_make_path(cgpath, "cgroup.clone_children", NULL);
6328fd9c
CB
638 /* unified hierarchy doesn't have clone_children */
639 if (!file_exists(clonechildrenpath)) {
e3a3fecf
SH
640 free(clonechildrenpath);
641 free(cgpath);
642 return true;
643 }
644 if (lxc_read_from_file(clonechildrenpath, &v, 1) < 0) {
645 SYSERROR("Failed to read '%s'", clonechildrenpath);
646 free(clonechildrenpath);
647 free(cgpath);
648 return false;
649 }
650
a54694f8 651 /* Make sure any isolated cpus are removed from cpuset.cpus. */
a3926f6a 652 if (!cg_legacy_filter_and_set_cpus(cgpath, v == '1')) {
6f9584d8
CB
653 SYSERROR("Failed to remove isolated cpus.");
654 free(clonechildrenpath);
655 free(cgpath);
a54694f8 656 return false;
6f9584d8 657 }
a54694f8 658
e3a3fecf 659 if (v == '1') { /* already set for us by someone else */
6f9584d8 660 DEBUG("\"cgroup.clone_children\" was already set to \"1\".");
e3a3fecf
SH
661 free(clonechildrenpath);
662 free(cgpath);
663 return true;
664 }
665
666 /* copy parent's settings */
a54694f8 667 if (!copy_parent_file(cgpath, "cpuset.mems")) {
6f9584d8 668 SYSERROR("Failed to copy \"cpuset.mems\" settings.");
e3a3fecf
SH
669 free(cgpath);
670 free(clonechildrenpath);
671 return false;
672 }
673 free(cgpath);
674
675 if (lxc_write_to_file(clonechildrenpath, "1", 1, false) < 0) {
676 /* Set clone_children so children inherit our settings */
677 SYSERROR("Failed to write 1 to %s", clonechildrenpath);
678 free(clonechildrenpath);
679 return false;
680 }
681 free(clonechildrenpath);
682 return true;
683}
684
ccb4cabe
SH
/*
 * Given two NULL-terminated lists of strings, return true if any string is
 * in both. A NULL list intersects with nothing.
 */
static bool controller_lists_intersect(char **l1, char **l2)
{
	char **it;

	if (!l1 || !l2)
		return false;

	for (it = l1; *it; it++)
		if (string_in_list(l2, *it))
			return true;

	return false;
}
702
703/*
704 * For a null-terminated list of controllers @clist, return true if any of
705 * those controllers is already listed the null-terminated list of
706 * hierarchies @hlist. Realistically, if one is present, all must be present.
707 */
708static bool controller_list_is_dup(struct hierarchy **hlist, char **clist)
709{
710 int i;
711
712 if (!hlist)
713 return false;
714 for (i = 0; hlist[i]; i++)
715 if (controller_lists_intersect(hlist[i]->controllers, clist))
716 return true;
717 return false;
718
719}
720
721/*
722 * Return true if the controller @entry is found in the null-terminated
723 * list of hierarchies @hlist
724 */
725static bool controller_found(struct hierarchy **hlist, char *entry)
726{
727 int i;
d6337a5f 728
ccb4cabe
SH
729 if (!hlist)
730 return false;
731
732 for (i = 0; hlist[i]; i++)
733 if (string_in_list(hlist[i]->controllers, entry))
734 return true;
d6337a5f 735
ccb4cabe
SH
736 return false;
737}
738
739/*
c30b61c3
SH
740 * Return true if all of the controllers which we require have been found.
741 * The required list is freezer and anything in * lxc.cgroup.use.
ccb4cabe 742 */
457ca9aa 743static bool all_controllers_found(void)
ccb4cabe
SH
744{
745 char *p, *saveptr = NULL;
457ca9aa 746 struct hierarchy ** hlist = hierarchies;
ccb4cabe 747
ccb4cabe 748 if (!controller_found(hlist, "freezer")) {
65d78313 749 CGFSNG_DEBUG("No freezer controller mountpoint found\n");
ccb4cabe
SH
750 return false;
751 }
752
457ca9aa 753 if (!cgroup_use)
ccb4cabe 754 return true;
c2712f64 755
457ca9aa 756 for (p = strtok_r(cgroup_use, ",", &saveptr); p;
ccb4cabe
SH
757 p = strtok_r(NULL, ",", &saveptr)) {
758 if (!controller_found(hlist, p)) {
65d78313 759 CGFSNG_DEBUG("No %s controller mountpoint found\n", p);
ccb4cabe
SH
760 return false;
761 }
762 }
c2712f64 763
ccb4cabe
SH
764 return true;
765}
766
ccb4cabe
SH
767/*
768 * Get the controllers from a mountinfo line
769 * There are other ways we could get this info. For lxcfs, field 3
770 * is /cgroup/controller-list. For cgroupfs, we could parse the mount
771 * options. But we simply assume that the mountpoint must be
772 * /sys/fs/cgroup/controller-list
773 */
a3926f6a
CB
774static char **cg_hybrid_get_controllers(char **klist, char **nlist, char *line,
775 int type)
ccb4cabe 776{
6328fd9c 777 /* the fourth field is /sys/fs/cgroup/comma-delimited-controller-list */
ccb4cabe 778 int i;
411ac6d8 779 char *dup, *p2, *tok;
d6337a5f 780 char *p = line, *saveptr = NULL, *sep = ",";
411ac6d8 781 char **aret = NULL;
6328fd9c 782
ccb4cabe 783 for (i = 0; i < 4; i++) {
235f1815 784 p = strchr(p, ' ');
ccb4cabe
SH
785 if (!p)
786 return NULL;
787 p++;
788 }
a55f31bd 789
ccb4cabe
SH
790 /* note - if we change how mountinfo works, then our caller
791 * will need to verify /sys/fs/cgroup/ in this field */
c2712f64 792 if (strncmp(p, "/sys/fs/cgroup/", 15)) {
65d78313 793 CGFSNG_DEBUG("Found hierarchy not under /sys/fs/cgroup: \"%s\"\n", p);
ccb4cabe 794 return NULL;
5059aae9 795 }
d6337a5f 796
ccb4cabe 797 p += 15;
235f1815 798 p2 = strchr(p, ' ');
ccb4cabe 799 if (!p2) {
65d78313 800 CGFSNG_DEBUG("Corrupt mountinfo\n");
ccb4cabe
SH
801 return NULL;
802 }
803 *p2 = '\0';
6328fd9c 804
d6337a5f
CB
805 if (type == CGROUP_SUPER_MAGIC) {
806 /* strdup() here for v1 hierarchies. Otherwise strtok_r() will
807 * destroy mountpoints such as "/sys/fs/cgroup/cpu,cpuacct".
808 */
809 dup = strdup(p);
810 if (!dup)
811 return NULL;
812
813 for (tok = strtok_r(dup, sep, &saveptr); tok;
814 tok = strtok_r(NULL, sep, &saveptr))
815 must_append_controller(klist, nlist, &aret, tok);
816
817 free(dup);
411ac6d8 818 }
d6337a5f
CB
819 *p2 = ' ';
820 return aret;
821}
411ac6d8 822
d6337a5f
CB
/*
 * Build a controller list that exists but holds no entries, i.e. a
 * heap-allocated array whose first element is already NULL.
 */
static char **cg_unified_make_empty_controller(void)
{
	char **empty = NULL;
	int slot = append_null_to_list((void ***)&empty);

	empty[slot] = NULL;
	return empty;
}
832
/*
 * Read @file and split its content on blanks, tabs and newlines into a
 * NULL-terminated list of newly allocated strings.
 * Returns NULL if the file cannot be read or holds no token.
 */
static char **cg_unified_get_controllers(const char *file)
{
	char *content, *tok;
	char *state = NULL, *sep = " \t\n";
	char **list = NULL;

	content = read_file(file);
	if (!content)
		return NULL;

	tok = strtok_r(content, sep, &state);
	while (tok) {
		int slot = append_null_to_list((void ***)&list);

		list[slot] = must_copy_string(tok);
		tok = strtok_r(NULL, sep, &state);
	}

	free(content);
	return list;
}
856
d6337a5f
CB
857static struct hierarchy *add_hierarchy(char **clist, char *mountpoint,
858 char *base_cgroup, int type)
ccb4cabe
SH
859{
860 struct hierarchy *new;
861 int newentry;
862
863 new = must_alloc(sizeof(*new));
864 new->controllers = clist;
865 new->mountpoint = mountpoint;
866 new->base_cgroup = base_cgroup;
867 new->fullcgpath = NULL;
d6337a5f 868 new->version = type;
6328fd9c 869
457ca9aa
SH
870 newentry = append_null_to_list((void ***)&hierarchies);
871 hierarchies[newentry] = new;
d6337a5f 872 return new;
ccb4cabe
SH
873}
874
/*
 * Get a copy of the mountpoint from @line, which is a line from
 * /proc/self/mountinfo. @line itself is not modified.
 *
 * Returns a newly allocated string, or NULL if the line has fewer fields
 * than expected or its mountpoint is not below /sys/fs/cgroup/.
 */
static char *cg_hybrid_get_mountpoint(char *line)
{
	int i;
	char *end;
	size_t len;
	char *p = line;
	char *sret = NULL;

	/* Skip to the field that holds the mountpoint. */
	for (i = 0; i < 4; i++) {
		p = strchr(p, ' ');
		if (!p)
			return NULL;
		p++;
	}

	if (strncmp(p, "/sys/fs/cgroup/", 15))
		return NULL;

	end = strchr(p + 15, ' ');
	if (!end)
		return NULL;

	/* Copy up to the separator instead of cutting the line in place. */
	len = end - p;
	sret = must_alloc(len + 1);
	memcpy(sret, p, len);
	sret[len] = '\0';
	return sret;
}
908
/*
 * Given a multi-line string, return a NUL-terminated, newly allocated copy
 * of the current line (without its newline). Returns NULL if @p contains no
 * newline.
 */
static char *copy_to_eol(char *p)
{
	char *eol = strchr(p, '\n');
	char *copy;
	size_t len;

	if (!eol)
		return NULL;

	len = eol - p;
	copy = must_alloc(len + 1);
	memcpy(copy, p, len);
	copy[len] = '\0';
	return copy;
}
927
/*
 * @cgline: pointer to the character after the first ':' in a line of a
 *          \n-terminated /proc/self/cgroup file, i.e. the start of the
 *          comma-separated controller list, which ends at the next ':'.
 * @c:      controller name to look for.
 *
 * Returns true if @c is one of the listed controllers. Returns false if it
 * is not, or if @cgline contains no ':'. Empty list entries never match.
 * The list is compared in place; no copy is made.
 */
static bool controller_in_clist(char *cgline, char *c)
{
	const char *tok, *end;
	size_t clen;

	end = strchr(cgline, ':');
	if (!end)
		return false;

	clen = strlen(c);
	tok = cgline;
	for (;;) {
		const char *comma = memchr(tok, ',', end - tok);
		const char *tokend = comma ? comma : end;
		size_t toklen = tokend - tok;

		if (toklen > 0 && toklen == clen && memcmp(tok, c, clen) == 0)
			return true;

		if (!comma)
			break;
		tok = comma + 1;
	}

	return false;
}
955
956/*
957 * @basecginfo is a copy of /proc/$$/cgroup. Return the current
958 * cgroup for @controller
959 */
a3926f6a 960static char *cg_hybrid_get_current_cgroup(char *basecginfo, char *controller, int type)
ccb4cabe
SH
961{
962 char *p = basecginfo;
6328fd9c 963
d6337a5f
CB
964 for (;;) {
965 bool is_cgv2_base_cgroup = false;
966
6328fd9c 967 /* cgroup v2 entry in "/proc/<pid>/cgroup": "0::/some/path" */
d6337a5f
CB
968 if ((type == CGROUP2_SUPER_MAGIC) && (*p == '0'))
969 is_cgv2_base_cgroup = true;
ccb4cabe 970
235f1815 971 p = strchr(p, ':');
ccb4cabe
SH
972 if (!p)
973 return NULL;
974 p++;
d6337a5f
CB
975
976 if (is_cgv2_base_cgroup || (controller && controller_in_clist(p, controller))) {
235f1815 977 p = strchr(p, ':');
ccb4cabe
SH
978 if (!p)
979 return NULL;
980 p++;
981 return copy_to_eol(p);
982 }
983
235f1815 984 p = strchr(p, '\n');
ccb4cabe
SH
985 if (!p)
986 return NULL;
987 p++;
988 }
989}
990
ccb4cabe
SH
/* Append a newly allocated copy of @entry to the NULL-terminated string
 * list *@list. Do not fail.
 */
static void must_append_string(char ***list, char *entry)
{
	int slot = append_null_to_list((void ***)list);

	(*list)[slot] = must_copy_string(entry);
}
999
/*
 * Parse /proc/self/cgroup and collect the controllers it mentions:
 * "name=" entries are appended to *@nlist, all others to *@klist.
 * Returns 0 on success, -1 if the file cannot be opened.
 */
static int get_existing_subsystems(char ***klist, char ***nlist)
{
	FILE *f;
	char *line = NULL;
	size_t cap = 0;

	f = fopen("/proc/self/cgroup", "r");
	if (!f)
		return -1;

	while (getline(&line, &cap, f) != -1) {
		char *clist, *cend, *tok, *state = NULL;

		/* The controller list sits between the first two colons. */
		clist = strchr(line, ':');
		if (!clist)
			continue;
		clist++;

		cend = strchr(clist, ':');
		if (!cend)
			continue;
		*cend = '\0';

		/* If the kernel has cgroup v2 support, then /proc/self/cgroup
		 * contains an entry of the form:
		 *
		 *	0::/some/path
		 *
		 * In this case we use "cgroup2" as controller name.
		 */
		if (cend == clist) {
			must_append_string(klist, "cgroup2");
			continue;
		}

		tok = strtok_r(clist, ",", &state);
		while (tok) {
			if (strncmp(tok, "name=", 5) == 0)
				must_append_string(nlist, tok);
			else
				must_append_string(klist, tok);

			tok = strtok_r(NULL, ",", &state);
		}
	}

	free(line);
	fclose(f);
	return 0;
}
1046
/*
 * Strip trailing newlines from @s in place. The first character is never
 * removed, so a string consisting of a single "\n" is left as is.
 */
static void trim(char *s)
{
	size_t len;

	for (len = strlen(s); len > 1 && s[len - 1] == '\n'; len--)
		s[len - 1] = '\0';
}
1053
e4aeecf5
CB
1054static void lxc_cgfsng_print_handler_data(const struct cgfsng_handler_data *d)
1055{
1056 printf("Cgroup information:\n");
1057 printf(" container name: %s\n", d->name ? d->name : "(null)");
1058 printf(" lxc.cgroup.use: %s\n", cgroup_use ? cgroup_use : "(null)");
43654d34
CB
1059 printf(" lxc.cgroup.pattern: %s\n",
1060 d->cgroup_pattern ? d->cgroup_pattern : "(null)");
1061 printf(" lxc.cgroup.dir: %s\n",
1062 d->cgroup_meta.dir ? d->cgroup_meta.dir : "(null)");
1063 printf(" cgroup: %s\n",
1064 d->container_cgroup ? d->container_cgroup : "(null)");
e4aeecf5
CB
1065}
1066
1067static void lxc_cgfsng_print_hierarchies()
ccb4cabe 1068{
a7b0cc4c 1069 struct hierarchy **it;
ccb4cabe 1070 int i;
41c33dbe 1071
457ca9aa 1072 if (!hierarchies) {
c2712f64 1073 printf(" No hierarchies found\n");
ccb4cabe
SH
1074 return;
1075 }
e4aeecf5 1076 printf(" Hierarchies:\n");
a7b0cc4c
CB
1077 for (i = 0, it = hierarchies; it && *it; it++, i++) {
1078 char **cit;
ccb4cabe 1079 int j;
c2712f64
CB
1080 printf(" %d: base_cgroup: %s\n", i, (*it)->base_cgroup ? (*it)->base_cgroup : "(null)");
1081 printf(" mountpoint: %s\n", (*it)->mountpoint ? (*it)->mountpoint : "(null)");
e4aeecf5 1082 printf(" controllers:\n");
a7b0cc4c 1083 for (j = 0, cit = (*it)->controllers; cit && *cit; cit++, j++)
e4aeecf5 1084 printf(" %d: %s\n", j, *cit);
ccb4cabe
SH
1085 }
1086}
41c33dbe 1087
a3926f6a
CB
/*
 * Print @basecginfo followed by the kernel subsystems in @klist and the
 * named subsystems in @nlist to stdout. Either list may be NULL.
 */
static void lxc_cgfsng_print_basecg_debuginfo(char *basecginfo, char **klist,
					      char **nlist)
{
	int idx;

	printf("basecginfo is:\n");
	printf("%s\n", basecginfo);

	if (klist)
		for (idx = 0; klist[idx]; idx++)
			printf("kernel subsystem %d: %s\n", idx, klist[idx]);

	if (nlist)
		for (idx = 0; nlist[idx]; idx++)
			printf("named subsystem %d: %s\n", idx, nlist[idx]);
}
ccb4cabe 1102
e4aeecf5
CB
/* Print all cgfsng debug information to stdout: first the handler data @d,
 * then the known hierarchies.
 */
static void lxc_cgfsng_print_debuginfo(const struct cgfsng_handler_data *d)
{
	lxc_cgfsng_print_handler_data(d);

	lxc_cgfsng_print_hierarchies();
}
1108
ccb4cabe
SH
1109/*
1110 * At startup, parse_hierarchies finds all the info we need about
1111 * cgroup mountpoints and current cgroups, and stores it in @d.
1112 */
a3926f6a 1113static bool cg_hybrid_init(void)
ccb4cabe 1114{
d6337a5f
CB
1115 int ret;
1116 char *basecginfo;
1117 bool will_escape;
ccb4cabe 1118 FILE *f;
ccb4cabe 1119 size_t len = 0;
d6337a5f
CB
1120 char *line = NULL;
1121 char **klist = NULL, **nlist = NULL;
ccb4cabe 1122
d30ec4cb
SH
1123 /*
1124 * Root spawned containers escape the current cgroup, so use init's
1125 * cgroups as our base in that case.
1126 */
d6337a5f
CB
1127 will_escape = (geteuid() == 0);
1128 if (will_escape)
ccb4cabe 1129 basecginfo = read_file("/proc/1/cgroup");
d6337a5f
CB
1130 else
1131 basecginfo = read_file("/proc/self/cgroup");
ccb4cabe
SH
1132 if (!basecginfo)
1133 return false;
1134
d6337a5f
CB
1135 ret = get_existing_subsystems(&klist, &nlist);
1136 if (ret < 0) {
1137 CGFSNG_DEBUG("Failed to retrieve available cgroup v1 controllers\n");
1138 free(basecginfo);
ccb4cabe
SH
1139 return false;
1140 }
1141
d6337a5f
CB
1142 f = fopen("/proc/self/mountinfo", "r");
1143 if (!f) {
1144 CGFSNG_DEBUG("Failed to open \"/proc/self/mountinfo\"\n");
bd01b7d5 1145 free(basecginfo);
d6337a5f
CB
1146 return false;
1147 }
41c33dbe 1148
e4aeecf5
CB
1149 if (lxc_cgfsng_debug)
1150 lxc_cgfsng_print_basecg_debuginfo(basecginfo, klist, nlist);
ccb4cabe 1151
ccb4cabe 1152 while (getline(&line, &len, f) != -1) {
49ff3958 1153 int type;
d6337a5f
CB
1154 bool writeable;
1155 struct hierarchy *new;
1156 char *mountpoint = NULL, *base_cgroup = NULL;
1157 char **controller_list = NULL;
ccb4cabe 1158
49ff3958 1159 type = get_cgroup_version(line);
d6337a5f 1160 if (type == 0)
ccb4cabe
SH
1161 continue;
1162
d6337a5f 1163 if (type == CGROUP2_SUPER_MAGIC && unified)
ccb4cabe
SH
1164 continue;
1165
d6337a5f
CB
1166 if (cgroup_layout == CGROUP_LAYOUT_UNKNOWN) {
1167 if (type == CGROUP2_SUPER_MAGIC)
1168 cgroup_layout = CGROUP_LAYOUT_UNIFIED;
1169 else if (type == CGROUP_SUPER_MAGIC)
1170 cgroup_layout = CGROUP_LAYOUT_LEGACY;
1171 } else if (cgroup_layout == CGROUP_LAYOUT_UNIFIED) {
1172 if (type == CGROUP_SUPER_MAGIC)
1173 cgroup_layout = CGROUP_LAYOUT_HYBRID;
1174 } else if (cgroup_layout == CGROUP_LAYOUT_LEGACY) {
1175 if (type == CGROUP2_SUPER_MAGIC)
1176 cgroup_layout = CGROUP_LAYOUT_HYBRID;
ccb4cabe
SH
1177 }
1178
a3926f6a 1179 controller_list = cg_hybrid_get_controllers(klist, nlist, line, type);
d6337a5f
CB
1180 if (!controller_list && type == CGROUP_SUPER_MAGIC)
1181 continue;
1182
1183 if (type == CGROUP_SUPER_MAGIC)
1184 if (controller_list_is_dup(hierarchies, controller_list))
1185 goto next;
1186
a3926f6a 1187 mountpoint = cg_hybrid_get_mountpoint(line);
ccb4cabe 1188 if (!mountpoint) {
65d78313 1189 CGFSNG_DEBUG("Failed parsing mountpoint from \"%s\"\n", line);
d6337a5f 1190 goto next;
ccb4cabe
SH
1191 }
1192
d6337a5f 1193 if (type == CGROUP_SUPER_MAGIC)
a3926f6a 1194 base_cgroup = cg_hybrid_get_current_cgroup(basecginfo, controller_list[0], CGROUP_SUPER_MAGIC);
d6337a5f 1195 else
a3926f6a 1196 base_cgroup = cg_hybrid_get_current_cgroup(basecginfo, NULL, CGROUP2_SUPER_MAGIC);
ccb4cabe 1197 if (!base_cgroup) {
d6337a5f
CB
1198 CGFSNG_DEBUG("Failed to find current cgroup\n");
1199 goto next;
ccb4cabe 1200 }
6328fd9c 1201
ccb4cabe
SH
1202 trim(base_cgroup);
1203 prune_init_scope(base_cgroup);
d6337a5f 1204 if (type == CGROUP2_SUPER_MAGIC)
6328fd9c
CB
1205 writeable = test_writeable_v2(mountpoint, base_cgroup);
1206 else
1207 writeable = test_writeable_v1(mountpoint, base_cgroup);
d6337a5f
CB
1208 if (!writeable)
1209 goto next;
1210
1211 if (type == CGROUP2_SUPER_MAGIC) {
1212 char *cgv2_ctrl_path;
1213
1214 cgv2_ctrl_path = must_make_path(mountpoint, base_cgroup,
1215 "cgroup.controllers",
1216 NULL);
1217
1218 controller_list = cg_unified_get_controllers(cgv2_ctrl_path);
1219 free(cgv2_ctrl_path);
1220 if (!controller_list)
1221 controller_list = cg_unified_make_empty_controller();
ccb4cabe 1222 }
d6337a5f
CB
1223 new = add_hierarchy(controller_list, mountpoint, base_cgroup, type);
1224 if (type == CGROUP2_SUPER_MAGIC && !unified)
1225 unified = new;
1226
1227 continue;
1228
1229 next:
1230 free_string_list(controller_list);
1231 free(mountpoint);
1232 free(base_cgroup);
ccb4cabe
SH
1233 }
1234
1235 free_string_list(klist);
1236 free_string_list(nlist);
1237
1238 free(basecginfo);
1239
1240 fclose(f);
1241 free(line);
1242
e4aeecf5
CB
1243 if (lxc_cgfsng_debug) {
1244 printf("writeable subsystems:\n");
1245 lxc_cgfsng_print_hierarchies();
1246 }
1247
ccb4cabe
SH
1248 /* verify that all controllers in cgroup.use and all crucial
1249 * controllers are accounted for
1250 */
c2712f64 1251 if (!all_controllers_found())
ccb4cabe
SH
1252 return false;
1253
1254 return true;
1255}
1256
d6337a5f
CB
1257static int cg_is_pure_unified(void) {
1258
1259 int ret;
1260 struct statfs fs;
1261
1262 ret = statfs("/sys/fs/cgroup", &fs);
1263 if (ret < 0)
1264 return -ENOMEDIUM;
1265
1266 if (is_fs_type(&fs, CGROUP2_SUPER_MAGIC))
1267 return CGROUP2_SUPER_MAGIC;
1268
1269 return 0;
1270}
1271
/*
 * Get the current cgroup for the cgroupfs v2 hierarchy.
 *
 * Reads /proc/1/cgroup when running as root and /proc/self/cgroup otherwise,
 * locates the "0::/" entry and returns a trimmed copy of the path that follows
 * the "0::" prefix. Returns NULL if the file cannot be read, has no such
 * entry, or the copy fails. The caller owns the returned string.
 */
static char *cg_unified_get_current_cgroup(void)
{
	char *procfile, *contents, *entry;
	char *result = NULL;

	procfile = (geteuid() == 0) ? "/proc/1/cgroup" : "/proc/self/cgroup";
	contents = read_file(procfile);
	if (!contents)
		return NULL;

	entry = strstr(contents, "0::/");
	if (entry)
		/* Skip "0::" so the copy starts at the leading '/'. */
		result = copy_to_eol(entry + 3);

	free(contents);
	if (result)
		trim(result);

	return result;
}
1304
a3926f6a 1305static int cg_unified_init(void)
d6337a5f
CB
1306{
1307 int ret;
1308 char *mountpoint, *subtree_path;
1309 char **delegatable;
1310 char *base_cgroup = NULL;
1311
1312 ret = cg_is_pure_unified();
1313 if (ret == -ENOMEDIUM)
1314 return -ENOMEDIUM;
1315
1316 if (ret != CGROUP2_SUPER_MAGIC)
1317 return 0;
1318
a3926f6a 1319 base_cgroup = cg_unified_get_current_cgroup();
d6337a5f
CB
1320 if (!base_cgroup)
1321 return -EINVAL;
1322 prune_init_scope(base_cgroup);
1323
1324 /* We assume that we have already been given controllers to delegate
1325 * further down the hierarchy. If not it is up to the user to delegate
1326 * them to us.
1327 */
1328 mountpoint = must_copy_string("/sys/fs/cgroup");
1329 subtree_path = must_make_path(mountpoint, base_cgroup,
1330 "cgroup.subtree_control", NULL);
1331 delegatable = cg_unified_get_controllers(subtree_path);
1332 free(subtree_path);
1333 if (!delegatable)
1334 delegatable = cg_unified_make_empty_controller();
1335 if (!delegatable[0])
1336 CGFSNG_DEBUG("No controllers are enabled for delegation\n");
1337
1338 /* TODO: If the user requested specific controllers via lxc.cgroup.use
1339 * we should verify here. The reason I'm not doing it right is that I'm
1340 * not convinced that lxc.cgroup.use will be the future since it is a
1341 * global property. I much rather have an option that lets you request
1342 * controllers per container.
1343 */
1344
1345 add_hierarchy(delegatable, mountpoint, base_cgroup, CGROUP2_SUPER_MAGIC);
1346 unified = hierarchies[0];
1347
1348 cgroup_layout = CGROUP_LAYOUT_UNIFIED;
1349 return CGROUP2_SUPER_MAGIC;
1350}
1351
1352static bool cg_init(void)
1353{
1354 int ret;
457ca9aa 1355 const char *tmp;
d6337a5f 1356
457ca9aa
SH
1357 errno = 0;
1358 tmp = lxc_global_config_value("lxc.cgroup.use");
1a0e70ac 1359 if (!cgroup_use && errno != 0) { /* lxc.cgroup.use can be NULL */
65d78313 1360 CGFSNG_DEBUG("Failed to retrieve list of cgroups to use\n");
457ca9aa
SH
1361 return false;
1362 }
1363 cgroup_use = must_copy_string(tmp);
1364
a3926f6a 1365 ret = cg_unified_init();
d6337a5f
CB
1366 if (ret < 0)
1367 return false;
1368
1369 if (ret == CGROUP2_SUPER_MAGIC)
1370 return true;
1371
a3926f6a 1372 return cg_hybrid_init();
457ca9aa
SH
1373}
1374
43654d34 1375static void *cgfsng_init(struct lxc_handler *handler)
ccb4cabe 1376{
457ca9aa 1377 const char *cgroup_pattern;
43654d34 1378 struct cgfsng_handler_data *d;
ccb4cabe
SH
1379
1380 d = must_alloc(sizeof(*d));
1381 memset(d, 0, sizeof(*d));
1382
43654d34
CB
1383 /* copy container name */
1384 d->name = must_copy_string(handler->name);
1385
1386 /* copy per-container cgroup information */
ae5e6c08
CB
1387 d->cgroup_meta.dir = NULL;
1388 d->cgroup_meta.controllers = NULL;
9b5396f9
CB
1389 if (handler->conf) {
1390 d->cgroup_meta.dir = must_copy_string(handler->conf->cgroup_meta.dir);
1391 d->cgroup_meta.controllers = must_copy_string(handler->conf->cgroup_meta.controllers);
1392 }
ccb4cabe 1393
43654d34 1394 /* copy system-wide cgroup information */
ccb4cabe 1395 cgroup_pattern = lxc_global_config_value("lxc.cgroup.pattern");
43654d34
CB
1396 if (!cgroup_pattern) {
1397 /* lxc.cgroup.pattern is only NULL on error. */
ccb4cabe
SH
1398 ERROR("Error getting cgroup pattern");
1399 goto out_free;
1400 }
1401 d->cgroup_pattern = must_copy_string(cgroup_pattern);
1402
d6337a5f
CB
1403 d->cgroup_layout = cgroup_layout;
1404 if (d->cgroup_layout == CGROUP_LAYOUT_LEGACY)
1405 TRACE("Running with legacy cgroup layout");
1406 else if (d->cgroup_layout == CGROUP_LAYOUT_HYBRID)
1407 TRACE("Running with hybrid cgroup layout");
1408 else if (d->cgroup_layout == CGROUP_LAYOUT_UNIFIED)
1409 TRACE("Running with unified cgroup layout");
1410 else
1411 WARN("Running with unknown cgroup layout");
1412
e4aeecf5
CB
1413 if (lxc_cgfsng_debug)
1414 lxc_cgfsng_print_debuginfo(d);
ccb4cabe
SH
1415
1416 return d;
1417
1418out_free:
1419 free_handler_data(d);
1420 return NULL;
1421}
1422
/*
 * Remove the directory @dirname and all directories below it.
 *
 * Only directories are descended into and removed with rmdir(); other entries
 * are left alone. Processing continues after a failure so that as much as
 * possible is removed; only the first failure is logged.
 *
 * Returns 0 on success, -1 if the directory could not be opened or any step
 * failed.
 */
static int recursive_destroy(char *dirname)
{
	int ret;
	struct dirent *direntp;
	DIR *dir;
	int r = 0;

	dir = opendir(dirname);
	if (!dir)
		return -1;

	while ((direntp = readdir(dir))) {
		char *pathname;
		struct stat mystat;

		if (!strcmp(direntp->d_name, ".") ||
		    !strcmp(direntp->d_name, ".."))
			continue;

		pathname = must_make_path(dirname, direntp->d_name, NULL);

		ret = lstat(pathname, &mystat);
		if (ret < 0) {
			if (!r)
				WARN("Failed to stat %s", pathname);
			r = -1;
			goto next;
		}

		/* Only sub-directories need to be removed explicitly. */
		if (!S_ISDIR(mystat.st_mode))
			goto next;

		ret = recursive_destroy(pathname);
		if (ret < 0)
			r = -1;
	next:
		free(pathname);
	}

	ret = rmdir(dirname);
	if (ret < 0) {
		if (!r)
			WARN("%s - Failed to delete \"%s\"", strerror(errno),
			     dirname);
		r = -1;
	}

	ret = closedir(dir);
	if (ret < 0) {
		/* Report the operation that actually failed; this used to
		 * repeat the rmdir() message above.
		 */
		if (!r)
			WARN("%s - Failed to close directory \"%s\"",
			     strerror(errno), dirname);
		r = -1;
	}

	return r;
}
1480
bd8ef4e4
CB
1481static int cgroup_rmdir(char *container_cgroup)
1482{
1483 int i;
1484
1485 if (!container_cgroup || !hierarchies)
1486 return 0;
1487
1488 for (i = 0; hierarchies[i]; i++) {
1489 int ret;
1490 struct hierarchy *h = hierarchies[i];
1491
1492 if (!h->fullcgpath)
1493 continue;
1494
1495 ret = recursive_destroy(h->fullcgpath);
1496 if (ret < 0)
1497 WARN("Failed to destroy \"%s\"", h->fullcgpath);
1498
1499 free(h->fullcgpath);
1500 h->fullcgpath = NULL;
1501 }
1502
1503 return 0;
1504}
1505
4160c3a0
CB
/* Argument bundle handed to the callbacks that are run via userns_exec_1(). */
struct generic_userns_exec_data {
	/* cgfsng handler data of the container */
	struct cgfsng_handler_data *d;

	/* container configuration (id mappings, init uid/gid) */
	struct lxc_conf *conf;

	/* target uid in parent namespace */
	uid_t origuid;

	/* optional path argument; set to NULL by the callers in this file */
	char *path;
};
1512
bd8ef4e4 1513static int cgroup_rmdir_wrapper(void *data)
ccb4cabe 1514{
6efacf80 1515 int ret;
4160c3a0
CB
1516 struct generic_userns_exec_data *arg = data;
1517 uid_t nsuid = (arg->conf->root_nsuid_map != NULL) ? 0 : arg->conf->init_uid;
1518 gid_t nsgid = (arg->conf->root_nsgid_map != NULL) ? 0 : arg->conf->init_gid;
ccb4cabe 1519
6efacf80
CB
1520 ret = setresgid(nsgid, nsgid, nsgid);
1521 if (ret < 0) {
1522 SYSERROR("Failed to setresgid(%d, %d, %d)", (int)nsgid,
1523 (int)nsgid, (int)nsgid);
1524 return -1;
1525 }
1526
1527 ret = setresuid(nsuid, nsuid, nsuid);
1528 if (ret < 0) {
1529 SYSERROR("Failed to setresuid(%d, %d, %d)", (int)nsuid,
1530 (int)nsuid, (int)nsuid);
1531 return -1;
1532 }
1533
1534 ret = setgroups(0, NULL);
1535 if (ret < 0 && errno != EPERM) {
1536 SYSERROR("Failed to setgroups(0, NULL)");
1537 return -1;
1538 }
ccb4cabe 1539
bd8ef4e4 1540 return cgroup_rmdir(arg->d->container_cgroup);
ccb4cabe
SH
1541}
1542
bd8ef4e4 1543static void cgfsng_destroy(void *hdata, struct lxc_conf *conf)
ccb4cabe 1544{
bd8ef4e4
CB
1545 int ret;
1546 struct cgfsng_handler_data *d = hdata;
4160c3a0
CB
1547 struct generic_userns_exec_data wrap;
1548
bd8ef4e4
CB
1549 if (!d)
1550 return;
1551
4160c3a0 1552 wrap.origuid = 0;
bd8ef4e4 1553 wrap.d = hdata;
4160c3a0
CB
1554 wrap.conf = conf;
1555
ccb4cabe 1556 if (conf && !lxc_list_empty(&conf->id_map))
bd8ef4e4
CB
1557 ret = userns_exec_1(conf, cgroup_rmdir_wrapper, &wrap,
1558 "cgroup_rmdir_wrapper");
ccb4cabe 1559 else
bd8ef4e4
CB
1560 ret = cgroup_rmdir(d->container_cgroup);
1561 if (ret < 0) {
1562 WARN("Failed to destroy cgroups");
ccb4cabe 1563 return;
ccb4cabe
SH
1564 }
1565
1566 free_handler_data(d);
1567}
1568
1569struct cgroup_ops *cgfsng_ops_init(void)
1570{
e4aeecf5
CB
1571 if (getenv("LXC_DEBUG_CGFSNG"))
1572 lxc_cgfsng_debug = true;
1573
d6337a5f 1574 if (!cg_init())
457ca9aa 1575 return NULL;
e4aeecf5 1576
ccb4cabe
SH
1577 return &cgfsng_ops;
1578}
1579
a3926f6a 1580static bool cg_unified_create_cgroup(struct hierarchy *h, char *cgname)
0c3deb94
CB
1581{
1582 char **it;
1583 size_t i, parts_len;
1584 size_t full_len = 0;
1585 char *add_controllers = NULL, *cgroup = NULL;
1586 char **parts = NULL;
1587 bool bret = false;
1588
1589 if (h->version != CGROUP2_SUPER_MAGIC)
1590 return true;
1591
1592 if (!h->controllers)
1593 return true;
1594
1595 /* For now we simply enable all controllers that we have detected by
1596 * creating a string like "+memory +pids +cpu +io".
1597 * TODO: In the near future we might want to support "-<controller>"
1598 * etc. but whether supporting semantics like this make sense will need
1599 * some thinking.
1600 */
1601 for (it = h->controllers; it && *it; it++) {
1602 full_len += strlen(*it) + 2;
1603 add_controllers = must_realloc(add_controllers, full_len + 1);
1604 if (h->controllers[0] == *it)
1605 add_controllers[0] = '\0';
1606 strcat(add_controllers, "+");
1607 strcat(add_controllers, *it);
1608 if ((it + 1) && *(it + 1))
1609 strcat(add_controllers, " ");
1610 }
1611
1612 parts = lxc_string_split(cgname, '/');
1613 if (!parts)
1614 goto on_error;
1615 parts_len = lxc_array_len((void **)parts);
1616 if (parts_len > 0)
1617 parts_len--;
1618
1619 cgroup = must_make_path(h->mountpoint, h->base_cgroup, NULL);
1620 for (i = 0; i < parts_len; i++) {
1621 int ret;
1622 char *target;
1623
1624 cgroup = must_append_path(cgroup, parts[i], NULL);
1625 target = must_make_path(cgroup, "cgroup.subtree_control", NULL);
1626 ret = lxc_write_to_file(target, add_controllers, full_len, false);
1627 free(target);
1628 if (ret < 0) {
1629 SYSERROR("Could not enable \"%s\" controllers in the "
1630 "unified cgroup \"%s\"", add_controllers, cgroup);
1631 goto on_error;
1632 }
1633 }
1634
1635 bret = true;
1636
1637on_error:
1638 lxc_free_array((void **)parts, free);
1639 free(add_controllers);
1640 free(cgroup);
1641 return bret;
1642}
1643
ccb4cabe
SH
1644static bool create_path_for_hierarchy(struct hierarchy *h, char *cgname)
1645{
0c3deb94
CB
1646 int ret;
1647
e3a3fecf 1648 h->fullcgpath = must_make_path(h->mountpoint, h->base_cgroup, cgname, NULL);
1a0e70ac 1649 if (dir_exists(h->fullcgpath)) { /* it must not already exist */
0c3deb94 1650 ERROR("cgroup \"%s\" already existed", h->fullcgpath);
d8da679e 1651 return false;
6f9584d8 1652 }
0c3deb94 1653
a3926f6a 1654 if (!cg_legacy_handle_cpuset_hierarchy(h, cgname)) {
0c3deb94
CB
1655 ERROR("Failed to handle cgroupfs v1 cpuset controller");
1656 return false;
1657 }
1658
1659 ret = mkdir_p(h->fullcgpath, 0755);
1660 if (ret < 0) {
1661 ERROR("Failed to create cgroup \"%s\"", h->fullcgpath);
e3a3fecf 1662 return false;
6f9584d8 1663 }
0c3deb94 1664
a3926f6a 1665 return cg_unified_create_cgroup(h, cgname);
ccb4cabe
SH
1666}
1667
1668static void remove_path_for_hierarchy(struct hierarchy *h, char *cgname)
1669{
1670 if (rmdir(h->fullcgpath) < 0)
1671 SYSERROR("Failed to clean up cgroup %s from failed creation attempt", h->fullcgpath);
1672 free(h->fullcgpath);
1673 h->fullcgpath = NULL;
1674}
1675
1676/*
d30ec4cb 1677 * Try to create the same cgroup in all hierarchies.
ccb4cabe
SH
1678 * Start with cgroup_pattern; next cgroup_pattern-1, -2, ..., -999
1679 */
1680static inline bool cgfsng_create(void *hdata)
1681{
bb30b52a 1682 int i;
ccb4cabe 1683 size_t len;
0c3deb94 1684 char *container_cgroup, *offset, *tmp;
7d531e9b
CB
1685 int idx = 0;
1686 struct cgfsng_handler_data *d = hdata;
ccb4cabe
SH
1687
1688 if (!d)
1689 return false;
43654d34 1690
ccb4cabe
SH
1691 if (d->container_cgroup) {
1692 WARN("cgfsng_create called a second time");
1693 return false;
1694 }
1695
43654d34 1696 if (d->cgroup_meta.dir)
7d531e9b 1697 tmp = lxc_string_join("/", (const char *[]){d->cgroup_meta.dir, d->name, NULL}, false);
43654d34
CB
1698 else
1699 tmp = lxc_string_replace("%n", d->name, d->cgroup_pattern);
ccb4cabe
SH
1700 if (!tmp) {
1701 ERROR("Failed expanding cgroup name pattern");
1702 return false;
1703 }
1a0e70ac 1704 len = strlen(tmp) + 5; /* leave room for -NNN\0 */
0c3deb94
CB
1705 container_cgroup = must_alloc(len);
1706 strcpy(container_cgroup, tmp);
ccb4cabe 1707 free(tmp);
0c3deb94 1708 offset = container_cgroup + len - 5;
ccb4cabe
SH
1709
1710again:
95adfe93
SH
1711 if (idx == 1000) {
1712 ERROR("Too many conflicting cgroup names");
ccb4cabe 1713 goto out_free;
95adfe93 1714 }
66b66624 1715 if (idx) {
bb30b52a
CB
1716 int ret;
1717
66b66624
CB
1718 ret = snprintf(offset, 5, "-%d", idx);
1719 if (ret < 0 || (size_t)ret >= 5) {
1720 FILE *f = fopen("/dev/null", "w");
97ebced3 1721 if (f) {
66b66624
CB
1722 fprintf(f, "Workaround for GCC7 bug: "
1723 "https://gcc.gnu.org/bugzilla/"
1724 "show_bug.cgi?id=78969");
1725 fclose(f);
1726 }
1727 }
1728 }
457ca9aa 1729 for (i = 0; hierarchies[i]; i++) {
0c3deb94 1730 if (!create_path_for_hierarchy(hierarchies[i], container_cgroup)) {
ccb4cabe 1731 int j;
1a0e70ac 1732 ERROR("Failed to create \"%s\"", hierarchies[i]->fullcgpath);
457ca9aa
SH
1733 free(hierarchies[i]->fullcgpath);
1734 hierarchies[i]->fullcgpath = NULL;
ccb4cabe 1735 for (j = 0; j < i; j++)
0c3deb94 1736 remove_path_for_hierarchy(hierarchies[j], container_cgroup);
ccb4cabe
SH
1737 idx++;
1738 goto again;
1739 }
1740 }
1741 /* Done */
0c3deb94 1742 d->container_cgroup = container_cgroup;
ccb4cabe
SH
1743 return true;
1744
1745out_free:
0c3deb94 1746 free(container_cgroup);
ccb4cabe
SH
1747 return false;
1748}
1749
ccb4cabe
SH
1750static bool cgfsng_enter(void *hdata, pid_t pid)
1751{
ccb4cabe
SH
1752 char pidstr[25];
1753 int i, len;
1754
1755 len = snprintf(pidstr, 25, "%d", pid);
1756 if (len < 0 || len > 25)
1757 return false;
1758
457ca9aa
SH
1759 for (i = 0; hierarchies[i]; i++) {
1760 char *fullpath = must_make_path(hierarchies[i]->fullcgpath,
ccb4cabe
SH
1761 "cgroup.procs", NULL);
1762 if (lxc_write_to_file(fullpath, pidstr, len, false) != 0) {
d3b00a8f 1763 SYSERROR("Failed to enter %s", fullpath);
ccb4cabe
SH
1764 free(fullpath);
1765 return false;
1766 }
1767 free(fullpath);
1768 }
1769
1770 return true;
1771}
1772
6efacf80
CB
/*
 * Change owner, group and mode of @path in one go.
 *
 * Returns 0 on success. If chown() or chmod() fails a warning is logged and
 * -1 is returned; chmod() is not attempted after a failed chown().
 */
static int chowmod(char *path, uid_t chown_uid, gid_t chown_gid,
		   mode_t chmod_mode)
{
	if (chown(path, chown_uid, chown_gid) < 0) {
		WARN("%s - Failed to chown(%s, %d, %d)", strerror(errno), path,
		     (int)chown_uid, (int)chown_gid);
		return -1;
	}

	if (chmod(path, chmod_mode) < 0) {
		WARN("%s - Failed to chmod(%s, %d)", strerror(errno), path,
		     (int)chmod_mode);
		return -1;
	}

	return 0;
}
1794
1795/* chgrp the container cgroups to container group. We leave
c0888dfe
SH
1796 * the container owner as cgroup owner. So we must make the
1797 * directories 775 so that the container can create sub-cgroups.
43647298
SH
1798 *
1799 * Also chown the tasks and cgroup.procs files. Those may not
1800 * exist depending on kernel version.
c0888dfe 1801 */
ccb4cabe
SH
1802static int chown_cgroup_wrapper(void *data)
1803{
6efacf80 1804 int i, ret;
4160c3a0
CB
1805 uid_t destuid;
1806 struct generic_userns_exec_data *arg = data;
1807 uid_t nsuid = (arg->conf->root_nsuid_map != NULL) ? 0 : arg->conf->init_uid;
1808 gid_t nsgid = (arg->conf->root_nsgid_map != NULL) ? 0 : arg->conf->init_gid;
ccb4cabe 1809
6efacf80
CB
1810 ret = setresgid(nsgid, nsgid, nsgid);
1811 if (ret < 0) {
1812 SYSERROR("Failed to setresgid(%d, %d, %d)",
1813 (int)nsgid, (int)nsgid, (int)nsgid);
1814 return -1;
1815 }
1816
1817 ret = setresuid(nsuid, nsuid, nsuid);
1818 if (ret < 0) {
1819 SYSERROR("Failed to setresuid(%d, %d, %d)",
1820 (int)nsuid, (int)nsuid, (int)nsuid);
1821 return -1;
1822 }
1823
1824 ret = setgroups(0, NULL);
1825 if (ret < 0 && errno != EPERM) {
1826 SYSERROR("Failed to setgroups(0, NULL)");
1827 return -1;
1828 }
ccb4cabe
SH
1829
1830 destuid = get_ns_uid(arg->origuid);
1831
457ca9aa 1832 for (i = 0; hierarchies[i]; i++) {
6efacf80
CB
1833 char *fullpath;
1834 char *path = hierarchies[i]->fullcgpath;
43647298 1835
63e42fee 1836 ret = chowmod(path, destuid, nsgid, 0775);
6efacf80 1837 if (ret < 0)
ccb4cabe 1838 return -1;
c0888dfe 1839
6efacf80
CB
1840 /* Failures to chown() these are inconvenient but not
1841 * detrimental We leave these owned by the container launcher,
1842 * so that container root can write to the files to attach. We
1843 * chmod() them 664 so that container systemd can write to the
1844 * files (which systemd in wily insists on doing).
ab8f5424 1845 */
6efacf80
CB
1846
1847 if (hierarchies[i]->version == CGROUP_SUPER_MAGIC) {
1848 fullpath = must_make_path(path, "tasks", NULL);
1849 (void)chowmod(fullpath, destuid, nsgid, 0664);
1850 free(fullpath);
1851 }
43647298
SH
1852
1853 fullpath = must_make_path(path, "cgroup.procs", NULL);
6efacf80 1854 (void)chowmod(fullpath, destuid, 0, 0664);
ccb4cabe 1855 free(fullpath);
0e17357c 1856
d6337a5f 1857 if (hierarchies[i]->version != CGROUP2_SUPER_MAGIC)
0e17357c
CB
1858 continue;
1859
1860 fullpath = must_make_path(path, "cgroup.subtree_control", NULL);
6efacf80 1861 (void)chowmod(fullpath, destuid, nsgid, 0664);
0e17357c
CB
1862 free(fullpath);
1863
1864 fullpath = must_make_path(path, "cgroup.threads", NULL);
6efacf80 1865 (void)chowmod(fullpath, destuid, nsgid, 0664);
0e17357c 1866 free(fullpath);
ccb4cabe
SH
1867 }
1868
1869 return 0;
1870}
1871
058c1cb6 1872static bool cgfsng_chown(void *hdata, struct lxc_conf *conf)
ccb4cabe
SH
1873{
1874 struct cgfsng_handler_data *d = hdata;
4160c3a0 1875 struct generic_userns_exec_data wrap;
ccb4cabe
SH
1876
1877 if (!d)
1878 return false;
1879
1880 if (lxc_list_empty(&conf->id_map))
1881 return true;
1882
ccb4cabe 1883 wrap.origuid = geteuid();
4160c3a0
CB
1884 wrap.path = NULL;
1885 wrap.d = d;
1886 wrap.conf = conf;
ccb4cabe 1887
c9b7c33e
CB
1888 if (userns_exec_1(conf, chown_cgroup_wrapper, &wrap,
1889 "chown_cgroup_wrapper") < 0) {
ccb4cabe
SH
1890 ERROR("Error requesting cgroup chown in new namespace");
1891 return false;
1892 }
1893
1894 return true;
1895}
1896
8aa1044f
SH
1897/*
1898 * We've safe-mounted a tmpfs as parent, so we don't need to protect against
1899 * symlinks any more - just use mount
1900 */
1901
1902/* mount cgroup-full if requested */
1903static int mount_cgroup_full(int type, struct hierarchy *h, char *dest,
a3926f6a 1904 char *container_cgroup)
8aa1044f
SH
1905{
1906 if (type < LXC_AUTO_CGROUP_FULL_RO || type > LXC_AUTO_CGROUP_FULL_MIXED)
1907 return 0;
1908 if (mount(h->mountpoint, dest, "cgroup", MS_BIND, NULL) < 0) {
1909 SYSERROR("Error bind-mounting %s cgroup onto %s", h->mountpoint,
1910 dest);
1911 return -1;
1912 }
1913 if (type != LXC_AUTO_CGROUP_FULL_RW) {
5b6f9369
SH
1914 unsigned long flags = MS_BIND | MS_NOSUID | MS_NOEXEC | MS_NODEV |
1915 MS_REMOUNT | MS_RDONLY;
1916 if (mount(NULL, dest, "cgroup", flags, NULL) < 0) {
8aa1044f
SH
1917 SYSERROR("Error remounting %s readonly", dest);
1918 return -1;
1919 }
1920 }
1921
1922 INFO("Bind mounted %s onto %s", h->mountpoint, dest);
1923 if (type != LXC_AUTO_CGROUP_FULL_MIXED)
1924 return 0;
1925
1926 /* mount just the container path rw */
1927 char *source = must_make_path(h->mountpoint, h->base_cgroup, container_cgroup, NULL);
5b6f9369 1928 char *rwpath = must_make_path(dest, h->base_cgroup, container_cgroup, NULL);
8aa1044f 1929 if (mount(source, rwpath, "cgroup", MS_BIND, NULL) < 0)
13277ec4 1930 WARN("Failed to mount %s read-write: %s", rwpath,
1931 strerror(errno));
8aa1044f
SH
1932 INFO("Made %s read-write", rwpath);
1933 free(rwpath);
1934 free(source);
1935 return 0;
1936}
1937
1938/* cgroup-full:* is done, no need to create subdirs */
1939static bool cg_mount_needs_subdirs(int type)
1940{
1941 if (type >= LXC_AUTO_CGROUP_FULL_RO)
1942 return false;
a3926f6a 1943
8aa1044f
SH
1944 return true;
1945}
1946
886cac86
CB
1947/* After $rootfs/sys/fs/container/controller/the/cg/path has been created,
1948 * remount controller ro if needed and bindmount the cgroupfs onto
1949 * controll/the/cg/path.
8aa1044f 1950 */
a3926f6a
CB
1951static int do_secondstage_mounts_if_needed(int type, struct hierarchy *h,
1952 char *controllerpath, char *cgpath,
1953 const char *container_cgroup)
8aa1044f 1954{
5285689c 1955 int ret, remount_flags;
886cac86
CB
1956 char *sourcepath;
1957 int flags = MS_BIND;
1958
8aa1044f 1959 if (type == LXC_AUTO_CGROUP_RO || type == LXC_AUTO_CGROUP_MIXED) {
886cac86
CB
1960 ret = mount(controllerpath, controllerpath, "cgroup", MS_BIND, NULL);
1961 if (ret < 0) {
1962 SYSERROR("Failed to bind mount \"%s\" onto \"%s\"",
1963 controllerpath, controllerpath);
8aa1044f
SH
1964 return -1;
1965 }
886cac86 1966
5285689c
CB
1967 remount_flags = add_required_remount_flags(controllerpath,
1968 controllerpath,
1969 flags | MS_REMOUNT);
886cac86
CB
1970 ret = mount(controllerpath, controllerpath, "cgroup",
1971 MS_REMOUNT | MS_BIND | MS_RDONLY, NULL);
1972 if (ret < 0) {
1973 SYSERROR("Failed to remount \"%s\" ro", controllerpath);
8aa1044f
SH
1974 return -1;
1975 }
886cac86 1976
8aa1044f
SH
1977 INFO("Remounted %s read-only", controllerpath);
1978 }
886cac86
CB
1979
1980 sourcepath = must_make_path(h->mountpoint, h->base_cgroup,
1981 container_cgroup, NULL);
8aa1044f
SH
1982 if (type == LXC_AUTO_CGROUP_RO)
1983 flags |= MS_RDONLY;
886cac86
CB
1984
1985 ret = mount(sourcepath, cgpath, "cgroup", flags, NULL);
1986 if (ret < 0) {
1987 SYSERROR("Failed to mount \"%s\" onto \"%s\"", h->controllers[0], cgpath);
8aa1044f 1988 free(sourcepath);
8aa1044f
SH
1989 return -1;
1990 }
886cac86 1991 INFO("Mounted \"%s\" onto \"%s\"", h->controllers[0], cgpath);
f8c40ffa
L
1992
1993 if (flags & MS_RDONLY) {
5285689c
CB
1994 remount_flags = add_required_remount_flags(sourcepath, cgpath,
1995 flags | MS_REMOUNT);
1996 ret = mount(sourcepath, cgpath, "cgroup", remount_flags, NULL);
886cac86
CB
1997 if (ret < 0) {
1998 SYSERROR("Failed to remount \"%s\" ro", cgpath);
f8c40ffa 1999 free(sourcepath);
f8c40ffa
L
2000 return -1;
2001 }
5285689c 2002 INFO("Remounted %s read-only", cgpath);
f8c40ffa
L
2003 }
2004
8aa1044f 2005 free(sourcepath);
886cac86 2006 INFO("Completed second stage cgroup automounts for \"%s\"", cgpath);
8aa1044f
SH
2007 return 0;
2008}
2009
5285689c
CB
2010static int cg_mount_in_cgroup_namespace(int type, struct hierarchy *h,
2011 const char *controllerpath)
b635e92d
CB
2012{
2013 int ret;
2014 char *controllers = NULL;
a760603e
CB
2015 char *fstype = "cgroup2";
2016 unsigned long flags = 0;
b635e92d 2017
a760603e
CB
2018 flags |= MS_NOSUID;
2019 flags |= MS_NOEXEC;
2020 flags |= MS_NODEV;
2021 flags |= MS_RELATIME;
2022
2023 if (type == LXC_AUTO_CGROUP_RO || type == LXC_AUTO_CGROUP_FULL_RO)
2024 flags |= MS_RDONLY;
2025
d6337a5f 2026 if (h->version != CGROUP2_SUPER_MAGIC) {
a760603e
CB
2027 controllers = lxc_string_join(",", (const char **)h->controllers, false);
2028 if (!controllers)
2029 return -ENOMEM;
2030 fstype = "cgroup";
b635e92d
CB
2031 }
2032
a760603e 2033 ret = mount("cgroup", controllerpath, fstype, flags, controllers);
b635e92d
CB
2034 free(controllers);
2035 if (ret < 0) {
a760603e 2036 SYSERROR("Failed to mount %s with cgroup filesystem type %s", controllerpath, fstype);
b635e92d
CB
2037 return -1;
2038 }
2039
a760603e 2040 DEBUG("Mounted %s with cgroup filesystem type %s", controllerpath, fstype);
b635e92d
CB
2041 return 0;
2042}
2043
ccb4cabe
SH
2044static bool cgfsng_mount(void *hdata, const char *root, int type)
2045{
b635e92d 2046 int i;
8aa1044f
SH
2047 char *tmpfspath = NULL;
2048 bool retval = false;
b635e92d
CB
2049 struct lxc_handler *handler = hdata;
2050 struct cgfsng_handler_data *d = handler->cgroup_data;
2051 bool has_cgns = false, has_sys_admin = true;
8aa1044f
SH
2052
2053 if ((type & LXC_AUTO_CGROUP_MASK) == 0)
2054 return true;
2055
b635e92d
CB
2056 has_cgns = cgns_supported();
2057 if (!lxc_list_empty(&handler->conf->keepcaps))
2058 has_sys_admin = in_caplist(CAP_SYS_ADMIN, &handler->conf->keepcaps);
2059 else
2060 has_sys_admin = !in_caplist(CAP_SYS_ADMIN, &handler->conf->caps);
2061
2062 if (has_cgns && has_sys_admin)
ccb4cabe 2063 return true;
8aa1044f
SH
2064
2065 tmpfspath = must_make_path(root, "/sys/fs/cgroup", NULL);
2066
2067 if (type == LXC_AUTO_CGROUP_NOSPEC)
2068 type = LXC_AUTO_CGROUP_MIXED;
2069 else if (type == LXC_AUTO_CGROUP_FULL_NOSPEC)
2070 type = LXC_AUTO_CGROUP_FULL_MIXED;
2071
2072 /* Mount tmpfs */
2073 if (safe_mount("cgroup_root", tmpfspath, "tmpfs",
2074 MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_RELATIME,
2075 "size=10240k,mode=755",
2076 root) < 0)
2077 goto bad;
2078
457ca9aa 2079 for (i = 0; hierarchies[i]; i++) {
8aa1044f 2080 char *controllerpath, *path2;
457ca9aa 2081 struct hierarchy *h = hierarchies[i];
8aa1044f
SH
2082 char *controller = strrchr(h->mountpoint, '/');
2083 int r;
2084
2085 if (!controller)
2086 continue;
2087 controller++;
2088 controllerpath = must_make_path(tmpfspath, controller, NULL);
2089 if (dir_exists(controllerpath)) {
2090 free(controllerpath);
2091 continue;
2092 }
2093 if (mkdir(controllerpath, 0755) < 0) {
2094 SYSERROR("Error creating cgroup path: %s", controllerpath);
2095 free(controllerpath);
2096 goto bad;
2097 }
b635e92d
CB
2098
2099 if (has_cgns && !has_sys_admin) {
2100 /* If cgroup namespaces are supported but the container
2101 * will not have CAP_SYS_ADMIN after it has started we
2102 * need to mount the cgroups manually.
2103 */
5285689c 2104 r = cg_mount_in_cgroup_namespace(type, h, controllerpath);
b635e92d
CB
2105 free(controllerpath);
2106 if (r < 0)
2107 goto bad;
2108 continue;
2109 }
2110
8aa1044f
SH
2111 if (mount_cgroup_full(type, h, controllerpath, d->container_cgroup) < 0) {
2112 free(controllerpath);
2113 goto bad;
2114 }
2115 if (!cg_mount_needs_subdirs(type)) {
2116 free(controllerpath);
2117 continue;
2118 }
ef4413fa 2119 path2 = must_make_path(controllerpath, h->base_cgroup, d->container_cgroup, NULL);
8aa1044f
SH
2120 if (mkdir_p(path2, 0755) < 0) {
2121 free(controllerpath);
8e0c6620 2122 free(path2);
8aa1044f
SH
2123 goto bad;
2124 }
2f62fb00 2125
8aa1044f
SH
2126 r = do_secondstage_mounts_if_needed(type, h, controllerpath, path2,
2127 d->container_cgroup);
2128 free(controllerpath);
2129 free(path2);
2130 if (r < 0)
2131 goto bad;
2132 }
2133 retval = true;
2134
2135bad:
2136 free(tmpfspath);
2137 return retval;
ccb4cabe
SH
2138}
2139
/*
 * Sum the value reported by lxc_count_file_lines() for the "cgroup.procs"
 * file in @dirname and in every directory below it.
 *
 * Returns 0 when @dirname cannot be opened. A "cgroup.procs" file for which
 * lxc_count_file_lines() returns -1 contributes nothing to the total.
 */
static int recursive_count_nrtasks(char *dirname)
{
	struct dirent *entry;
	DIR *dirp;
	char *procs_path;
	int nlines;
	int total = 0;

	dirp = opendir(dirname);
	if (!dirp)
		return 0;

	for (entry = readdir(dirp); entry; entry = readdir(dirp)) {
		struct stat sb;
		char *child;

		if (strcmp(entry->d_name, ".") == 0 ||
		    strcmp(entry->d_name, "..") == 0)
			continue;

		child = must_make_path(dirname, entry->d_name, NULL);

		/* Only recurse into directories; lstat() does not follow
		 * symbolic links.
		 */
		if (lstat(child, &sb) == 0 && S_ISDIR(sb.st_mode))
			total += recursive_count_nrtasks(child);

		free(child);
	}

	procs_path = must_make_path(dirname, "cgroup.procs", NULL);
	nlines = lxc_count_file_lines(procs_path);
	if (nlines != -1)
		total += nlines;
	free(procs_path);

	(void)closedir(dirp);

	return total;
}
2184
2185static int cgfsng_nrtasks(void *hdata) {
2186 struct cgfsng_handler_data *d = hdata;
2187 char *path;
2188 int count;
2189
457ca9aa 2190 if (!d || !d->container_cgroup || !hierarchies)
ccb4cabe 2191 return -1;
a3926f6a 2192
457ca9aa 2193 path = must_make_path(hierarchies[0]->fullcgpath, NULL);
ccb4cabe
SH
2194 count = recursive_count_nrtasks(path);
2195 free(path);
2196 return count;
2197}
2198
2199/* Only root needs to escape to the cgroup of its init */
7103fe6f 2200static bool cgfsng_escape()
ccb4cabe 2201{
ccb4cabe
SH
2202 int i;
2203
2204 if (geteuid())
2205 return true;
2206
457ca9aa
SH
2207 for (i = 0; hierarchies[i]; i++) {
2208 char *fullpath = must_make_path(hierarchies[i]->mountpoint,
2209 hierarchies[i]->base_cgroup,
ccb4cabe
SH
2210 "cgroup.procs", NULL);
2211 if (lxc_write_to_file(fullpath, "0", 2, false) != 0) {
d3b00a8f 2212 SYSERROR("Failed to escape to %s", fullpath);
ccb4cabe 2213 free(fullpath);
6df334d1 2214 return false;
ccb4cabe
SH
2215 }
2216 free(fullpath);
2217 }
2218
6df334d1 2219 return true;
ccb4cabe
SH
2220}
2221
36662416
TA
2222static int cgfsng_num_hierarchies(void)
2223{
2224 int i;
2225
2226 for (i = 0; hierarchies[i]; i++)
2227 ;
2228
2229 return i;
2230}
2231
2232static bool cgfsng_get_hierarchies(int n, char ***out)
2233{
2234 int i;
2235
2236 /* sanity check n */
6b38e644 2237 for (i = 0; i < n; i++)
36662416
TA
2238 if (!hierarchies[i])
2239 return false;
36662416
TA
2240
2241 *out = hierarchies[i]->controllers;
2242
2243 return true;
2244}
2245
ccb4cabe
SH
2246#define THAWED "THAWED"
2247#define THAWED_LEN (strlen(THAWED))
2248
d6337a5f
CB
2249/* TODO: If the unified cgroup hierarchy grows a freezer controller this needs
2250 * to be adapted.
2251 */
ccb4cabe
SH
2252static bool cgfsng_unfreeze(void *hdata)
2253{
d6337a5f 2254 int ret;
ccb4cabe 2255 char *fullpath;
d6337a5f 2256 struct hierarchy *h;
ccb4cabe 2257
d6337a5f 2258 h = get_hierarchy("freezer");
457ca9aa 2259 if (!h)
ccb4cabe 2260 return false;
d6337a5f 2261
ccb4cabe 2262 fullpath = must_make_path(h->fullcgpath, "freezer.state", NULL);
d6337a5f 2263 ret = lxc_write_to_file(fullpath, THAWED, THAWED_LEN, false);
ccb4cabe 2264 free(fullpath);
d6337a5f
CB
2265 if (ret < 0)
2266 return false;
2267
ccb4cabe
SH
2268 return true;
2269}
2270
2271static const char *cgfsng_get_cgroup(void *hdata, const char *subsystem)
2272{
d6337a5f
CB
2273 struct hierarchy *h;
2274
2275 h = get_hierarchy(subsystem);
ccb4cabe
SH
2276 if (!h)
2277 return NULL;
2278
371f834d
SH
2279 return h->fullcgpath ? h->fullcgpath + strlen(h->mountpoint) : NULL;
2280}
2281
2282/*
2283 * Given a cgroup path returned from lxc_cmd_get_cgroup_path, build a
2284 * full path, which must be freed by the caller.
2285 */
2286static char *build_full_cgpath_from_monitorpath(struct hierarchy *h,
2287 const char *inpath,
2288 const char *filename)
2289{
371f834d 2290 return must_make_path(h->mountpoint, inpath, filename, NULL);
ccb4cabe
SH
2291}
2292
c2aed66d
CB
2293/* Technically, we're always at a delegation boundary here. (This is especially
2294 * true when cgroup namespaces are available.) The reasoning is that in order
2295 * for us to have been able to start a container in the first place the root
2296 * cgroup must have been a leaf node. Now, either the container's init system
2297 * has populated the cgroup and kept it as a leaf node or it has created
2298 * subtrees. In the former case we will simply attach to the leaf node we
2299 * created when we started the container in the latter case we create our own
2300 * cgroup for the attaching process.
2301 */
a3926f6a
CB
2302static int __cg_unified_attach(const struct hierarchy *h, const char *name,
2303 const char *lxcpath, const char *pidstr,
2304 size_t pidstr_len, const char *controller)
c2aed66d
CB
2305{
2306 int ret;
2307 size_t len;
2308 int fret = -1, idx = 0;
2309 char *base_path = NULL, *container_cgroup = NULL, *full_path = NULL;
2310
2311 container_cgroup = lxc_cmd_get_cgroup_path(name, lxcpath, controller);
2312 /* not running */
2313 if (!container_cgroup)
2314 return 0;
2315
2316 base_path = must_make_path(h->mountpoint, container_cgroup, NULL);
2317 full_path = must_make_path(base_path, "cgroup.procs", NULL);
2318 /* cgroup is populated */
2319 ret = lxc_write_to_file(full_path, pidstr, pidstr_len, false);
2320 if (ret < 0 && errno != EBUSY)
2321 goto on_error;
2322
2323 if (ret == 0)
2324 goto on_success;
2325
2326 free(full_path);
2327
2328 len = strlen(base_path) + sizeof("/lxc-1000") - 1 +
2329 sizeof("/cgroup-procs") - 1;
2330 full_path = must_alloc(len + 1);
2331 do {
2332 if (idx)
2333 ret = snprintf(full_path, len + 1, "%s/lxc-%d",
2334 base_path, idx);
2335 else
2336 ret = snprintf(full_path, len + 1, "%s/lxc", base_path);
2337 if (ret < 0 || (size_t)ret >= len + 1)
2338 goto on_error;
2339
2340 ret = mkdir_p(full_path, 0755);
2341 if (ret < 0 && errno != EEXIST)
2342 goto on_error;
2343
2344 strcat(full_path, "/cgroup.procs");
2345 ret = lxc_write_to_file(full_path, pidstr, len, false);
2346 if (ret == 0)
2347 goto on_success;
2348
2349 /* this is a non-leaf node */
2350 if (errno != EBUSY)
2351 goto on_error;
2352
2353 } while (++idx > 0 && idx < 1000);
2354
2355on_success:
2356 if (idx < 1000)
2357 fret = 0;
2358
2359on_error:
2360 free(base_path);
2361 free(container_cgroup);
2362 free(full_path);
2363
2364 return fret;
2365}
2366
ccb4cabe
SH
2367static bool cgfsng_attach(const char *name, const char *lxcpath, pid_t pid)
2368{
c2aed66d 2369 int i, len, ret;
ccb4cabe 2370 char pidstr[25];
ccb4cabe
SH
2371
2372 len = snprintf(pidstr, 25, "%d", pid);
2373 if (len < 0 || len > 25)
2374 return false;
2375
457ca9aa 2376 for (i = 0; hierarchies[i]; i++) {
c2aed66d
CB
2377 char *path;
2378 char *fullpath = NULL;
457ca9aa 2379 struct hierarchy *h = hierarchies[i];
ccb4cabe 2380
c2aed66d 2381 if (h->version == CGROUP2_SUPER_MAGIC) {
a3926f6a
CB
2382 ret = __cg_unified_attach(h, name, lxcpath, pidstr, len,
2383 h->controllers[0]);
c2aed66d
CB
2384 if (ret < 0)
2385 return false;
2386
2387 continue;
2388 }
2389
ccb4cabe 2390 path = lxc_cmd_get_cgroup_path(name, lxcpath, h->controllers[0]);
c2aed66d
CB
2391 /* not running */
2392 if (!path)
ccb4cabe
SH
2393 continue;
2394
371f834d 2395 fullpath = build_full_cgpath_from_monitorpath(h, path, "cgroup.procs");
c2aed66d
CB
2396 ret = lxc_write_to_file(fullpath, pidstr, len, false);
2397 if (ret < 0) {
ccb4cabe
SH
2398 SYSERROR("Failed to attach %d to %s", (int)pid, fullpath);
2399 free(fullpath);
ccb4cabe
SH
2400 return false;
2401 }
ccb4cabe
SH
2402 free(fullpath);
2403 }
2404
ccb4cabe
SH
2405 return true;
2406}
2407
/*
 * Called externally (i.e. from 'lxc-cgroup') to query cgroup limits.
 * Here we don't have a cgroup_data set up, so we ask the running
 * container through the commands API for the cgroup path.
 *
 * @filename cgroup file to read; the part before the first '.' (or the whole
 *           name when there is no '.') is used as the controller name
 * @value    buffer passed on to lxc_read_from_file()
 * @len      length passed on to lxc_read_from_file()
 *
 * Returns -1 when no cgroup path could be retrieved or no hierarchy exists
 * for the controller, otherwise the result of lxc_read_from_file().
 */
static int cgfsng_get(const char *filename, char *value, size_t len,
		      const char *name, const char *lxcpath)
{
	struct hierarchy *hier;
	char *cgpath, *ctrl, *dot;
	size_t namelen = strlen(filename);
	int rc = -1;

	/* Private, writable copy of the file name so it can be cut at '.'. */
	ctrl = alloca(namelen + 1);
	memcpy(ctrl, filename, namelen + 1);
	dot = strchr(ctrl, '.');
	if (dot)
		*dot = '\0';

	cgpath = lxc_cmd_get_cgroup_path(name, lxcpath, ctrl);
	/* not running */
	if (!cgpath)
		return -1;

	hier = get_hierarchy(ctrl);
	if (hier) {
		char *file;

		file = build_full_cgpath_from_monitorpath(hier, cgpath, filename);
		rc = lxc_read_from_file(file, value, len);
		free(file);
	}

	free(cgpath);

	return rc;
}
2445
/*
 * Called externally (i.e. from 'lxc-cgroup') to set new cgroup limits.
 * Here we don't have a cgroup_data set up, so we ask the running
 * container through the commands API for the cgroup path.
 *
 * @filename cgroup file to write; the part before the first '.' (or the whole
 *           name when there is no '.') is used as the controller name
 * @value    NUL-terminated string to write
 *
 * Returns -1 when no cgroup path could be retrieved or no hierarchy exists
 * for the controller, otherwise the result of lxc_write_to_file().
 */
static int cgfsng_set(const char *filename, const char *value, const char *name,
		      const char *lxcpath)
{
	struct hierarchy *hier;
	char *cgpath, *ctrl, *dot;
	size_t namelen = strlen(filename);
	int rc = -1;

	/* Private, writable copy of the file name so it can be cut at '.'. */
	ctrl = alloca(namelen + 1);
	memcpy(ctrl, filename, namelen + 1);
	dot = strchr(ctrl, '.');
	if (dot)
		*dot = '\0';

	cgpath = lxc_cmd_get_cgroup_path(name, lxcpath, ctrl);
	/* not running */
	if (!cgpath)
		return -1;

	hier = get_hierarchy(ctrl);
	if (hier) {
		char *file;

		file = build_full_cgpath_from_monitorpath(hier, cgpath, filename);
		rc = lxc_write_to_file(file, value, strlen(value), false);
		free(file);
	}

	free(cgpath);

	return rc;
}
2483
72add155
SH
2484/*
2485 * take devices cgroup line
2486 * /dev/foo rwx
2487 * and convert it to a valid
2488 * type major:minor mode
2489 * line. Return <0 on error. Dest is a preallocated buffer
2490 * long enough to hold the output.
2491 */
2492static int convert_devpath(const char *invalue, char *dest)
2493{
2a06d041
CB
2494 int n_parts;
2495 char *p, *path, type;
72add155
SH
2496 struct stat sb;
2497 unsigned long minor, major;
2a06d041
CB
2498 int ret = -EINVAL;
2499 char *mode = NULL;
72add155
SH
2500
2501 path = must_copy_string(invalue);
2502
2503 /*
2504 * read path followed by mode; ignore any trailing text.
2505 * A ' # comment' would be legal. Technically other text
2506 * is not legal, we could check for that if we cared to
2507 */
2508 for (n_parts = 1, p = path; *p && n_parts < 3; p++) {
2c2d6c49
SH
2509 if (*p != ' ')
2510 continue;
2511 *p = '\0';
2512 if (n_parts != 1)
2513 break;
2514 p++;
2515 n_parts++;
2516 while (*p == ' ')
2517 p++;
2518 mode = p;
2519 if (*p == '\0')
2520 goto out;
72add155 2521 }
2c2d6c49
SH
2522
2523 if (n_parts == 1)
72add155 2524 goto out;
72add155
SH
2525
2526 ret = stat(path, &sb);
2527 if (ret < 0)
2528 goto out;
2529
72add155
SH
2530 mode_t m = sb.st_mode & S_IFMT;
2531 switch (m) {
2532 case S_IFBLK:
2533 type = 'b';
2534 break;
2535 case S_IFCHR:
2536 type = 'c';
2537 break;
2c2d6c49 2538 default:
72add155
SH
2539 ERROR("Unsupported device type %i for %s", m, path);
2540 ret = -EINVAL;
2541 goto out;
2542 }
2c2d6c49
SH
2543
2544 major = MAJOR(sb.st_rdev);
2545 minor = MINOR(sb.st_rdev);
2546 ret = snprintf(dest, 50, "%c %lu:%lu %s", type, major, minor, mode);
72add155 2547 if (ret < 0 || ret >= 50) {
2a06d041
CB
2548 ERROR("Error on configuration value \"%c %lu:%lu %s\" (max 50 "
2549 "chars)", type, major, minor, mode);
72add155
SH
2550 ret = -ENAMETOOLONG;
2551 goto out;
2552 }
2553 ret = 0;
2554
2555out:
2556 free(path);
2557 return ret;
2558}
2559
ccb4cabe
SH
2560/*
2561 * Called from setup_limits - here we have the container's cgroup_data because
2562 * we created the cgroups
2563 */
a3926f6a
CB
2564static int cg_legacy_set_data(const char *filename, const char *value,
2565 struct cgfsng_handler_data *d)
ccb4cabe 2566{
b3646d7e 2567 char *fullpath, *p;
ab1a6cac 2568 size_t len;
1a0e70ac
CB
2569 /* "b|c <2^64-1>:<2^64-1> r|w|m" = 47 chars max */
2570 char converted_value[50];
b3646d7e
CB
2571 struct hierarchy *h;
2572 int ret = 0;
2573 char *controller = NULL;
ccb4cabe 2574
ab1a6cac
CB
2575 len = strlen(filename);
2576 controller = alloca(len + 1);
b3646d7e 2577 strcpy(controller, filename);
ab1a6cac
CB
2578 p = strchr(controller, '.');
2579 if (p)
ccb4cabe
SH
2580 *p = '\0';
2581
c8bf519d 2582 if (strcmp("devices.allow", filename) == 0 && value[0] == '/') {
72add155
SH
2583 ret = convert_devpath(value, converted_value);
2584 if (ret < 0)
c8bf519d 2585 return ret;
72add155 2586 value = converted_value;
c8bf519d 2587 }
2588
b3646d7e
CB
2589 h = get_hierarchy(controller);
2590 if (!h) {
2591 ERROR("Failed to setup limits for the \"%s\" controller. "
2592 "The controller seems to be unused by \"cgfsng\" cgroup "
2593 "driver or not enabled on the cgroup hierarchy",
2594 controller);
d1953b26 2595 errno = ENOENT;
ab1a6cac 2596 return -ENOENT;
ccb4cabe 2597 }
b3646d7e
CB
2598
2599 fullpath = must_make_path(h->fullcgpath, filename, NULL);
2600 ret = lxc_write_to_file(fullpath, value, strlen(value), false);
2601 free(fullpath);
ccb4cabe
SH
2602 return ret;
2603}
2604
a3926f6a
CB
2605static bool __cg_legacy_setup_limits(void *hdata,
2606 struct lxc_list *cgroup_settings,
2607 bool do_devices)
ccb4cabe
SH
2608{
2609 struct cgfsng_handler_data *d = hdata;
2610 struct lxc_list *iterator, *sorted_cgroup_settings, *next;
2611 struct lxc_cgroup *cg;
ccb4cabe
SH
2612 bool ret = false;
2613
2614 if (lxc_list_empty(cgroup_settings))
2615 return true;
2616
2617 sorted_cgroup_settings = sort_cgroup_settings(cgroup_settings);
6b38e644 2618 if (!sorted_cgroup_settings)
ccb4cabe 2619 return false;
ccb4cabe 2620
ccb4cabe
SH
2621 lxc_list_for_each(iterator, sorted_cgroup_settings) {
2622 cg = iterator->elem;
2623
2624 if (do_devices == !strncmp("devices", cg->subsystem, 7)) {
a3926f6a 2625 if (cg_legacy_set_data(cg->subsystem, cg->value, d)) {
ccb4cabe
SH
2626 if (do_devices && (errno == EACCES || errno == EPERM)) {
2627 WARN("Error setting %s to %s for %s",
2628 cg->subsystem, cg->value, d->name);
2629 continue;
2630 }
2631 SYSERROR("Error setting %s to %s for %s",
2632 cg->subsystem, cg->value, d->name);
2633 goto out;
2634 }
6a628f4a 2635 DEBUG("cgroup '%s' set to '%s'", cg->subsystem, cg->value);
ccb4cabe 2636 }
ccb4cabe
SH
2637 }
2638
2639 ret = true;
6b38e644 2640 INFO("Limits for the legacy cgroup hierarchies have been setup");
ccb4cabe 2641out:
ccb4cabe
SH
2642 lxc_list_for_each_safe(iterator, sorted_cgroup_settings, next) {
2643 lxc_list_del(iterator);
2644 free(iterator);
2645 }
2646 free(sorted_cgroup_settings);
2647 return ret;
2648}
2649
a3926f6a
CB
2650static bool __cg_unified_setup_limits(void *hdata,
2651 struct lxc_list *cgroup_settings)
6b38e644
CB
2652{
2653 struct lxc_list *iterator;
2654 struct hierarchy *h = unified;
2655
2656 if (lxc_list_empty(cgroup_settings))
2657 return true;
2658
2659 if (!h)
2660 return false;
2661
2662 lxc_list_for_each(iterator, cgroup_settings) {
2663 int ret;
2664 char *fullpath;
2665 struct lxc_cgroup *cg = iterator->elem;
2666
2667 fullpath = must_make_path(h->fullcgpath, cg->subsystem, NULL);
2668 ret = lxc_write_to_file(fullpath, cg->value, strlen(cg->value), false);
2669 free(fullpath);
2670 if (ret < 0) {
2671 SYSERROR("Failed to set \"%s\" to \"%s\"", cg->subsystem, cg->value);
2672 return false;
2673 }
2674 TRACE("Set \"%s\" to \"%s\"", cg->subsystem, cg->value);
2675 }
2676
2677 INFO("Limits for the unified cgroup hierarchy have been setup");
2678 return true;
2679}
2680
2681static bool cgfsng_setup_limits(void *hdata, struct lxc_conf *conf,
2682 bool do_devices)
2683{
2684 bool bret;
2685
a3926f6a 2686 bret = __cg_legacy_setup_limits(hdata, &conf->cgroup, do_devices);
6b38e644
CB
2687 if (!bret)
2688 return false;
2689
a3926f6a 2690 return __cg_unified_setup_limits(hdata, &conf->cgroup2);
6b38e644
CB
2691}
2692
ccb4cabe
SH
2693static struct cgroup_ops cgfsng_ops = {
2694 .init = cgfsng_init,
2695 .destroy = cgfsng_destroy,
2696 .create = cgfsng_create,
2697 .enter = cgfsng_enter,
ccb4cabe 2698 .escape = cgfsng_escape,
36662416
TA
2699 .num_hierarchies = cgfsng_num_hierarchies,
2700 .get_hierarchies = cgfsng_get_hierarchies,
ccb4cabe
SH
2701 .get_cgroup = cgfsng_get_cgroup,
2702 .get = cgfsng_get,
2703 .set = cgfsng_set,
2704 .unfreeze = cgfsng_unfreeze,
2705 .setup_limits = cgfsng_setup_limits,
2706 .name = "cgroupfs-ng",
2707 .attach = cgfsng_attach,
058c1cb6 2708 .chown = cgfsng_chown,
ccb4cabe
SH
2709 .mount_cgroup = cgfsng_mount,
2710 .nrtasks = cgfsng_nrtasks,
2711 .driver = CGFSNG,
2712
2713 /* unsupported */
2714 .create_legacy = NULL,
2715};